aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/ata/Kconfig41
-rw-r--r--drivers/ata/Makefile1
-rw-r--r--drivers/ata/ahci.c28
-rw-r--r--drivers/ata/ata_generic.c5
-rw-r--r--drivers/ata/ata_piix.c6
-rw-r--r--drivers/ata/libata-acpi.c15
-rw-r--r--drivers/ata/libata-core.c7
-rw-r--r--drivers/ata/libata-eh.c94
-rw-r--r--drivers/ata/libata-scsi.c107
-rw-r--r--drivers/ata/libata-sff.c2
-rw-r--r--drivers/ata/pata_ali.c4
-rw-r--r--drivers/ata/pata_cmd64x.c125
-rw-r--r--drivers/ata/pata_cs5520.c39
-rw-r--r--drivers/ata/pata_cs5536.c2
-rw-r--r--drivers/ata/pata_efar.c9
-rw-r--r--drivers/ata/pata_hpt366.c38
-rw-r--r--drivers/ata/pata_hpt37x.c137
-rw-r--r--drivers/ata/pata_hpt3x2n.c40
-rw-r--r--drivers/ata/pata_hpt3x3.c11
-rw-r--r--drivers/ata/pata_it8213.c27
-rw-r--r--drivers/ata/pata_it821x.c2
-rw-r--r--drivers/ata/pata_legacy.c14
-rw-r--r--drivers/ata/pata_marvell.c2
-rw-r--r--drivers/ata/pata_ns87415.c32
-rw-r--r--drivers/ata/pata_oldpiix.c2
-rw-r--r--drivers/ata/pata_piccolo.c140
-rw-r--r--drivers/ata/pata_radisys.c4
-rw-r--r--drivers/ata/pata_rdc.c2
-rw-r--r--drivers/ata/pata_rz1000.c11
-rw-r--r--drivers/ata/pata_sil680.c6
-rw-r--r--drivers/ata/pata_sis.c21
-rw-r--r--drivers/ata/pata_via.c56
-rw-r--r--drivers/ata/sata_fsl.c18
-rw-r--r--drivers/ata/sata_mv.c2
-rw-r--r--drivers/ata/sata_sil24.c9
-rw-r--r--drivers/base/power/runtime.c23
-rw-r--r--drivers/block/Kconfig2
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/cciss.c544
-rw-r--r--drivers/block/cciss.h18
-rw-r--r--drivers/block/cciss_cmd.h7
-rw-r--r--drivers/block/cciss_scsi.c4
-rw-r--r--drivers/block/drbd/Kconfig71
-rw-r--r--drivers/block/drbd/Makefile5
-rw-r--r--drivers/block/drbd/drbd_actlog.c1424
-rw-r--r--drivers/block/drbd/drbd_bitmap.c1327
-rw-r--r--drivers/block/drbd/drbd_int.h2252
-rw-r--r--drivers/block/drbd/drbd_main.c3699
-rw-r--r--drivers/block/drbd/drbd_nl.c2364
-rw-r--r--drivers/block/drbd/drbd_proc.c265
-rw-r--r--drivers/block/drbd/drbd_receiver.c4426
-rw-r--r--drivers/block/drbd/drbd_req.c1125
-rw-r--r--drivers/block/drbd/drbd_req.h326
-rw-r--r--drivers/block/drbd/drbd_strings.c113
-rw-r--r--drivers/block/drbd/drbd_vli.h351
-rw-r--r--drivers/block/drbd/drbd_worker.c1512
-rw-r--r--drivers/block/drbd/drbd_wrappers.h91
-rw-r--r--drivers/block/ps3vram.c10
-rw-r--r--drivers/cdrom/cdrom.c20
-rw-r--r--drivers/char/hpet.c11
-rw-r--r--drivers/char/ipmi/ipmi_poweroff.c11
-rw-r--r--drivers/char/pty.c14
-rw-r--r--drivers/char/random.c54
-rw-r--r--drivers/char/rtc.c11
-rw-r--r--drivers/dio/dio-driver.c1
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/edac/edac_mce_amd.c21
-rw-r--r--drivers/firewire/core-card.c75
-rw-r--r--drivers/firewire/core-cdev.c113
-rw-r--r--drivers/firewire/core-topology.c17
-rw-r--r--drivers/firewire/core-transaction.c19
-rw-r--r--drivers/firewire/core.h9
-rw-r--r--drivers/firewire/ohci.c39
-rw-r--r--drivers/firewire/sbp2.c9
-rw-r--r--drivers/i2c/Kconfig1
-rw-r--r--drivers/i2c/busses/Kconfig16
-rw-r--r--drivers/i2c/busses/Makefile3
-rw-r--r--drivers/i2c/busses/i2c-ali1535.c2
-rw-r--r--drivers/i2c/busses/i2c-ali15x3.c2
-rw-r--r--drivers/i2c/busses/i2c-i801.c3
-rw-r--r--drivers/i2c/busses/i2c-iop3xx.c6
-rw-r--r--drivers/i2c/busses/i2c-mv64xxx.c3
-rw-r--r--drivers/i2c/busses/i2c-powermac.c139
-rw-r--r--drivers/i2c/busses/i2c-sis5595.c2
-rw-r--r--drivers/i2c/busses/i2c-sis630.c2
-rw-r--r--drivers/i2c/busses/i2c-stub.c34
-rw-r--r--drivers/i2c/busses/i2c-voodoo3.c248
-rw-r--r--drivers/i2c/chips/Kconfig10
-rw-r--r--drivers/i2c/chips/Makefile1
-rw-r--r--drivers/i2c/i2c-core.c169
-rw-r--r--drivers/i2c/i2c-dev.c22
-rw-r--r--drivers/ide/ide-pci-generic.c3
-rw-r--r--drivers/ieee1394/ohci1394.c8
-rw-r--r--drivers/input/keyboard/omap-keypad.c6
-rw-r--r--drivers/leds/leds-ams-delta.c2
-rw-r--r--drivers/leds/leds-locomo.c2
-rw-r--r--drivers/macintosh/mac_hid.c17
-rw-r--r--drivers/md/md.c14
-rw-r--r--drivers/media/radio/Kconfig18
-rw-r--r--drivers/media/radio/Makefile1
-rw-r--r--drivers/media/radio/radio-miropcm20.c270
-rw-r--r--drivers/mfd/Kconfig6
-rw-r--r--drivers/mfd/Makefile1
-rw-r--r--drivers/mfd/mcp-core.c2
-rw-r--r--drivers/mfd/mcp-sa11x0.c3
-rw-r--r--drivers/mfd/mcp.h66
-rw-r--r--drivers/mfd/menelaus.c2
-rw-r--r--drivers/mfd/twl4030-codec.c276
-rw-r--r--drivers/mfd/twl4030-core.c18
-rw-r--r--drivers/mfd/ucb1x00-assabet.c2
-rw-r--r--drivers/mfd/ucb1x00-core.c89
-rw-r--r--drivers/mfd/ucb1x00-ts.c2
-rw-r--r--drivers/mfd/ucb1x00.h255
-rw-r--r--drivers/misc/Kconfig10
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/ds1682.c (renamed from drivers/i2c/chips/ds1682.c)0
-rw-r--r--drivers/misc/ics932s401.c37
-rw-r--r--drivers/misc/sgi-xp/xpc_main.c14
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c5
-rw-r--r--drivers/mmc/host/mmci.c2
-rw-r--r--drivers/mmc/host/omap.c10
-rw-r--r--drivers/mmc/host/omap_hsmmc.c8
-rw-r--r--drivers/mmc/host/pxamci.c10
-rw-r--r--drivers/mtd/maps/omap_nor.c2
-rw-r--r--drivers/mtd/mtd_blkdevs.c2
-rw-r--r--drivers/mtd/nand/Kconfig2
-rw-r--r--drivers/mtd/nand/ams-delta.c2
-rw-r--r--drivers/mtd/nand/omap2.c6
-rw-r--r--drivers/mtd/nand/pxa3xx_nand.c82
-rw-r--r--drivers/mtd/onenand/omap2.c8
-rw-r--r--drivers/of/base.c26
-rw-r--r--drivers/parport/parport_mfc3.c2
-rw-r--r--drivers/parport/procfs.c39
-rw-r--r--drivers/pcmcia/Kconfig2
-rw-r--r--drivers/pcmcia/omap_cf.c4
-rw-r--r--drivers/pcmcia/sa1100_generic.c2
-rw-r--r--drivers/pcmcia/sa1100_h3600.c140
-rw-r--r--drivers/power/Kconfig7
-rw-r--r--drivers/power/Makefile1
-rw-r--r--drivers/power/collie_battery.c418
-rw-r--r--drivers/rtc/Kconfig21
-rw-r--r--drivers/rtc/Makefile2
-rw-r--r--drivers/rtc/rtc-msm6242.c269
-rw-r--r--drivers/rtc/rtc-rp5c01.c222
-rw-r--r--drivers/s390/char/sclp_async.c5
-rw-r--r--drivers/scsi/scsi_sysctl.c11
-rw-r--r--drivers/serial/Kconfig13
-rw-r--r--drivers/serial/Makefile1
-rw-r--r--drivers/serial/apbuart.c710
-rw-r--r--drivers/serial/apbuart.h64
-rw-r--r--drivers/serial/s3c2410.c2
-rw-r--r--drivers/serial/s3c2412.c2
-rw-r--r--drivers/serial/s3c2440.c2
-rw-r--r--drivers/serial/s3c24a0.c2
-rw-r--r--drivers/serial/samsung.c2
-rw-r--r--drivers/serial/samsung.h2
-rw-r--r--drivers/spi/omap2_mcspi.c4
-rw-r--r--drivers/spi/omap_uwire.c10
-rw-r--r--drivers/staging/arlan/arlan-proc.c245
-rw-r--r--drivers/staging/pohmelfs/inode.c10
-rw-r--r--drivers/usb/gadget/omap_udc.c25
-rw-r--r--drivers/usb/host/ohci-omap.c6
-rw-r--r--drivers/usb/musb/omap2430.c2
-rw-r--r--drivers/usb/musb/omap2430.h2
-rw-r--r--drivers/usb/musb/tusb6010_omap.c4
-rw-r--r--drivers/usb/otg/isp1301_omap.c4
-rw-r--r--drivers/video/Kconfig5
-rw-r--r--drivers/video/atafb.c3
-rw-r--r--drivers/video/backlight/da903x_bl.c7
-rw-r--r--drivers/video/backlight/omap1_bl.c4
-rw-r--r--drivers/video/backlight/tdo24m.c1
-rw-r--r--drivers/video/omap/Makefile1
-rw-r--r--drivers/video/omap/blizzard.c6
-rw-r--r--drivers/video/omap/dispc.c26
-rw-r--r--drivers/video/omap/hwa742.c6
-rw-r--r--drivers/video/omap/lcd_2430sdp.c4
-rw-r--r--drivers/video/omap/lcd_ams_delta.c4
-rw-r--r--drivers/video/omap/lcd_apollon.c4
-rw-r--r--drivers/video/omap/lcd_h3.c2
-rw-r--r--drivers/video/omap/lcd_h4.c2
-rw-r--r--drivers/video/omap/lcd_htcherald.c130
-rw-r--r--drivers/video/omap/lcd_inn1510.c4
-rw-r--r--drivers/video/omap/lcd_inn1610.c2
-rw-r--r--drivers/video/omap/lcd_ldp.c4
-rw-r--r--drivers/video/omap/lcd_mipid.c4
-rw-r--r--drivers/video/omap/lcd_omap2evm.c4
-rw-r--r--drivers/video/omap/lcd_omap3beagle.c4
-rw-r--r--drivers/video/omap/lcd_omap3evm.c4
-rw-r--r--drivers/video/omap/lcd_osk.c4
-rw-r--r--drivers/video/omap/lcd_overo.c4
-rw-r--r--drivers/video/omap/lcd_palmte.c4
-rw-r--r--drivers/video/omap/lcd_palmtt.c2
-rw-r--r--drivers/video/omap/lcd_palmz71.c2
-rw-r--r--drivers/video/omap/lcdc.c4
-rw-r--r--drivers/video/omap/omapfb_main.c4
-rw-r--r--drivers/video/omap/rfbi.c2
-rw-r--r--drivers/video/omap/sossi.c4
-rw-r--r--drivers/video/pxa168fb.c1
-rw-r--r--drivers/video/pxafb.c23
-rw-r--r--drivers/watchdog/omap_wdt.c2
-rw-r--r--drivers/watchdog/riowd.c6
-rw-r--r--drivers/zorro/zorro-driver.c1
202 files changed, 23921 insertions, 2028 deletions
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index f2df6e2a224c..676f08b004b3 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -3,7 +3,7 @@
#
menuconfig ATA
- tristate "Serial ATA (prod) and Parallel ATA (experimental) drivers"
+ tristate "Serial ATA and Parallel ATA drivers"
depends on HAS_IOMEM
depends on BLOCK
depends on !(M32R || M68K) || BROKEN
@@ -374,8 +374,8 @@ config PATA_HPT366
If unsure, say N.
config PATA_HPT37X
- tristate "HPT 370/370A/371/372/374/302 PATA support (Experimental)"
- depends on PCI && EXPERIMENTAL
+ tristate "HPT 370/370A/371/372/374/302 PATA support"
+ depends on PCI
help
This option enables support for the majority of the later HPT
PATA controllers via the new ATA layer.
@@ -383,8 +383,8 @@ config PATA_HPT37X
If unsure, say N.
config PATA_HPT3X2N
- tristate "HPT 372N/302N PATA support (Experimental)"
- depends on PCI && EXPERIMENTAL
+ tristate "HPT 372N/302N PATA support"
+ depends on PCI
help
This option enables support for the N variant HPT PATA
controllers via the new ATA layer
@@ -401,7 +401,7 @@ config PATA_HPT3X3
If unsure, say N.
config PATA_HPT3X3_DMA
- bool "HPT 343/363 DMA support (Experimental)"
+ bool "HPT 343/363 DMA support"
depends on PATA_HPT3X3
help
This option enables DMA support for the HPT343/363
@@ -510,8 +510,8 @@ config PATA_NETCELL
If unsure, say N.
config PATA_NINJA32
- tristate "Ninja32/Delkin Cardbus ATA support (Experimental)"
- depends on PCI && EXPERIMENTAL
+ tristate "Ninja32/Delkin Cardbus ATA support"
+ depends on PCI
help
This option enables support for the Ninja32, Delkin and
possibly other brands of Cardbus ATA adapter
@@ -573,6 +573,14 @@ config PATA_PCMCIA
If unsure, say N.
+config PATA_PDC2027X
+ tristate "Promise PATA 2027x support"
+ depends on PCI
+ help
+ This option enables support for Promise PATA pdc20268 to pdc20277 host adapters.
+
+ If unsure, say N.
+
config PATA_PDC_OLD
tristate "Older Promise PATA controller support"
depends on PCI
@@ -643,14 +651,6 @@ config PATA_SERVERWORKS
If unsure, say N.
-config PATA_PDC2027X
- tristate "Promise PATA 2027x support"
- depends on PCI
- help
- This option enables support for Promise PATA pdc20268 to pdc20277 host adapters.
-
- If unsure, say N.
-
config PATA_SIL680
tristate "CMD / Silicon Image 680 PATA support"
depends on PCI
@@ -667,6 +667,15 @@ config PATA_SIS
If unsure, say N.
+config PATA_TOSHIBA
+ tristate "Toshiba Piccolo support (Experimental)"
+ depends on PCI && EXPERIMENTAL
+ help
+ Support for the Toshiba Piccolo controllers. Currently only the
+ primary channel is supported by this driver.
+
+ If unsure, say N.
+
config PATA_VIA
tristate "VIA PATA support"
depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 01e126f343b3..d909435e9d81 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_PATA_RZ1000) += pata_rz1000.o
obj-$(CONFIG_PATA_SC1200) += pata_sc1200.o
obj-$(CONFIG_PATA_SERVERWORKS) += pata_serverworks.o
obj-$(CONFIG_PATA_SIL680) += pata_sil680.o
+obj-$(CONFIG_PATA_TOSHIBA) += pata_piccolo.o
obj-$(CONFIG_PATA_VIA) += pata_via.o
obj-$(CONFIG_PATA_WINBOND) += pata_sl82c105.o
obj-$(CONFIG_PATA_WINBOND_VLB) += pata_winbond.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index a3241a1a710b..b8bea100a160 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -113,6 +113,7 @@ enum {
board_ahci_mcp65 = 6,
board_ahci_nopmp = 7,
board_ahci_yesncq = 8,
+ board_ahci_nosntf = 9,
/* global controller registers */
HOST_CAP = 0x00, /* host capabilities */
@@ -235,6 +236,7 @@ enum {
AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */
AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as
link offline */
+ AHCI_HFLAG_NO_SNTF = (1 << 12), /* no sntf */
/* ap->flags bits */
@@ -508,7 +510,7 @@ static const struct ata_port_info ahci_port_info[] = {
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
- /* board_ahci_yesncq */
+ [board_ahci_yesncq] =
{
AHCI_HFLAGS (AHCI_HFLAG_YES_NCQ),
.flags = AHCI_FLAG_COMMON,
@@ -516,6 +518,14 @@ static const struct ata_port_info ahci_port_info[] = {
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
},
+ [board_ahci_nosntf] =
+ {
+ AHCI_HFLAGS (AHCI_HFLAG_NO_SNTF),
+ .flags = AHCI_FLAG_COMMON,
+ .pio_mask = ATA_PIO4,
+ .udma_mask = ATA_UDMA6,
+ .port_ops = &ahci_ops,
+ },
};
static const struct pci_device_id ahci_pci_tbl[] = {
@@ -531,7 +541,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x2683), board_ahci }, /* ESB2 */
{ PCI_VDEVICE(INTEL, 0x27c6), board_ahci }, /* ICH7-M DH */
{ PCI_VDEVICE(INTEL, 0x2821), board_ahci }, /* ICH8 */
- { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* ICH8 */
+ { PCI_VDEVICE(INTEL, 0x2822), board_ahci_nosntf }, /* ICH8 */
{ PCI_VDEVICE(INTEL, 0x2824), board_ahci }, /* ICH8 */
{ PCI_VDEVICE(INTEL, 0x2829), board_ahci }, /* ICH8M */
{ PCI_VDEVICE(INTEL, 0x282a), board_ahci }, /* ICH8M */
@@ -849,6 +859,12 @@ static void ahci_save_initial_config(struct pci_dev *pdev,
cap &= ~HOST_CAP_PMP;
}
+ if ((cap & HOST_CAP_SNTF) && (hpriv->flags & AHCI_HFLAG_NO_SNTF)) {
+ dev_printk(KERN_INFO, &pdev->dev,
+ "controller can't do SNTF, turning off CAP_SNTF\n");
+ cap &= ~HOST_CAP_SNTF;
+ }
+
if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361 &&
port_map != 1) {
dev_printk(KERN_INFO, &pdev->dev,
@@ -2988,6 +3004,14 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pdev->vendor == PCI_VENDOR_ID_MARVELL && !marvell_enable)
return -ENODEV;
+ /* Promise's PDC42819 is a SAS/SATA controller that has an AHCI mode.
+ * At the moment, we can only use the AHCI mode. Let the users know
+ * that for SAS drives they're out of luck.
+ */
+ if (pdev->vendor == PCI_VENDOR_ID_PROMISE)
+ dev_printk(KERN_INFO, &pdev->dev, "PDC42819 "
+ "can only drive SATA devices with this driver\n");
+
/* acquire resources */
rc = pcim_enable_device(pdev);
if (rc)
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index ecfd22b4f1ce..12e26c3c68e3 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -168,9 +168,12 @@ static struct pci_device_id ata_generic[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561), },
{ PCI_DEVICE(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558), },
{ PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), },
- { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO), },
+#if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE)
{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), },
{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_5), },
+#endif
/* Must come last. If you add entries adjust this table appropriately */
{ PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 1},
{ 0, },
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 9ac4e378992e..0c6155f51173 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -869,10 +869,10 @@ static void do_pata_set_dmamode(struct ata_port *ap, struct ata_device *adev, in
(timings[pio][1] << 8);
}
- if (ap->udma_mask) {
+ if (ap->udma_mask)
udma_enable &= ~(1 << devid);
- pci_write_config_word(dev, master_port, master_data);
- }
+
+ pci_write_config_word(dev, master_port, master_data);
}
/* Don't scribble on 0x48 if the controller does not support UDMA */
if (ap->udma_mask)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index b0882cddfd4c..1245838ac13d 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -807,12 +807,11 @@ static int ata_acpi_exec_tfs(struct ata_device *dev, int *nr_executed)
* EH context.
*
* RETURNS:
- * 0 on success, -errno on failure.
+ * 0 on success, -ENOENT if _SDD doesn't exist, -errno on failure.
*/
static int ata_acpi_push_id(struct ata_device *dev)
{
struct ata_port *ap = dev->link->ap;
- int err;
acpi_status status;
struct acpi_object_list input;
union acpi_object in_params[1];
@@ -835,12 +834,16 @@ static int ata_acpi_push_id(struct ata_device *dev)
status = acpi_evaluate_object(dev->acpi_handle, "_SDD", &input, NULL);
swap_buf_le16(dev->id, ATA_ID_WORDS);
- err = ACPI_FAILURE(status) ? -EIO : 0;
- if (err < 0)
+ if (status == AE_NOT_FOUND)
+ return -ENOENT;
+
+ if (ACPI_FAILURE(status)) {
ata_dev_printk(dev, KERN_WARNING,
"ACPI _SDD failed (AE 0x%x)\n", status);
+ return -EIO;
+ }
- return err;
+ return 0;
}
/**
@@ -971,7 +974,7 @@ int ata_acpi_on_devcfg(struct ata_device *dev)
/* do _SDD if SATA */
if (acpi_sata) {
rc = ata_acpi_push_id(dev);
- if (rc)
+ if (rc && rc != -ENOENT)
goto acpi_err;
}
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index dc72690ed5db..22ff51bdbc8a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6616,6 +6616,13 @@ static int __init ata_init(void)
{
ata_parse_force_param();
+ /*
+ * FIXME: In UP case, there is only one workqueue thread and if you
+ * have more than one PIO device, latency is bloody awful, with
+ * occasional multi-second "hiccups" as one PIO device waits for
+ * another. It's an ugly wart that users DO occasionally complain
+ * about; luckily most users have at most one PIO polled device.
+ */
ata_wq = create_workqueue("ata");
if (!ata_wq)
goto free_force_tbl;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index bba2ae5df1c2..0ea97c942ced 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -110,6 +110,13 @@ static const unsigned long ata_eh_identify_timeouts[] = {
ULONG_MAX,
};
+static const unsigned long ata_eh_flush_timeouts[] = {
+ 15000, /* be generous with flush */
+ 15000, /* ditto */
+ 30000, /* and even more generous */
+ ULONG_MAX,
+};
+
static const unsigned long ata_eh_other_timeouts[] = {
5000, /* same rationale as identify timeout */
10000, /* ditto */
@@ -147,6 +154,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
.timeouts = ata_eh_other_timeouts, },
{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
.timeouts = ata_eh_other_timeouts, },
+ { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
+ .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS
@@ -3112,6 +3121,82 @@ static int atapi_eh_clear_ua(struct ata_device *dev)
return 0;
}
+/**
+ * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
+ * @dev: ATA device which may need FLUSH retry
+ *
+ * If @dev failed FLUSH, it needs to be reported upper layer
+ * immediately as it means that @dev failed to remap and already
+ * lost at least a sector and further FLUSH retrials won't make
+ * any difference to the lost sector. However, if FLUSH failed
+ * for other reasons, for example transmission error, FLUSH needs
+ * to be retried.
+ *
+ * This function determines whether FLUSH failure retry is
+ * necessary and performs it if so.
+ *
+ * RETURNS:
+ * 0 if EH can continue, -errno if EH needs to be repeated.
+ */
+static int ata_eh_maybe_retry_flush(struct ata_device *dev)
+{
+ struct ata_link *link = dev->link;
+ struct ata_port *ap = link->ap;
+ struct ata_queued_cmd *qc;
+ struct ata_taskfile tf;
+ unsigned int err_mask;
+ int rc = 0;
+
+ /* did flush fail for this device? */
+ if (!ata_tag_valid(link->active_tag))
+ return 0;
+
+ qc = __ata_qc_from_tag(ap, link->active_tag);
+ if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
+ qc->tf.command != ATA_CMD_FLUSH))
+ return 0;
+
+ /* if the device failed it, it should be reported to upper layers */
+ if (qc->err_mask & AC_ERR_DEV)
+ return 0;
+
+ /* flush failed for some other reason, give it another shot */
+ ata_tf_init(dev, &tf);
+
+ tf.command = qc->tf.command;
+ tf.flags |= ATA_TFLAG_DEVICE;
+ tf.protocol = ATA_PROT_NODATA;
+
+ ata_dev_printk(dev, KERN_WARNING, "retrying FLUSH 0x%x Emask 0x%x\n",
+ tf.command, qc->err_mask);
+
+ err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+ if (!err_mask) {
+ /*
+ * FLUSH is complete but there's no way to
+ * successfully complete a failed command from EH.
+ * Making sure retry is allowed at least once and
+ * retrying it should do the trick - whatever was in
+ * the cache is already on the platter and this won't
+ * cause infinite loop.
+ */
+ qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
+ } else {
+ ata_dev_printk(dev, KERN_WARNING, "FLUSH failed Emask 0x%x\n",
+ err_mask);
+ rc = -EIO;
+
+ /* if device failed it, report it to upper layers */
+ if (err_mask & AC_ERR_DEV) {
+ qc->err_mask |= AC_ERR_DEV;
+ qc->result_tf = tf;
+ if (!(ap->pflags & ATA_PFLAG_FROZEN))
+ rc = 0;
+ }
+ }
+ return rc;
+}
+
static int ata_link_nr_enabled(struct ata_link *link)
{
struct ata_device *dev;
@@ -3455,6 +3540,15 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
}
}
+ /* retry flush if necessary */
+ ata_for_each_dev(dev, link, ALL) {
+ if (dev->class != ATA_DEV_ATA)
+ continue;
+ rc = ata_eh_maybe_retry_flush(dev);
+ if (rc)
+ goto dev_fail;
+ }
+
/* configure link power saving */
if (ehc->i.action & ATA_EH_LPM)
ata_for_each_dev(dev, link, ALL)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b4ee28dec521..62e6b9ea96af 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -47,6 +47,7 @@
#include <linux/hdreg.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
+#include <asm/unaligned.h>
#include "libata.h"
@@ -154,8 +155,7 @@ static ssize_t ata_scsi_lpm_put(struct device *dev,
*/
for (i = 1; i < ARRAY_SIZE(link_pm_policy); i++) {
const int len = strlen(link_pm_policy[i].name);
- if (strncmp(link_pm_policy[i].name, buf, len) == 0 &&
- buf[len] == '\n') {
+ if (strncmp(link_pm_policy[i].name, buf, len) == 0) {
policy = link_pm_policy[i].value;
break;
}
@@ -1964,6 +1964,7 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
0x80, /* page 0x80, unit serial no page */
0x83, /* page 0x83, device ident page */
0x89, /* page 0x89, ata info page */
+ 0xb0, /* page 0xb0, block limits page */
0xb1, /* page 0xb1, block device characteristics page */
};
@@ -2085,6 +2086,43 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
return 0;
}
+static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
+{
+ u32 min_io_sectors;
+
+ rbuf[1] = 0xb0;
+ rbuf[3] = 0x3c; /* required VPD size with unmap support */
+
+ /*
+ * Optimal transfer length granularity.
+ *
+ * This is always one physical block, but for disks with a smaller
+ * logical than physical sector size we need to figure out what the
+ * latter is.
+ */
+ if (ata_id_has_large_logical_sectors(args->id))
+ min_io_sectors = ata_id_logical_per_physical_sectors(args->id);
+ else
+ min_io_sectors = 1;
+ put_unaligned_be16(min_io_sectors, &rbuf[6]);
+
+ /*
+ * Optimal unmap granularity.
+ *
+ * The ATA spec doesn't even know about a granularity or alignment
+ * for the TRIM command. We can leave away most of the unmap related
+ * VPD page entries, but we have specifify a granularity to signal
+ * that we support some form of unmap - in thise case via WRITE SAME
+ * with the unmap bit set.
+ */
+ if (ata_id_has_trim(args->id)) {
+ put_unaligned_be32(65535 * 512 / 8, &rbuf[20]);
+ put_unaligned_be32(1, &rbuf[28]);
+ }
+
+ return 0;
+}
+
static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
{
int form_factor = ata_id_form_factor(args->id);
@@ -2374,6 +2412,13 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
rbuf[13] = log_per_phys;
rbuf[14] = (lowest_aligned >> 8) & 0x3f;
rbuf[15] = lowest_aligned;
+
+ if (ata_id_has_trim(args->id)) {
+ rbuf[14] |= 0x80; /* TPE */
+
+ if (ata_id_has_zero_after_trim(args->id))
+ rbuf[14] |= 0x40; /* TPRZ */
+ }
}
return 0;
@@ -2896,6 +2941,58 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
return 1;
}
+static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
+{
+ struct ata_taskfile *tf = &qc->tf;
+ struct scsi_cmnd *scmd = qc->scsicmd;
+ struct ata_device *dev = qc->dev;
+ const u8 *cdb = scmd->cmnd;
+ u64 block;
+ u32 n_block;
+ u32 size;
+ void *buf;
+
+ /* we may not issue DMA commands if no DMA mode is set */
+ if (unlikely(!dev->dma_mode))
+ goto invalid_fld;
+
+ if (unlikely(scmd->cmd_len < 16))
+ goto invalid_fld;
+ scsi_16_lba_len(cdb, &block, &n_block);
+
+ /* for now we only support WRITE SAME with the unmap bit set */
+ if (unlikely(!(cdb[1] & 0x8)))
+ goto invalid_fld;
+
+ /*
+ * WRITE SAME always has a sector sized buffer as payload, this
+ * should never be a multiple entry S/G list.
+ */
+ if (!scsi_sg_count(scmd))
+ goto invalid_fld;
+
+ buf = page_address(sg_page(scsi_sglist(scmd)));
+ size = ata_set_lba_range_entries(buf, 512, block, n_block);
+
+ tf->protocol = ATA_PROT_DMA;
+ tf->hob_feature = 0;
+ tf->feature = ATA_DSM_TRIM;
+ tf->hob_nsect = (size / 512) >> 8;
+ tf->nsect = size / 512;
+ tf->command = ATA_CMD_DSM;
+ tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
+ ATA_TFLAG_WRITE;
+
+ ata_qc_set_pc_nbytes(qc);
+
+ return 0;
+
+ invalid_fld:
+ ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x00);
+ /* "Invalid field in cdb" */
+ return 1;
+}
+
/**
* ata_get_xlat_func - check if SCSI to ATA translation is possible
* @dev: ATA device
@@ -2920,6 +3017,9 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
case WRITE_16:
return ata_scsi_rw_xlat;
+ case 0x93 /*WRITE_SAME_16*/:
+ return ata_scsi_write_same_xlat;
+
case SYNCHRONIZE_CACHE:
if (ata_try_flush_cache(dev))
return ata_scsi_flush_xlat;
@@ -3109,6 +3209,9 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
case 0x89:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89);
break;
+ case 0xb0:
+ ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b0);
+ break;
case 0xb1:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1);
break;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index bbbb1fab1755..51eb1e298601 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2384,7 +2384,7 @@ void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
ap->hsm_task_state = HSM_ST_IDLE;
if (ap->ioaddr.bmdma_addr)
- ata_bmdma_stop(qc);
+ ap->ops->bmdma_stop(qc);
spin_unlock_irqrestore(ap->lock, flags);
}
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 1432dc9d0ab8..9434114b2ca8 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -453,7 +453,9 @@ static void ali_init_chipset(struct pci_dev *pdev)
/* Clear CD-ROM DMA write bit */
tmp &= 0x7F;
/* Cable and UDMA */
- pci_write_config_byte(pdev, 0x4B, tmp | 0x09);
+ if (pdev->revision >= 0xc2)
+ tmp |= 0x01;
+ pci_write_config_byte(pdev, 0x4B, tmp | 0x08);
/*
* CD_ROM DMA on (0x53 bit 0). Enable this even if we want
* to use PIO. 0x53 bit 1 (rev 20 only) - enable FIFO control
diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c
index f98dffedf4bc..dadfc358ba1c 100644
--- a/drivers/ata/pata_cmd64x.c
+++ b/drivers/ata/pata_cmd64x.c
@@ -31,7 +31,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_cmd64x"
-#define DRV_VERSION "0.2.5"
+#define DRV_VERSION "0.3.1"
/*
* CMD64x specific registers definition.
@@ -254,17 +254,109 @@ static void cmd648_bmdma_stop(struct ata_queued_cmd *qc)
}
/**
- * cmd646r1_dma_stop - DMA stop callback
+ * cmd64x_bmdma_stop - DMA stop callback
* @qc: Command in progress
*
- * Stub for now while investigating the r1 quirk in the old driver.
+ * Track the completion of live DMA commands and clear the
+ * host->private_data DMA tracking flag as we do.
*/
-static void cmd646r1_bmdma_stop(struct ata_queued_cmd *qc)
+static void cmd64x_bmdma_stop(struct ata_queued_cmd *qc)
{
+ struct ata_port *ap = qc->ap;
ata_bmdma_stop(qc);
+ WARN_ON(ap->host->private_data != ap);
+ ap->host->private_data = NULL;
}
+/**
+ * cmd64x_qc_defer - Defer logic for chip limits
+ * @qc: queued command
+ *
+ * Decide whether we can issue the command. Called under the host lock.
+ */
+
+static int cmd64x_qc_defer(struct ata_queued_cmd *qc)
+{
+ struct ata_host *host = qc->ap->host;
+ struct ata_port *alt = host->ports[1 ^ qc->ap->port_no];
+ int rc;
+ int dma = 0;
+
+ /* Apply the ATA rules first */
+ rc = ata_std_qc_defer(qc);
+ if (rc)
+ return rc;
+
+ if (qc->tf.protocol == ATAPI_PROT_DMA ||
+ qc->tf.protocol == ATA_PROT_DMA)
+ dma = 1;
+
+ /* If the other port is not live then issue the command */
+ if (alt == NULL || !alt->qc_active) {
+ if (dma)
+ host->private_data = qc->ap;
+ return 0;
+ }
+ /* If there is a live DMA command then wait */
+ if (host->private_data != NULL)
+ return ATA_DEFER_PORT;
+ if (dma)
+ /* Cannot overlap our DMA command */
+ return ATA_DEFER_PORT;
+ return 0;
+}
+
+/**
+ * cmd64x_interrupt - ATA host interrupt handler
+ * @irq: irq line (unused)
+ * @dev_instance: pointer to our ata_host information structure
+ *
+ * Our interrupt handler for PCI IDE devices. Calls
+ * ata_sff_host_intr() for each port that is flagging an IRQ. We cannot
+ * use the defaults as we need to avoid touching status/altstatus during
+ * a DMA.
+ *
+ * LOCKING:
+ * Obtains host lock during operation.
+ *
+ * RETURNS:
+ * IRQ_NONE or IRQ_HANDLED.
+ */
+irqreturn_t cmd64x_interrupt(int irq, void *dev_instance)
+{
+ struct ata_host *host = dev_instance;
+ struct pci_dev *pdev = to_pci_dev(host->dev);
+ unsigned int i;
+ unsigned int handled = 0;
+ unsigned long flags;
+ static const u8 irq_reg[2] = { CFR, ARTTIM23 };
+ static const u8 irq_mask[2] = { 1 << 2, 1 << 4 };
+
+ /* TODO: make _irqsave conditional on x86 PCI IDE legacy mode */
+ spin_lock_irqsave(&host->lock, flags);
+
+ for (i = 0; i < host->n_ports; i++) {
+ struct ata_port *ap;
+ u8 reg;
+
+ pci_read_config_byte(pdev, irq_reg[i], &reg);
+ ap = host->ports[i];
+ if (ap && (reg & irq_mask[i]) &&
+ !(ap->flags & ATA_FLAG_DISABLED)) {
+ struct ata_queued_cmd *qc;
+
+ qc = ata_qc_from_tag(ap, ap->link.active_tag);
+ if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) &&
+ (qc->flags & ATA_QCFLAG_ACTIVE))
+ handled |= ata_sff_host_intr(ap, qc);
+ }
+ }
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ return IRQ_RETVAL(handled);
+}
static struct scsi_host_template cmd64x_sht = {
ATA_BMDMA_SHT(DRV_NAME),
};
@@ -273,6 +365,8 @@ static const struct ata_port_operations cmd64x_base_ops = {
.inherits = &ata_bmdma_port_ops,
.set_piomode = cmd64x_set_piomode,
.set_dmamode = cmd64x_set_dmamode,
+ .bmdma_stop = cmd64x_bmdma_stop,
+ .qc_defer = cmd64x_qc_defer,
};
static struct ata_port_operations cmd64x_port_ops = {
@@ -282,7 +376,6 @@ static struct ata_port_operations cmd64x_port_ops = {
static struct ata_port_operations cmd646r1_port_ops = {
.inherits = &cmd64x_base_ops,
- .bmdma_stop = cmd646r1_bmdma_stop,
.cable_detect = ata_cable_40wire,
};
@@ -290,12 +383,11 @@ static struct ata_port_operations cmd648_port_ops = {
.inherits = &cmd64x_base_ops,
.bmdma_stop = cmd648_bmdma_stop,
.cable_detect = cmd648_cable_detect,
+ .qc_defer = ata_std_qc_defer
};
static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
- u32 class_rev;
-
static const struct ata_port_info cmd_info[6] = {
{ /* CMD 643 - no UDMA */
.flags = ATA_FLAG_SLAVE_POSS,
@@ -340,40 +432,43 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
const struct ata_port_info *ppi[] = { &cmd_info[id->driver_data], NULL };
u8 mrdmode;
int rc;
+ struct ata_host *host;
rc = pcim_enable_device(pdev);
if (rc)
return rc;
- pci_read_config_dword(pdev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xFF;
-
if (id->driver_data == 0) /* 643 */
ata_pci_bmdma_clear_simplex(pdev);
if (pdev->device == PCI_DEVICE_ID_CMD_646) {
/* Does UDMA work ? */
- if (class_rev > 4)
+ if (pdev->revision > 4)
ppi[0] = &cmd_info[2];
/* Early rev with other problems ? */
- else if (class_rev == 1)
+ else if (pdev->revision == 1)
ppi[0] = &cmd_info[3];
}
+
pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64);
pci_read_config_byte(pdev, MRDMODE, &mrdmode);
mrdmode &= ~ 0x30; /* IRQ set up */
mrdmode |= 0x02; /* Memory read line enable */
pci_write_config_byte(pdev, MRDMODE, mrdmode);
- /* Force PIO 0 here.. */
-
/* PPC specific fixup copied from old driver */
#ifdef CONFIG_PPC
pci_write_config_byte(pdev, UDIDETCR0, 0xF0);
#endif
+ rc = ata_pci_sff_prepare_host(pdev, ppi, &host);
+ if (rc)
+ return rc;
+ /* We use this pointer to track the AP which has DMA running */
+ host->private_data = NULL;
- return ata_pci_sff_init_one(pdev, ppi, &cmd64x_sht, NULL);
+ pci_set_master(pdev);
+ return ata_pci_sff_activate_host(host, cmd64x_interrupt, &cmd64x_sht);
}
#ifdef CONFIG_PM
diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c
index 0df83cf74233..95ebdac517f2 100644
--- a/drivers/ata/pata_cs5520.c
+++ b/drivers/ata/pata_cs5520.c
@@ -90,48 +90,12 @@ static void cs5520_set_timings(struct ata_port *ap, struct ata_device *adev, int
}
/**
- * cs5520_enable_dma - turn on DMA bits
- *
- * Turn on the DMA bits for this disk. Needed because the BIOS probably
- * has not done the work for us. Belongs in the core SATA code.
- */
-
-static void cs5520_enable_dma(struct ata_port *ap, struct ata_device *adev)
-{
- /* Set the DMA enable/disable flag */
- u8 reg = ioread8(ap->ioaddr.bmdma_addr + 0x02);
- reg |= 1<<(adev->devno + 5);
- iowrite8(reg, ap->ioaddr.bmdma_addr + 0x02);
-}
-
-/**
- * cs5520_set_dmamode - program DMA timings
- * @ap: ATA port
- * @adev: ATA device
- *
- * Program the DMA mode timings for the controller according to the pio
- * clocking table. Note that this device sets the DMA timings to PIO
- * mode values. This may seem bizarre but the 5520 architecture talks
- * PIO mode to the disk and DMA mode to the controller so the underlying
- * transfers are PIO timed.
- */
-
-static void cs5520_set_dmamode(struct ata_port *ap, struct ata_device *adev)
-{
- static const int dma_xlate[3] = { XFER_PIO_0, XFER_PIO_3, XFER_PIO_4 };
- cs5520_set_timings(ap, adev, dma_xlate[adev->dma_mode]);
- cs5520_enable_dma(ap, adev);
-}
-
-/**
* cs5520_set_piomode - program PIO timings
* @ap: ATA port
* @adev: ATA device
*
* Program the PIO mode timings for the controller according to the pio
- * clocking table. We know pio_mode will equal dma_mode because of the
- * CS5520 architecture. At least once we turned DMA on and wrote a
- * mode setter.
+ * clocking table.
*/
static void cs5520_set_piomode(struct ata_port *ap, struct ata_device *adev)
@@ -149,7 +113,6 @@ static struct ata_port_operations cs5520_port_ops = {
.qc_prep = ata_sff_dumb_qc_prep,
.cable_detect = ata_cable_40wire,
.set_piomode = cs5520_set_piomode,
- .set_dmamode = cs5520_set_dmamode,
};
static int __devinit cs5520_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index 6da4cb486c8d..ffee3978ec83 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c
@@ -224,7 +224,7 @@ static struct scsi_host_template cs5536_sht = {
};
static struct ata_port_operations cs5536_port_ops = {
- .inherits = &ata_bmdma_port_ops,
+ .inherits = &ata_bmdma32_port_ops,
.cable_detect = cs5536_cable_detect,
.set_piomode = cs5536_set_piomode,
.set_dmamode = cs5536_set_dmamode,
diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c
index 2a6412f5d117..b2e71e6473ed 100644
--- a/drivers/ata/pata_efar.c
+++ b/drivers/ata/pata_efar.c
@@ -2,6 +2,7 @@
* pata_efar.c - EFAR PIIX clone controller driver
*
* (C) 2005 Red Hat
+ * (C) 2009 Bartlomiej Zolnierkiewicz
*
* Some parts based on ata_piix.c by Jeff Garzik and others.
*
@@ -118,12 +119,12 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
int shift = 4 * ap->port_no;
u8 slave_data;
- idetm_data &= 0xCC0F;
+ idetm_data &= 0xFF0F;
idetm_data |= (control << 4);
/* Slave timing in separate register */
pci_read_config_byte(dev, 0x44, &slave_data);
- slave_data &= 0x0F << shift;
+ slave_data &= ap->port_no ? 0x0F : 0xF0;
slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << shift;
pci_write_config_byte(dev, 0x44, slave_data);
}
@@ -200,7 +201,7 @@ static void efar_set_dmamode (struct ata_port *ap, struct ata_device *adev)
master_data &= 0xFF4F; /* Mask out IORDY|TIME1|DMAONLY */
master_data |= control << 4;
pci_read_config_byte(dev, 0x44, &slave_data);
- slave_data &= (0x0F + 0xE1 * ap->port_no);
+ slave_data &= ap->port_no ? 0x0F : 0xF0;
/* Load the matching timing */
slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << (ap->port_no ? 4 : 0);
pci_write_config_byte(dev, 0x44, slave_data);
@@ -251,7 +252,7 @@ static int efar_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
static const struct ata_port_info info = {
.flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
- .mwdma_mask = ATA_MWDMA2,
+ .mwdma_mask = ATA_MWDMA12_ONLY,
.udma_mask = ATA_UDMA4,
.port_ops = &efar_ops,
};
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index d7f2da127d13..0bd48e8f21bd 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -27,7 +27,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt366"
-#define DRV_VERSION "0.6.2"
+#define DRV_VERSION "0.6.7"
struct hpt_clock {
u8 xfer_mode;
@@ -36,24 +36,22 @@ struct hpt_clock {
/* key for bus clock timings
* bit
- * 0:3 data_high_time. inactive time of DIOW_/DIOR_ for PIO and MW
- * DMA. cycles = value + 1
- * 4:8 data_low_time. active time of DIOW_/DIOR_ for PIO and MW
- * DMA. cycles = value + 1
- * 9:12 cmd_high_time. inactive time of DIOW_/DIOR_ during task file
+ * 0:3 data_high_time. Inactive time of DIOW_/DIOR_ for PIO and MW DMA.
+ * cycles = value + 1
+ * 4:7 data_low_time. Active time of DIOW_/DIOR_ for PIO and MW DMA.
+ * cycles = value + 1
+ * 8:11 cmd_high_time. Inactive time of DIOW_/DIOR_ during task file
* register access.
- * 13:17 cmd_low_time. active time of DIOW_/DIOR_ during task file
+ * 12:15 cmd_low_time. Active time of DIOW_/DIOR_ during task file
* register access.
- * 18:21 udma_cycle_time. clock freq and clock cycles for UDMA xfer.
- * during task file register access.
- * 22:24 pre_high_time. time to initialize 1st cycle for PIO and MW DMA
- * xfer.
- * 25:27 cmd_pre_high_time. time to initialize 1st PIO cycle for task
+ * 16:18 udma_cycle_time. Clock cycles for UDMA xfer?
+ * 19:21 pre_high_time. Time to initialize 1st cycle for PIO and MW DMA xfer.
+ * 22:24 cmd_pre_high_time. Time to initialize 1st PIO cycle for task file
* register access.
- * 28 UDMA enable
- * 29 DMA enable
- * 30 PIO_MST enable. if set, the chip is in bus master mode during
- * PIO.
+ * 28 UDMA enable.
+ * 29 DMA enable.
+ * 30 PIO_MST enable. If set, the chip is in bus master mode during
+ * PIO xfer.
* 31 FIFO enable.
*/
@@ -344,7 +342,6 @@ static int hpt36x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
const struct ata_port_info *ppi[] = { &info_hpt366, NULL };
void *hpriv = NULL;
- u32 class_rev;
u32 reg1;
int rc;
@@ -352,13 +349,10 @@ static int hpt36x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if (rc)
return rc;
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xFF;
-
/* May be a later chip in disguise. Check */
/* Newer chips are not in the HPT36x driver. Ignore them */
- if (class_rev > 2)
- return -ENODEV;
+ if (dev->revision > 2)
+ return -ENODEV;
hpt36x_init_chipset(dev);
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index d0a7df2e5ca7..4224cfccedef 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -24,7 +24,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt37x"
-#define DRV_VERSION "0.6.12"
+#define DRV_VERSION "0.6.14"
struct hpt_clock {
u8 xfer_speed;
@@ -303,72 +303,79 @@ static unsigned long hpt370a_filter(struct ata_device *adev, unsigned long mask)
}
/**
- * hpt37x_pre_reset - reset the hpt37x bus
- * @link: ATA link to reset
- * @deadline: deadline jiffies for the operation
+ * hpt37x_cable_detect - Detect the cable type
+ * @ap: ATA port to detect on
*
- * Perform the initial reset handling for the 370/372 and 374 func 0
+ * Return the cable type attached to this port
*/
-static int hpt37x_pre_reset(struct ata_link *link, unsigned long deadline)
+static int hpt37x_cable_detect(struct ata_port *ap)
{
- u8 scr2, ata66;
- struct ata_port *ap = link->ap;
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
- static const struct pci_bits hpt37x_enable_bits[] = {
- { 0x50, 1, 0x04, 0x04 },
- { 0x54, 1, 0x04, 0x04 }
- };
- if (!pci_test_config_bits(pdev, &hpt37x_enable_bits[ap->port_no]))
- return -ENOENT;
+ u8 scr2, ata66;
pci_read_config_byte(pdev, 0x5B, &scr2);
pci_write_config_byte(pdev, 0x5B, scr2 & ~0x01);
+
+ udelay(10); /* debounce */
+
/* Cable register now active */
pci_read_config_byte(pdev, 0x5A, &ata66);
/* Restore state */
pci_write_config_byte(pdev, 0x5B, scr2);
if (ata66 & (2 >> ap->port_no))
- ap->cbl = ATA_CBL_PATA40;
+ return ATA_CBL_PATA40;
else
- ap->cbl = ATA_CBL_PATA80;
-
- /* Reset the state machine */
- pci_write_config_byte(pdev, 0x50 + 4 * ap->port_no, 0x37);
- udelay(100);
-
- return ata_sff_prereset(link, deadline);
+ return ATA_CBL_PATA80;
}
-static int hpt374_fn1_pre_reset(struct ata_link *link, unsigned long deadline)
+/**
+ * hpt374_fn1_cable_detect - Detect the cable type
+ * @ap: ATA port to detect on
+ *
+ * Return the cable type attached to this port
+ */
+
+static int hpt374_fn1_cable_detect(struct ata_port *ap)
{
- static const struct pci_bits hpt37x_enable_bits[] = {
- { 0x50, 1, 0x04, 0x04 },
- { 0x54, 1, 0x04, 0x04 }
- };
- u16 mcr3;
- u8 ata66;
- struct ata_port *ap = link->ap;
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
unsigned int mcrbase = 0x50 + 4 * ap->port_no;
-
- if (!pci_test_config_bits(pdev, &hpt37x_enable_bits[ap->port_no]))
- return -ENOENT;
+ u16 mcr3;
+ u8 ata66;
/* Do the extra channel work */
pci_read_config_word(pdev, mcrbase + 2, &mcr3);
- /* Set bit 15 of 0x52 to enable TCBLID as input
- */
+ /* Set bit 15 of 0x52 to enable TCBLID as input */
pci_write_config_word(pdev, mcrbase + 2, mcr3 | 0x8000);
pci_read_config_byte(pdev, 0x5A, &ata66);
/* Reset TCBLID/FCBLID to output */
pci_write_config_word(pdev, mcrbase + 2, mcr3);
if (ata66 & (2 >> ap->port_no))
- ap->cbl = ATA_CBL_PATA40;
+ return ATA_CBL_PATA40;
else
- ap->cbl = ATA_CBL_PATA80;
+ return ATA_CBL_PATA80;
+}
+
+/**
+ * hpt37x_pre_reset - reset the hpt37x bus
+ * @link: ATA link to reset
+ * @deadline: deadline jiffies for the operation
+ *
+ * Perform the initial reset handling for the HPT37x.
+ */
+
+static int hpt37x_pre_reset(struct ata_link *link, unsigned long deadline)
+{
+ struct ata_port *ap = link->ap;
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ static const struct pci_bits hpt37x_enable_bits[] = {
+ { 0x50, 1, 0x04, 0x04 },
+ { 0x54, 1, 0x04, 0x04 }
+ };
+ if (!pci_test_config_bits(pdev, &hpt37x_enable_bits[ap->port_no]))
+ return -ENOENT;
/* Reset the state machine */
pci_write_config_byte(pdev, 0x50 + 4 * ap->port_no, 0x37);
@@ -404,9 +411,8 @@ static void hpt370_set_piomode(struct ata_port *ap, struct ata_device *adev)
pci_read_config_dword(pdev, addr1, &reg);
mode = hpt37x_find_mode(ap, adev->pio_mode);
- mode &= ~0x8000000; /* No FIFO in PIO */
- mode &= ~0x30070000; /* Leave config bits alone */
- reg &= 0x30070000; /* Strip timing bits */
+ mode &= 0xCFC3FFFF; /* Leave DMA bits alone */
+ reg &= ~0xCFC3FFFF; /* Strip timing bits */
pci_write_config_dword(pdev, addr1, reg | mode);
}
@@ -423,8 +429,7 @@ static void hpt370_set_dmamode(struct ata_port *ap, struct ata_device *adev)
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
u32 addr1, addr2;
- u32 reg;
- u32 mode;
+ u32 reg, mode, mask;
u8 fast;
addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
@@ -436,11 +441,12 @@ static void hpt370_set_dmamode(struct ata_port *ap, struct ata_device *adev)
fast |= 0x01;
pci_write_config_byte(pdev, addr2, fast);
+ mask = adev->dma_mode < XFER_UDMA_0 ? 0x31C001FF : 0x303C0000;
+
pci_read_config_dword(pdev, addr1, &reg);
mode = hpt37x_find_mode(ap, adev->dma_mode);
- mode |= 0x8000000; /* FIFO in MWDMA or UDMA */
- mode &= ~0xC0000000; /* Leave config bits alone */
- reg &= 0xC0000000; /* Strip timing bits */
+ mode &= mask;
+ reg &= ~mask;
pci_write_config_dword(pdev, addr1, reg | mode);
}
@@ -508,9 +514,8 @@ static void hpt372_set_piomode(struct ata_port *ap, struct ata_device *adev)
mode = hpt37x_find_mode(ap, adev->pio_mode);
printk("Find mode for %d reports %X\n", adev->pio_mode, mode);
- mode &= ~0x80000000; /* No FIFO in PIO */
- mode &= ~0x30070000; /* Leave config bits alone */
- reg &= 0x30070000; /* Strip timing bits */
+ mode &= 0xCFC3FFFF; /* Leave DMA bits alone */
+ reg &= ~0xCFC3FFFF; /* Strip timing bits */
pci_write_config_dword(pdev, addr1, reg | mode);
}
@@ -527,8 +532,7 @@ static void hpt372_set_dmamode(struct ata_port *ap, struct ata_device *adev)
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
u32 addr1, addr2;
- u32 reg;
- u32 mode;
+ u32 reg, mode, mask;
u8 fast;
addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
@@ -539,12 +543,13 @@ static void hpt372_set_dmamode(struct ata_port *ap, struct ata_device *adev)
fast &= ~0x07;
pci_write_config_byte(pdev, addr2, fast);
+ mask = adev->dma_mode < XFER_UDMA_0 ? 0x31C001FF : 0x303C0000;
+
pci_read_config_dword(pdev, addr1, &reg);
mode = hpt37x_find_mode(ap, adev->dma_mode);
printk("Find mode for DMA %d reports %X\n", adev->dma_mode, mode);
- mode &= ~0xC0000000; /* Leave config bits alone */
- mode |= 0x80000000; /* FIFO in MWDMA or UDMA */
- reg &= 0xC0000000; /* Strip timing bits */
+ mode &= mask;
+ reg &= ~mask;
pci_write_config_dword(pdev, addr1, reg | mode);
}
@@ -584,6 +589,7 @@ static struct ata_port_operations hpt370_port_ops = {
.bmdma_stop = hpt370_bmdma_stop,
.mode_filter = hpt370_filter,
+ .cable_detect = hpt37x_cable_detect,
.set_piomode = hpt370_set_piomode,
.set_dmamode = hpt370_set_dmamode,
.prereset = hpt37x_pre_reset,
@@ -608,6 +614,7 @@ static struct ata_port_operations hpt372_port_ops = {
.bmdma_stop = hpt37x_bmdma_stop,
+ .cable_detect = hpt37x_cable_detect,
.set_piomode = hpt372_set_piomode,
.set_dmamode = hpt372_set_dmamode,
.prereset = hpt37x_pre_reset,
@@ -620,7 +627,8 @@ static struct ata_port_operations hpt372_port_ops = {
static struct ata_port_operations hpt374_fn1_port_ops = {
.inherits = &hpt372_port_ops,
- .prereset = hpt374_fn1_pre_reset,
+ .cable_detect = hpt374_fn1_cable_detect,
+ .prereset = hpt37x_pre_reset,
};
/**
@@ -791,9 +799,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
static const int MHz[4] = { 33, 40, 50, 66 };
void *private_data = NULL;
const struct ata_port_info *ppi[] = { NULL, NULL };
-
+ u8 rev = dev->revision;
u8 irqmask;
- u32 class_rev;
u8 mcr1;
u32 freq;
int prefer_dpll = 1;
@@ -808,19 +815,16 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if (rc)
return rc;
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xFF;
-
if (dev->device == PCI_DEVICE_ID_TTI_HPT366) {
/* May be a later chip in disguise. Check */
/* Older chips are in the HPT366 driver. Ignore them */
- if (class_rev < 3)
+ if (rev < 3)
return -ENODEV;
/* N series chips have their own driver. Ignore */
- if (class_rev == 6)
+ if (rev == 6)
return -ENODEV;
- switch(class_rev) {
+ switch(rev) {
case 3:
ppi[0] = &info_hpt370;
chip_table = &hpt370;
@@ -836,28 +840,29 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
chip_table = &hpt372;
break;
default:
- printk(KERN_ERR "pata_hpt37x: Unknown HPT366 subtype please report (%d).\n", class_rev);
+ printk(KERN_ERR "pata_hpt37x: Unknown HPT366 "
+ "subtype, please report (%d).\n", rev);
return -ENODEV;
}
} else {
switch(dev->device) {
case PCI_DEVICE_ID_TTI_HPT372:
/* 372N if rev >= 2*/
- if (class_rev >= 2)
+ if (rev >= 2)
return -ENODEV;
ppi[0] = &info_hpt372;
chip_table = &hpt372a;
break;
case PCI_DEVICE_ID_TTI_HPT302:
/* 302N if rev > 1 */
- if (class_rev > 1)
+ if (rev > 1)
return -ENODEV;
ppi[0] = &info_hpt372;
/* Check this */
chip_table = &hpt302;
break;
case PCI_DEVICE_ID_TTI_HPT371:
- if (class_rev > 1)
+ if (rev > 1)
return -ENODEV;
ppi[0] = &info_hpt372;
chip_table = &hpt371;
diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c
index 3d59fe0a408d..9a09a1b11ca5 100644
--- a/drivers/ata/pata_hpt3x2n.c
+++ b/drivers/ata/pata_hpt3x2n.c
@@ -25,7 +25,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt3x2n"
-#define DRV_VERSION "0.3.4"
+#define DRV_VERSION "0.3.7"
enum {
HPT_PCI_FAST = (1 << 31),
@@ -80,14 +80,13 @@ static struct hpt_clock hpt3x2n_clocks[] = {
{ XFER_MW_DMA_2, 0x2c829c62 },
{ XFER_MW_DMA_1, 0x2c829c66 },
- { XFER_MW_DMA_0, 0x2c829d2c },
+ { XFER_MW_DMA_0, 0x2c829d2e },
{ XFER_PIO_4, 0x0c829c62 },
{ XFER_PIO_3, 0x0c829c84 },
{ XFER_PIO_2, 0x0c829ca6 },
{ XFER_PIO_1, 0x0d029d26 },
{ XFER_PIO_0, 0x0d029d5e },
- { 0, 0x0d029d5e }
};
/**
@@ -128,12 +127,15 @@ static int hpt3x2n_cable_detect(struct ata_port *ap)
pci_read_config_byte(pdev, 0x5B, &scr2);
pci_write_config_byte(pdev, 0x5B, scr2 & ~0x01);
+
+ udelay(10); /* debounce */
+
/* Cable register now active */
pci_read_config_byte(pdev, 0x5A, &ata66);
/* Restore state */
pci_write_config_byte(pdev, 0x5B, scr2);
- if (ata66 & (1 << ap->port_no))
+ if (ata66 & (2 >> ap->port_no))
return ATA_CBL_PATA40;
else
return ATA_CBL_PATA80;
@@ -185,9 +187,8 @@ static void hpt3x2n_set_piomode(struct ata_port *ap, struct ata_device *adev)
pci_read_config_dword(pdev, addr1, &reg);
mode = hpt3x2n_find_mode(ap, adev->pio_mode);
- mode &= ~0x8000000; /* No FIFO in PIO */
- mode &= ~0x30070000; /* Leave config bits alone */
- reg &= 0x30070000; /* Strip timing bits */
+ mode &= 0xCFC3FFFF; /* Leave DMA bits alone */
+ reg &= ~0xCFC3FFFF; /* Strip timing bits */
pci_write_config_dword(pdev, addr1, reg | mode);
}
@@ -204,8 +205,7 @@ static void hpt3x2n_set_dmamode(struct ata_port *ap, struct ata_device *adev)
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
u32 addr1, addr2;
- u32 reg;
- u32 mode;
+ u32 reg, mode, mask;
u8 fast;
addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no);
@@ -216,11 +216,12 @@ static void hpt3x2n_set_dmamode(struct ata_port *ap, struct ata_device *adev)
fast &= ~0x07;
pci_write_config_byte(pdev, addr2, fast);
+ mask = adev->dma_mode < XFER_UDMA_0 ? 0x31C001FF : 0x303C0000;
+
pci_read_config_dword(pdev, addr1, &reg);
mode = hpt3x2n_find_mode(ap, adev->dma_mode);
- mode |= 0x8000000; /* FIFO in MWDMA or UDMA */
- mode &= ~0xC0000000; /* Leave config bits alone */
- reg &= 0xC0000000; /* Strip timing bits */
+ mode &= mask;
+ reg &= ~mask;
pci_write_config_dword(pdev, addr1, reg | mode);
}
@@ -447,10 +448,8 @@ static int hpt3x2n_init_one(struct pci_dev *dev, const struct pci_device_id *id)
.port_ops = &hpt3x2n_port_ops
};
const struct ata_port_info *ppi[] = { &info, NULL };
-
+ u8 rev = dev->revision;
u8 irqmask;
- u32 class_rev;
-
unsigned int pci_mhz;
unsigned int f_low, f_high;
int adjust;
@@ -462,26 +461,23 @@ static int hpt3x2n_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if (rc)
return rc;
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xFF;
-
switch(dev->device) {
case PCI_DEVICE_ID_TTI_HPT366:
- if (class_rev < 6)
+ if (rev < 6)
return -ENODEV;
break;
case PCI_DEVICE_ID_TTI_HPT371:
- if (class_rev < 2)
+ if (rev < 2)
return -ENODEV;
/* 371N if rev > 1 */
break;
case PCI_DEVICE_ID_TTI_HPT372:
/* 372N if rev >= 2*/
- if (class_rev < 2)
+ if (rev < 2)
return -ENODEV;
break;
case PCI_DEVICE_ID_TTI_HPT302:
- if (class_rev < 2)
+ if (rev < 2)
return -ENODEV;
break;
case PCI_DEVICE_ID_TTI_HPT372N:
diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index 7e310253b36b..c86c71639a95 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c
@@ -255,8 +255,17 @@ static int hpt3x3_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
#ifdef CONFIG_PM
static int hpt3x3_reinit_one(struct pci_dev *dev)
{
+ struct ata_host *host = dev_get_drvdata(&dev->dev);
+ int rc;
+
+ rc = ata_pci_device_do_resume(dev);
+ if (rc)
+ return rc;
+
hpt3x3_init_chipset(dev);
- return ata_pci_device_resume(dev);
+
+ ata_host_resume(host);
+ return 0;
}
#endif
diff --git a/drivers/ata/pata_it8213.c b/drivers/ata/pata_it8213.c
index f156da8076f7..8f3325adceb3 100644
--- a/drivers/ata/pata_it8213.c
+++ b/drivers/ata/pata_it8213.c
@@ -22,7 +22,7 @@
#define DRV_VERSION "0.0.3"
/**
- * it8213_pre_reset - check for 40/80 pin
+ * it8213_pre_reset - probe begin
* @link: link
* @deadline: deadline jiffies for the operation
*
@@ -92,18 +92,17 @@ static void it8213_set_piomode (struct ata_port *ap, struct ata_device *adev)
{ 2, 1 },
{ 2, 3 }, };
- if (pio > 2)
- control |= 1; /* TIME1 enable */
+ if (pio > 1)
+ control |= 1; /* TIME */
if (ata_pio_need_iordy(adev)) /* PIO 3/4 require IORDY */
- control |= 2; /* IORDY enable */
+ control |= 2; /* IE */
/* Bit 2 is set for ATAPI on the IT8213 - reverse of ICH/PIIX */
if (adev->class != ATA_DEV_ATA)
- control |= 4;
+ control |= 4; /* PPE */
pci_read_config_word(dev, idetm_port, &idetm_data);
- /* Enable PPE, IE and TIME as appropriate */
-
+ /* Set PPE, IE, and TIME as appropriate */
if (adev->devno == 0) {
idetm_data &= 0xCCF0;
idetm_data |= control;
@@ -112,17 +111,17 @@ static void it8213_set_piomode (struct ata_port *ap, struct ata_device *adev)
} else {
u8 slave_data;
- idetm_data &= 0xCC0F;
+ idetm_data &= 0xFF0F;
idetm_data |= (control << 4);
/* Slave timing in separate register */
pci_read_config_byte(dev, 0x44, &slave_data);
slave_data &= 0xF0;
- slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << 4;
+ slave_data |= (timings[pio][0] << 2) | timings[pio][1];
pci_write_config_byte(dev, 0x44, slave_data);
}
- idetm_data |= 0x4000; /* Ensure SITRE is enabled */
+ idetm_data |= 0x4000; /* Ensure SITRE is set */
pci_write_config_word(dev, idetm_port, idetm_data);
}
@@ -173,10 +172,10 @@ static void it8213_set_dmamode (struct ata_port *ap, struct ata_device *adev)
udma_enable |= (1 << devid);
- /* Load the UDMA mode number */
+ /* Load the UDMA cycle time */
pci_read_config_word(dev, 0x4A, &udma_timing);
udma_timing &= ~(3 << (4 * devid));
- udma_timing |= (udma & 3) << (4 * devid);
+ udma_timing |= u_speed << (4 * devid);
pci_write_config_word(dev, 0x4A, udma_timing);
/* Load the clock selection */
@@ -211,7 +210,7 @@ static void it8213_set_dmamode (struct ata_port *ap, struct ata_device *adev)
master_data &= 0xFF4F; /* Mask out IORDY|TIME1|DMAONLY */
master_data |= control << 4;
pci_read_config_byte(dev, 0x44, &slave_data);
- slave_data &= (0x0F + 0xE1 * ap->port_no);
+ slave_data &= 0xF0;
/* Load the matching timing */
slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) << (ap->port_no ? 4 : 0);
pci_write_config_byte(dev, 0x44, slave_data);
@@ -263,7 +262,7 @@ static int it8213_init_one (struct pci_dev *pdev, const struct pci_device_id *en
static const struct ata_port_info info = {
.flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
- .mwdma_mask = ATA_MWDMA2,
+ .mwdma_mask = ATA_MWDMA12_ONLY,
.udma_mask = ATA_UDMA4, /* FIXME: want UDMA 100? */
.port_ops = &it8213_ops,
};
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index 188bc2fcd22c..edc5c1fed150 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -955,7 +955,7 @@ static int it821x_reinit_one(struct pci_dev *pdev)
static const struct pci_device_id it821x[] = {
{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), },
{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), },
- { PCI_VDEVICE(RDC, 0x1010), },
+ { PCI_VDEVICE(RDC, PCI_DEVICE_ID_RDC_D1010), },
{ },
};
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 6932e56d179c..9df1ff7e1eaa 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -25,6 +25,13 @@
* http://www.ryston.cz/petr/vlb/pdc20230b.html
* http://www.ryston.cz/petr/vlb/pdc20230c.html
* http://www.ryston.cz/petr/vlb/pdc20630.html
+ * QDI65x0:
+ * http://www.ryston.cz/petr/vlb/qd6500.html
+ * http://www.ryston.cz/petr/vlb/qd6580.html
+ *
+ * QDI65x0 probe code based on drivers/ide/legacy/qd65xx.c
+ * Rewritten from the work of Colten Edwards <pje120@cs.usask.ca> by
+ * Samuel Thibault <samuel.thibault@ens-lyon.org>
*
* Unsupported but docs exist:
* Appian/Adaptec AIC25VL01/Cirrus Logic PD7220
@@ -35,7 +42,7 @@
* the MPIIX where the tuning is PCI side but the IDE is "ISA side".
*
* Specific support is included for the ht6560a/ht6560b/opti82c611a/
- * opti82c465mv/promise 20230c/20630/winbond83759A
+ * opti82c465mv/promise 20230c/20630/qdi65x0/winbond83759A
*
* Use the autospeed and pio_mask options with:
* Appian ADI/2 aka CLPD7220 or AIC25VL01.
@@ -672,7 +679,7 @@ static void qdi6580dp_set_piomode(struct ata_port *ap, struct ata_device *adev)
outb(timing, ld_qdi->timing + 2 * ap->port_no);
/* Clear the FIFO */
if (adev->class != ATA_DEV_ATA)
- outb(0x5F, ld_qdi->timing + 3);
+ outb(0x5F, (ld_qdi->timing & 0xFFF0) + 3);
}
/**
@@ -707,7 +714,7 @@ static void qdi6580_set_piomode(struct ata_port *ap, struct ata_device *adev)
outb(timing, ld_qdi->timing + 2 * adev->devno);
/* Clear the FIFO */
if (adev->class != ATA_DEV_ATA)
- outb(0x5F, ld_qdi->timing + 3);
+ outb(0x5F, (ld_qdi->timing & 0xFFF0) + 3);
}
/**
@@ -787,6 +794,7 @@ static struct ata_port_operations qdi6580_port_ops = {
static struct ata_port_operations qdi6580dp_port_ops = {
.inherits = &legacy_base_port_ops,
.set_piomode = qdi6580dp_set_piomode,
+ .qc_issue = qdi_qc_issue,
.sff_data_xfer = vlb32_data_xfer,
};
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index 2096fb737f82..950da39cae3d 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -58,7 +58,7 @@ static int marvell_pata_active(struct pci_dev *pdev)
}
/**
- * marvell_pre_reset - check for 40/80 pin
+ * marvell_pre_reset - probe begin
* @link: link
* @deadline: deadline jiffies for the operation
*
diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
index 773b1590b492..061aa1c41a48 100644
--- a/drivers/ata/pata_ns87415.c
+++ b/drivers/ata/pata_ns87415.c
@@ -325,6 +325,13 @@ static struct scsi_host_template ns87415_sht = {
ATA_BMDMA_SHT(DRV_NAME),
};
+static void ns87415_fixup(struct pci_dev *pdev)
+{
+ /* Select 512 byte sectors */
+ pci_write_config_byte(pdev, 0x55, 0xEE);
+ /* Select PIO0 8bit clocking */
+ pci_write_config_byte(pdev, 0x54, 0xB7);
+}
/**
* ns87415_init_one - Register 87415 ATA PCI device with kernel services
@@ -371,10 +378,8 @@ static int ns87415_init_one (struct pci_dev *pdev, const struct pci_device_id *e
if (rc)
return rc;
- /* Select 512 byte sectors */
- pci_write_config_byte(pdev, 0x55, 0xEE);
- /* Select PIO0 8bit clocking */
- pci_write_config_byte(pdev, 0x54, 0xB7);
+ ns87415_fixup(pdev);
+
return ata_pci_sff_init_one(pdev, ppi, &ns87415_sht, NULL);
}
@@ -384,6 +389,23 @@ static const struct pci_device_id ns87415_pci_tbl[] = {
{ } /* terminate list */
};
+#ifdef CONFIG_PM
+static int ns87415_reinit_one(struct pci_dev *pdev)
+{
+ struct ata_host *host = dev_get_drvdata(&pdev->dev);
+ int rc;
+
+ rc = ata_pci_device_do_resume(pdev);
+ if (rc)
+ return rc;
+
+ ns87415_fixup(pdev);
+
+ ata_host_resume(host);
+ return 0;
+}
+#endif
+
static struct pci_driver ns87415_pci_driver = {
.name = DRV_NAME,
.id_table = ns87415_pci_tbl,
@@ -391,7 +413,7 @@ static struct pci_driver ns87415_pci_driver = {
.remove = ata_pci_remove_one,
#ifdef CONFIG_PM
.suspend = ata_pci_device_suspend,
- .resume = ata_pci_device_resume,
+ .resume = ns87415_reinit_one,
#endif
};
diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c
index 84ac5033ac89..9a8687db6b2d 100644
--- a/drivers/ata/pata_oldpiix.c
+++ b/drivers/ata/pata_oldpiix.c
@@ -239,7 +239,7 @@ static int oldpiix_init_one (struct pci_dev *pdev, const struct pci_device_id *e
static const struct ata_port_info info = {
.flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
- .mwdma_mask = ATA_MWDMA2,
+ .mwdma_mask = ATA_MWDMA12_ONLY,
.port_ops = &oldpiix_pata_ops,
};
const struct ata_port_info *ppi[] = { &info, NULL };
diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c
new file mode 100644
index 000000000000..bfe0180f3efa
--- /dev/null
+++ b/drivers/ata/pata_piccolo.c
@@ -0,0 +1,140 @@
+/*
+ * pata_piccolo.c - Toshiba Piccolo PATA/SATA controller driver.
+ *
+ * This is basically an update to ata_generic.c to add Toshiba Piccolo support
+ * then split out to keep ata_generic "clean".
+ *
+ * Copyright 2005 Red Hat Inc, all rights reserved.
+ *
+ * Elements from ide/pci/generic.c
+ * Copyright (C) 2001-2002 Andre Hedrick <andre@linux-ide.org>
+ * Portions (C) Copyright 2002 Red Hat Inc <alan@redhat.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public License
+ *
+ * The timing data tables/programming info are courtesy of the NetBSD driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <scsi/scsi_host.h>
+#include <linux/libata.h>
+
+#define DRV_NAME "pata_piccolo"
+#define DRV_VERSION "0.0.1"
+
+
+
+static void tosh_set_piomode(struct ata_port *ap, struct ata_device *adev)
+{
+ static const u16 pio[6] = { /* For reg 0x50 low word & E088 */
+ 0x0566, 0x0433, 0x0311, 0x0201, 0x0200, 0x0100
+ };
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ u16 conf;
+ pci_read_config_word(pdev, 0x50, &conf);
+ conf &= 0xE088;
+ conf |= pio[adev->pio_mode - XFER_PIO_0];
+ pci_write_config_word(pdev, 0x50, conf);
+}
+
+static void tosh_set_dmamode(struct ata_port *ap, struct ata_device *adev)
+{
+ struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+ u32 conf;
+ pci_read_config_dword(pdev, 0x5C, &conf);
+ conf &= 0x78FFE088; /* Keep the other bits */
+ if (adev->dma_mode >= XFER_UDMA_0) {
+ int udma = adev->dma_mode - XFER_UDMA_0;
+ conf |= 0x80000000;
+ conf |= (udma + 2) << 28;
+ conf |= (2 - udma) * 0x111; /* spread into three nibbles */
+ } else {
+ static const u32 mwdma[4] = {
+ 0x0655, 0x0200, 0x0200, 0x0100
+ };
+ conf |= mwdma[adev->dma_mode - XFER_MW_DMA_0];
+ }
+ pci_write_config_dword(pdev, 0x5C, conf);
+}
+
+
+static struct scsi_host_template tosh_sht = {
+ ATA_BMDMA_SHT(DRV_NAME),
+};
+
+static struct ata_port_operations tosh_port_ops = {
+ .inherits = &ata_bmdma_port_ops,
+ .cable_detect = ata_cable_unknown,
+ .set_piomode = tosh_set_piomode,
+ .set_dmamode = tosh_set_dmamode
+};
+
+/**
+ * ata_tosh_init - attach generic IDE
+ * @dev: PCI device found
+ * @id: match entry
+ *
+ * Called each time a matching IDE interface is found. We check if the
+ * interface is one we wish to claim and if so we perform any chip
+ * specific hacks then let the ATA layer do the heavy lifting.
+ */
+
+static int ata_tosh_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ static const struct ata_port_info info = {
+ .flags = ATA_FLAG_SLAVE_POSS,
+ .pio_mask = ATA_PIO5,
+ .mwdma_mask = ATA_MWDMA2,
+ .udma_mask = ATA_UDMA2,
+ .port_ops = &tosh_port_ops
+ };
+ const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info };
+ /* Just one port for the moment */
+ return ata_pci_sff_init_one(dev, ppi, &tosh_sht, NULL);
+}
+
+static struct pci_device_id ata_tosh[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_5), },
+ { 0, },
+};
+
+static struct pci_driver ata_tosh_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = ata_tosh,
+ .probe = ata_tosh_init_one,
+ .remove = ata_pci_remove_one,
+#ifdef CONFIG_PM
+ .suspend = ata_pci_device_suspend,
+ .resume = ata_pci_device_resume,
+#endif
+};
+
+static int __init ata_tosh_init(void)
+{
+ return pci_register_driver(&ata_tosh_pci_driver);
+}
+
+
+static void __exit ata_tosh_exit(void)
+{
+ pci_unregister_driver(&ata_tosh_pci_driver);
+}
+
+
+MODULE_AUTHOR("Alan Cox");
+MODULE_DESCRIPTION("Low level driver for Toshiba Piccolo ATA");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ata_tosh);
+MODULE_VERSION(DRV_VERSION);
+
+module_init(ata_tosh_init);
+module_exit(ata_tosh_exit);
+
diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c
index 4401b332eaab..4fd25e737d9a 100644
--- a/drivers/ata/pata_radisys.c
+++ b/drivers/ata/pata_radisys.c
@@ -139,9 +139,9 @@ static void radisys_set_dmamode (struct ata_port *ap, struct ata_device *adev)
pci_read_config_byte(dev, 0x4A, &udma_mode);
if (adev->xfer_mode == XFER_UDMA_2)
- udma_mode &= ~ (1 << adev->devno);
+ udma_mode &= ~(2 << (adev->devno * 4));
else /* UDMA 4 */
- udma_mode |= (1 << adev->devno);
+ udma_mode |= (2 << (adev->devno * 4));
pci_write_config_byte(dev, 0x4A, udma_mode);
diff --git a/drivers/ata/pata_rdc.c b/drivers/ata/pata_rdc.c
index c843a1e07c4f..237a24d41a2d 100644
--- a/drivers/ata/pata_rdc.c
+++ b/drivers/ata/pata_rdc.c
@@ -284,7 +284,7 @@ static struct ata_port_info rdc_port_info = {
.flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
- .mwdma_mask = ATA_MWDMA2,
+ .mwdma_mask = ATA_MWDMA12_ONLY,
.udma_mask = ATA_UDMA5,
.port_ops = &rdc_pata_ops,
};
diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c
index a5e4dfe60b41..2932998fc4c6 100644
--- a/drivers/ata/pata_rz1000.c
+++ b/drivers/ata/pata_rz1000.c
@@ -105,11 +105,20 @@ static int rz1000_init_one (struct pci_dev *pdev, const struct pci_device_id *en
#ifdef CONFIG_PM
static int rz1000_reinit_one(struct pci_dev *pdev)
{
+ struct ata_host *host = dev_get_drvdata(&pdev->dev);
+ int rc;
+
+ rc = ata_pci_device_do_resume(pdev);
+ if (rc)
+ return rc;
+
/* If this fails on resume (which is a "cant happen" case), we
must stop as any progress risks data loss */
if (rz1000_fifo_disable(pdev))
panic("rz1000 fifo");
- return ata_pci_device_resume(pdev);
+
+ ata_host_resume(host);
+ return 0;
}
#endif
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 4cb649d8d38c..a2ace48a4610 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -212,13 +212,11 @@ static struct ata_port_operations sil680_port_ops = {
static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio)
{
- u32 class_rev = 0;
u8 tmpbyte = 0;
- pci_read_config_dword(pdev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xff;
/* FIXME: double check */
- pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, (class_rev) ? 1 : 255);
+ pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE,
+ pdev->revision ? 1 : 255);
pci_write_config_byte(pdev, 0x80, 0x00);
pci_write_config_byte(pdev, 0x84, 0x00);
diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index 488e77bcd22b..5c30d56dec84 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c
@@ -2,7 +2,7 @@
* pata_sis.c - SiS ATA driver
*
* (C) 2005 Red Hat
- * (C) 2007 Bartlomiej Zolnierkiewicz
+ * (C) 2007,2009 Bartlomiej Zolnierkiewicz
*
* Based upon linux/drivers/ide/pci/sis5513.c
* Copyright (C) 1999-2000 Andre Hedrick <andre@linux-ide.org>
@@ -829,6 +829,23 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
return ata_pci_sff_init_one(pdev, ppi, &sis_sht, chipset);
}
+#ifdef CONFIG_PM
+static int sis_reinit_one(struct pci_dev *pdev)
+{
+ struct ata_host *host = dev_get_drvdata(&pdev->dev);
+ int rc;
+
+ rc = ata_pci_device_do_resume(pdev);
+ if (rc)
+ return rc;
+
+ sis_fixup(pdev, host->private_data);
+
+ ata_host_resume(host);
+ return 0;
+}
+#endif
+
static const struct pci_device_id sis_pci_tbl[] = {
{ PCI_VDEVICE(SI, 0x5513), }, /* SiS 5513 */
{ PCI_VDEVICE(SI, 0x5518), }, /* SiS 5518 */
@@ -844,7 +861,7 @@ static struct pci_driver sis_pci_driver = {
.remove = ata_pci_remove_one,
#ifdef CONFIG_PM
.suspend = ata_pci_device_suspend,
- .resume = ata_pci_device_resume,
+ .resume = sis_reinit_one,
#endif
};
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 88984b803d6d..0d97890af681 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -303,14 +303,21 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev, int mo
}
/* Set UDMA unless device is not UDMA capable */
- if (udma_type && t.udma) {
- u8 cable80_status;
+ if (udma_type) {
+ u8 udma_etc;
- /* Get 80-wire cable detection bit */
- pci_read_config_byte(pdev, 0x50 + offset, &cable80_status);
- cable80_status &= 0x10;
+ pci_read_config_byte(pdev, 0x50 + offset, &udma_etc);
- pci_write_config_byte(pdev, 0x50 + offset, ut | cable80_status);
+ /* clear transfer mode bit */
+ udma_etc &= ~0x20;
+
+ if (t.udma) {
+ /* preserve 80-wire cable detection bit */
+ udma_etc &= 0x10;
+ udma_etc |= ut;
+ }
+
+ pci_write_config_byte(pdev, 0x50 + offset, udma_etc);
}
}
@@ -337,6 +344,32 @@ static void via_set_dmamode(struct ata_port *ap, struct ata_device *adev)
}
/**
+ * via_mode_filter - filter buggy device/mode pairs
+ * @dev: ATA device
+ * @mask: Mode bitmask
+ *
+ * We need to apply some minimal filtering for old controllers and at least
+ * one breed of Transcend SSD. Return the updated mask.
+ */
+
+static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask)
+{
+ struct ata_host *host = dev->link->ap->host;
+ const struct via_isa_bridge *config = host->private_data;
+ unsigned char model_num[ATA_ID_PROD_LEN + 1];
+
+ if (config->id == PCI_DEVICE_ID_VIA_82C586_0) {
+ ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
+ if (strcmp(model_num, "TS64GSSD25-M") == 0) {
+ ata_dev_printk(dev, KERN_WARNING,
+ "disabling UDMA mode due to reported lockups with this device.\n");
+ mask &= ~ ATA_MASK_UDMA;
+ }
+ }
+ return ata_bmdma_mode_filter(dev, mask);
+}
+
+/**
* via_tf_load - send taskfile registers to host controller
* @ap: Port to which output is sent
* @tf: ATA taskfile register set
@@ -427,6 +460,7 @@ static struct ata_port_operations via_port_ops = {
.prereset = via_pre_reset,
.sff_tf_load = via_tf_load,
.port_start = via_port_start,
+ .mode_filter = via_mode_filter,
};
static struct ata_port_operations via_port_ops_noirq = {
@@ -526,7 +560,7 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
.port_ops = &via_port_ops
};
const struct ata_port_info *ppi[] = { NULL, NULL };
- struct pci_dev *isa = NULL;
+ struct pci_dev *isa;
const struct via_isa_bridge *config;
static int printed_version;
u8 enable;
@@ -551,15 +585,13 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
if ((isa = pci_get_device(PCI_VENDOR_ID_VIA +
!!(config->flags & VIA_BAD_ID),
config->id, NULL))) {
+ u8 rev = isa->revision;
+ pci_dev_put(isa);
- if (isa->revision >= config->rev_min &&
- isa->revision <= config->rev_max)
+ if (rev >= config->rev_min && rev <= config->rev_max)
break;
- pci_dev_put(isa);
}
- pci_dev_put(isa);
-
if (!(config->flags & VIA_NO_ENABLES)) {
/* 0x40 low bits indicate enabled channels */
pci_read_config_byte(pdev, 0x40 , &enable);
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 172b57e6543f..8a5d35b759dd 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -34,7 +34,7 @@ enum {
SATA_FSL_HOST_FLAGS = (ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA |
- ATA_FLAG_PMP | ATA_FLAG_NCQ),
+ ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN),
SATA_FSL_MAX_CMDS = SATA_FSL_QUEUE_DEPTH,
SATA_FSL_CMD_HDR_SIZE = 16, /* 4 DWORDS */
@@ -132,7 +132,7 @@ enum {
INT_ON_SINGL_DEVICE_ERR = (1 << 1),
INT_ON_CMD_COMPLETE = 1,
- INT_ON_ERROR = INT_ON_FATAL_ERR |
+ INT_ON_ERROR = INT_ON_FATAL_ERR | INT_ON_SNOTIFY_UPDATE |
INT_ON_PHYRDY_CHG | INT_ON_SINGL_DEVICE_ERR,
/*
@@ -153,7 +153,7 @@ enum {
IE_ON_CMD_COMPLETE = 1,
DEFAULT_PORT_IRQ_ENABLE_MASK = IE_ON_FATAL_ERR | IE_ON_PHYRDY_CHG |
- IE_ON_SIGNATURE_UPDATE |
+ IE_ON_SIGNATURE_UPDATE | IE_ON_SNOTIFY_UPDATE |
IE_ON_SINGL_DEVICE_ERR | IE_ON_CMD_COMPLETE,
EXT_INDIRECT_SEG_PRD_FLAG = (1 << 31),
@@ -992,9 +992,8 @@ static void sata_fsl_error_intr(struct ata_port *ap)
*/
sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
- if (unlikely(SError & 0xFFFF0000)) {
+ if (unlikely(SError & 0xFFFF0000))
sata_fsl_scr_write(&ap->link, SCR_ERROR, SError);
- }
DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n",
hstatus, cereg, ioread32(hcr_base + DE), SError);
@@ -1007,6 +1006,10 @@ static void sata_fsl_error_intr(struct ata_port *ap)
freeze = 1;
}
+ /* Handle SDB FIS receive & notify update */
+ if (hstatus & INT_ON_SNOTIFY_UPDATE)
+ sata_async_notification(ap);
+
/* Handle PHYRDY change notification */
if (hstatus & INT_ON_PHYRDY_CHG) {
DPRINTK("SATA FSL: PHYRDY change indication\n");
@@ -1070,9 +1073,9 @@ static void sata_fsl_error_intr(struct ata_port *ap)
}
/* record error info */
- if (qc) {
+ if (qc)
qc->err_mask |= err_mask;
- } else
+ else
ehi->err_mask |= err_mask;
ehi->action |= action;
@@ -1103,7 +1106,6 @@ static void sata_fsl_host_intr(struct ata_port *ap)
if (unlikely(SError & 0xFFFF0000)) {
DPRINTK("serror @host_intr : 0x%x\n", SError);
sata_fsl_error_intr(ap);
-
}
if (unlikely(hstatus & INT_ON_ERROR)) {
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 6f5093b7c8c5..a8a7be0d06ff 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -2217,7 +2217,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc)
int err = 0;
ata_tf_to_fis(&qc->tf, link->pmp, 1, (void *)fis);
- err = mv_send_fis(ap, fis, sizeof(fis) / sizeof(fis[0]));
+ err = mv_send_fis(ap, fis, ARRAY_SIZE(fis));
if (err)
return err;
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index e6946fc527d0..1370df6c420c 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -417,6 +417,10 @@ static struct ata_port_operations sil24_ops = {
#endif
};
+static int sata_sil24_msi; /* Disable MSI */
+module_param_named(msi, sata_sil24_msi, bool, S_IRUGO);
+MODULE_PARM_DESC(msi, "Enable MSI (Default: false)");
+
/*
* Use bits 30-31 of port_flags to encode available port numbers.
* Current maxium is 4.
@@ -1340,6 +1344,11 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
sil24_init_controller(host);
+ if (sata_sil24_msi && !pci_enable_msi(pdev)) {
+ dev_printk(KERN_INFO, &pdev->dev, "Using MSI\n");
+ pci_intx(pdev, 0);
+ }
+
pci_set_master(pdev);
return ata_host_activate(host, pdev->irq, sil24_interrupt, IRQF_SHARED,
&sil24_sht);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 846d89e3d122..5a01ecef4af3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -185,6 +185,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
}
dev->power.runtime_status = RPM_SUSPENDING;
+ dev->power.deferred_resume = false;
if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) {
spin_unlock_irq(&dev->power.lock);
@@ -200,7 +201,6 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
if (retval) {
dev->power.runtime_status = RPM_ACTIVE;
pm_runtime_cancel_pending(dev);
- dev->power.deferred_resume = false;
if (retval == -EAGAIN || retval == -EBUSY) {
notify = true;
@@ -217,7 +217,6 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
wake_up_all(&dev->power.wait_queue);
if (dev->power.deferred_resume) {
- dev->power.deferred_resume = false;
__pm_runtime_resume(dev, false);
retval = -EAGAIN;
goto out;
@@ -626,6 +625,8 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
goto out;
dev->power.timer_expires = jiffies + msecs_to_jiffies(delay);
+ if (!dev->power.timer_expires)
+ dev->power.timer_expires = 1;
mod_timer(&dev->power.suspend_timer, dev->power.timer_expires);
out:
@@ -659,13 +660,17 @@ static int __pm_request_resume(struct device *dev)
pm_runtime_deactivate_timer(dev);
+ if (dev->power.runtime_status == RPM_SUSPENDING) {
+ dev->power.deferred_resume = true;
+ return retval;
+ }
if (dev->power.request_pending) {
/* If non-resume request is pending, we can overtake it. */
dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME;
return retval;
- } else if (retval) {
- return retval;
}
+ if (retval)
+ return retval;
dev->power.request = RPM_REQ_RESUME;
dev->power.request_pending = true;
@@ -777,7 +782,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
}
if (parent) {
- spin_lock(&parent->power.lock);
+ spin_lock_nested(&parent->power.lock, SINGLE_DEPTH_NESTING);
/*
* It is invalid to put an active child under a parent that is
@@ -786,12 +791,10 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
*/
if (!parent->power.disable_depth
&& !parent->power.ignore_children
- && parent->power.runtime_status != RPM_ACTIVE) {
+ && parent->power.runtime_status != RPM_ACTIVE)
error = -EBUSY;
- } else {
- if (dev->power.runtime_status == RPM_SUSPENDED)
- atomic_inc(&parent->power.child_count);
- }
+ else if (dev->power.runtime_status == RPM_SUSPENDED)
+ atomic_inc(&parent->power.child_count);
spin_unlock(&parent->power.lock);
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1d886e079c58..77bfce52e9ca 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -271,6 +271,8 @@ config BLK_DEV_CRYPTOLOOP
instead, which can be configured to be on-disk compatible with the
cryptoloop device.
+source "drivers/block/drbd/Kconfig"
+
config BLK_DEV_NBD
tristate "Network block device support"
depends on NET
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index cdaa3f8fddf0..aff5ac925c34 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o
obj-$(CONFIG_BLK_DEV_HD) += hd.o
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
swim_mod-objs := swim.o swim_asm.o
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 92b126394fa1..873e594860d3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -179,19 +179,17 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl);
static int deregister_disk(ctlr_info_t *h, int drv_index,
int clear_all, int via_ioctl);
-static void cciss_read_capacity(int ctlr, int logvol, int withirq,
+static void cciss_read_capacity(int ctlr, int logvol,
sector_t *total_size, unsigned int *block_size);
-static void cciss_read_capacity_16(int ctlr, int logvol, int withirq,
+static void cciss_read_capacity_16(int ctlr, int logvol,
sector_t *total_size, unsigned int *block_size);
static void cciss_geometry_inquiry(int ctlr, int logvol,
- int withirq, sector_t total_size,
+ sector_t total_size,
unsigned int block_size, InquiryData_struct *inq_buff,
drive_info_struct *drv);
static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
__u32);
static void start_io(ctlr_info_t *h);
-static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
- __u8 page_code, unsigned char *scsi3addr, int cmd_type);
static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
__u8 page_code, unsigned char scsi3addr[],
int cmd_type);
@@ -424,12 +422,9 @@ cciss_proc_write(struct file *file, const char __user *buf,
if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
struct seq_file *seq = file->private_data;
ctlr_info_t *h = seq->private;
- int rc;
- rc = cciss_engage_scsi(h->ctlr);
- if (rc != 0)
- err = -rc;
- else
+ err = cciss_engage_scsi(h->ctlr);
+ if (err == 0)
err = length;
} else
#endif /* CONFIG_CISS_SCSI_TAPE */
@@ -1657,9 +1652,11 @@ static void cciss_softirq_done(struct request *rq)
{
CommandList_struct *cmd = rq->completion_data;
ctlr_info_t *h = hba[cmd->ctlr];
+ SGDescriptor_struct *curr_sg = cmd->SG;
unsigned long flags;
u64bit temp64;
int i, ddir;
+ int sg_index = 0;
if (cmd->Request.Type.Direction == XFER_READ)
ddir = PCI_DMA_FROMDEVICE;
@@ -1669,9 +1666,22 @@ static void cciss_softirq_done(struct request *rq)
/* command did not need to be retried */
/* unmap the DMA mapping for all the scatter gather elements */
for (i = 0; i < cmd->Header.SGList; i++) {
- temp64.val32.lower = cmd->SG[i].Addr.lower;
- temp64.val32.upper = cmd->SG[i].Addr.upper;
- pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
+ if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
+ temp64.val32.lower = cmd->SG[i].Addr.lower;
+ temp64.val32.upper = cmd->SG[i].Addr.upper;
+ pci_dma_sync_single_for_cpu(h->pdev, temp64.val,
+ cmd->SG[i].Len, ddir);
+ pci_unmap_single(h->pdev, temp64.val,
+ cmd->SG[i].Len, ddir);
+ /* Point to the next block */
+ curr_sg = h->cmd_sg_list[cmd->cmdindex]->sgchain;
+ sg_index = 0;
+ }
+ temp64.val32.lower = curr_sg[sg_index].Addr.lower;
+ temp64.val32.upper = curr_sg[sg_index].Addr.upper;
+ pci_unmap_page(h->pdev, temp64.val, curr_sg[sg_index].Len,
+ ddir);
+ ++sg_index;
}
#ifdef CCISS_DEBUG
@@ -1701,7 +1711,7 @@ static inline void log_unit_to_scsi3addr(ctlr_info_t *h,
* via the inquiry page 0. Model, vendor, and rev are set to empty strings if
* they cannot be read.
*/
-static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
+static void cciss_get_device_descr(int ctlr, int logvol,
char *vendor, char *model, char *rev)
{
int rc;
@@ -1717,14 +1727,8 @@ static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
return;
log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
- if (withirq)
- rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf,
- sizeof(InquiryData_struct), 0,
- scsi3addr, TYPE_CMD);
- else
- rc = sendcmd(CISS_INQUIRY, ctlr, inq_buf,
- sizeof(InquiryData_struct), 0,
- scsi3addr, TYPE_CMD);
+ rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf, sizeof(*inq_buf), 0,
+ scsi3addr, TYPE_CMD);
if (rc == IO_OK) {
memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
vendor[VENDOR_LEN] = '\0';
@@ -1743,7 +1747,7 @@ static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
* number cannot be had, for whatever reason, 16 bytes of 0xff
* are returned instead.
*/
-static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
+static void cciss_get_serial_no(int ctlr, int logvol,
unsigned char *serial_no, int buflen)
{
#define PAGE_83_INQ_BYTES 64
@@ -1759,12 +1763,8 @@ static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
return;
memset(serial_no, 0, buflen);
log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
- if (withirq)
- rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
- PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
- else
- rc = sendcmd(CISS_INQUIRY, ctlr, buf,
- PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
+ rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
+ PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
if (rc == IO_OK)
memcpy(serial_no, &buf[8], buflen);
kfree(buf);
@@ -1793,10 +1793,10 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
/* This is a hardware imposed limit. */
- blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
+ blk_queue_max_hw_segments(disk->queue, h->maxsgentries);
/* This is a limit in the driver and could be eliminated. */
- blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
+ blk_queue_max_phys_segments(disk->queue, h->maxsgentries);
blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
@@ -1852,18 +1852,16 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
/* testing to see if 16-byte CDBs are already being used */
if (h->cciss_read == CCISS_READ_16) {
- cciss_read_capacity_16(h->ctlr, drv_index, 1,
+ cciss_read_capacity_16(h->ctlr, drv_index,
&total_size, &block_size);
} else {
- cciss_read_capacity(ctlr, drv_index, 1,
- &total_size, &block_size);
-
+ cciss_read_capacity(ctlr, drv_index, &total_size, &block_size);
/* if read_capacity returns all F's this volume is >2TB */
/* in size so we switch to 16-byte CDB's for all */
/* read/write ops */
if (total_size == 0xFFFFFFFFULL) {
- cciss_read_capacity_16(ctlr, drv_index, 1,
+ cciss_read_capacity_16(ctlr, drv_index,
&total_size, &block_size);
h->cciss_read = CCISS_READ_16;
h->cciss_write = CCISS_WRITE_16;
@@ -1873,14 +1871,14 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
}
}
- cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
+ cciss_geometry_inquiry(ctlr, drv_index, total_size, block_size,
inq_buff, drvinfo);
drvinfo->block_size = block_size;
drvinfo->nr_blocks = total_size + 1;
- cciss_get_device_descr(ctlr, drv_index, 1, drvinfo->vendor,
+ cciss_get_device_descr(ctlr, drv_index, drvinfo->vendor,
drvinfo->model, drvinfo->rev);
- cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
+ cciss_get_serial_no(ctlr, drv_index, drvinfo->serial_no,
sizeof(drvinfo->serial_no));
/* Save the lunid in case we deregister the disk, below. */
memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
@@ -2531,6 +2529,8 @@ static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
case 0: return IO_OK; /* no sense */
case 1: return IO_OK; /* recovered error */
default:
+ if (check_for_unit_attention(h, c))
+ return IO_NEEDS_RETRY;
printk(KERN_WARNING "cciss%d: cmd 0x%02x "
"check condition, sense key = 0x%02x\n",
h->ctlr, c->Request.CDB[0],
@@ -2672,7 +2672,7 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
}
static void cciss_geometry_inquiry(int ctlr, int logvol,
- int withirq, sector_t total_size,
+ sector_t total_size,
unsigned int block_size,
InquiryData_struct *inq_buff,
drive_info_struct *drv)
@@ -2683,14 +2683,8 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
memset(inq_buff, 0, sizeof(InquiryData_struct));
log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
- if (withirq)
- return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
- inq_buff, sizeof(*inq_buff),
- 0xC1, scsi3addr, TYPE_CMD);
- else
- return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
- sizeof(*inq_buff), 0xC1, scsi3addr,
- TYPE_CMD);
+ return_code = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buff,
+ sizeof(*inq_buff), 0xC1, scsi3addr, TYPE_CMD);
if (return_code == IO_OK) {
if (inq_buff->data_byte[8] == 0xFF) {
printk(KERN_WARNING
@@ -2723,7 +2717,7 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
}
static void
-cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
+cciss_read_capacity(int ctlr, int logvol, sector_t *total_size,
unsigned int *block_size)
{
ReadCapdata_struct *buf;
@@ -2737,14 +2731,8 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
}
log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
- if (withirq)
- return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
- ctlr, buf, sizeof(ReadCapdata_struct),
- 0, scsi3addr, TYPE_CMD);
- else
- return_code = sendcmd(CCISS_READ_CAPACITY,
- ctlr, buf, sizeof(ReadCapdata_struct),
- 0, scsi3addr, TYPE_CMD);
+ return_code = sendcmd_withirq(CCISS_READ_CAPACITY, ctlr, buf,
+ sizeof(ReadCapdata_struct), 0, scsi3addr, TYPE_CMD);
if (return_code == IO_OK) {
*total_size = be32_to_cpu(*(__be32 *) buf->total_size);
*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
@@ -2756,8 +2744,8 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
kfree(buf);
}
-static void
-cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size, unsigned int *block_size)
+static void cciss_read_capacity_16(int ctlr, int logvol,
+ sector_t *total_size, unsigned int *block_size)
{
ReadCapdata_struct_16 *buf;
int return_code;
@@ -2770,16 +2758,9 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
}
log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
- if (withirq) {
- return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
- ctlr, buf, sizeof(ReadCapdata_struct_16),
- 0, scsi3addr, TYPE_CMD);
- }
- else {
- return_code = sendcmd(CCISS_READ_CAPACITY_16,
- ctlr, buf, sizeof(ReadCapdata_struct_16),
- 0, scsi3addr, TYPE_CMD);
- }
+ return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
+ ctlr, buf, sizeof(ReadCapdata_struct_16),
+ 0, scsi3addr, TYPE_CMD);
if (return_code == IO_OK) {
*total_size = be64_to_cpu(*(__be64 *) buf->total_size);
*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
@@ -2820,13 +2801,13 @@ static int cciss_revalidate(struct gendisk *disk)
return 1;
}
if (h->cciss_read == CCISS_READ_10) {
- cciss_read_capacity(h->ctlr, logvol, 1,
+ cciss_read_capacity(h->ctlr, logvol,
&total_size, &block_size);
} else {
- cciss_read_capacity_16(h->ctlr, logvol, 1,
+ cciss_read_capacity_16(h->ctlr, logvol,
&total_size, &block_size);
}
- cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
+ cciss_geometry_inquiry(h->ctlr, logvol, total_size, block_size,
inq_buff, drv);
blk_queue_logical_block_size(drv->queue, drv->block_size);
@@ -2837,167 +2818,6 @@ static int cciss_revalidate(struct gendisk *disk)
}
/*
- * Wait polling for a command to complete.
- * The memory mapped FIFO is polled for the completion.
- * Used only at init time, interrupts from the HBA are disabled.
- */
-static unsigned long pollcomplete(int ctlr)
-{
- unsigned long done;
- int i;
-
- /* Wait (up to 20 seconds) for a command to complete */
-
- for (i = 20 * HZ; i > 0; i--) {
- done = hba[ctlr]->access.command_completed(hba[ctlr]);
- if (done == FIFO_EMPTY)
- schedule_timeout_uninterruptible(1);
- else
- return done;
- }
- /* Invalid address to tell caller we ran out of time */
- return 1;
-}
-
-/* Send command c to controller h and poll for it to complete.
- * Turns interrupts off on the board. Used at driver init time
- * and during SCSI error recovery.
- */
-static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c)
-{
- int i;
- unsigned long complete;
- int status = IO_ERROR;
- u64bit buff_dma_handle;
-
-resend_cmd1:
-
- /* Disable interrupt on the board. */
- h->access.set_intr_mask(h, CCISS_INTR_OFF);
-
- /* Make sure there is room in the command FIFO */
- /* Actually it should be completely empty at this time */
- /* unless we are in here doing error handling for the scsi */
- /* tape side of the driver. */
- for (i = 200000; i > 0; i--) {
- /* if fifo isn't full go */
- if (!(h->access.fifo_full(h)))
- break;
- udelay(10);
- printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full,"
- " waiting!\n", h->ctlr);
- }
- h->access.submit_command(h, c); /* Send the cmd */
- do {
- complete = pollcomplete(h->ctlr);
-
-#ifdef CCISS_DEBUG
- printk(KERN_DEBUG "cciss: command completed\n");
-#endif /* CCISS_DEBUG */
-
- if (complete == 1) {
- printk(KERN_WARNING
- "cciss cciss%d: SendCmd Timeout out, "
- "No command list address returned!\n", h->ctlr);
- status = IO_ERROR;
- break;
- }
-
- /* Make sure it's the command we're expecting. */
- if ((complete & ~CISS_ERROR_BIT) != c->busaddr) {
- printk(KERN_WARNING "cciss%d: Unexpected command "
- "completion.\n", h->ctlr);
- continue;
- }
-
- /* It is our command. If no error, we're done. */
- if (!(complete & CISS_ERROR_BIT)) {
- status = IO_OK;
- break;
- }
-
- /* There is an error... */
-
- /* if data overrun or underun on Report command ignore it */
- if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
- (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
- (c->Request.CDB[0] == CISS_INQUIRY)) &&
- ((c->err_info->CommandStatus == CMD_DATA_OVERRUN) ||
- (c->err_info->CommandStatus == CMD_DATA_UNDERRUN))) {
- complete = c->busaddr;
- status = IO_OK;
- break;
- }
-
- if (c->err_info->CommandStatus == CMD_UNSOLICITED_ABORT) {
- printk(KERN_WARNING "cciss%d: unsolicited abort %p\n",
- h->ctlr, c);
- if (c->retry_count < MAX_CMD_RETRIES) {
- printk(KERN_WARNING "cciss%d: retrying %p\n",
- h->ctlr, c);
- c->retry_count++;
- /* erase the old error information */
- memset(c->err_info, 0, sizeof(c->err_info));
- goto resend_cmd1;
- }
- printk(KERN_WARNING "cciss%d: retried %p too many "
- "times\n", h->ctlr, c);
- status = IO_ERROR;
- break;
- }
-
- if (c->err_info->CommandStatus == CMD_UNABORTABLE) {
- printk(KERN_WARNING "cciss%d: command could not be "
- "aborted.\n", h->ctlr);
- status = IO_ERROR;
- break;
- }
-
- if (c->err_info->CommandStatus == CMD_TARGET_STATUS) {
- status = check_target_status(h, c);
- break;
- }
-
- printk(KERN_WARNING "cciss%d: sendcmd error\n", h->ctlr);
- printk(KERN_WARNING "cmd = 0x%02x, CommandStatus = 0x%02x\n",
- c->Request.CDB[0], c->err_info->CommandStatus);
- status = IO_ERROR;
- break;
-
- } while (1);
-
- /* unlock the data buffer from DMA */
- buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
- buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
- pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
- c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
- return status;
-}
-
-/*
- * Send a command to the controller, and wait for it to complete.
- * Used at init time, and during SCSI error recovery.
- */
-static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
- __u8 page_code, unsigned char *scsi3addr, int cmd_type)
-{
- CommandList_struct *c;
- int status;
-
- c = cmd_alloc(hba[ctlr], 1);
- if (!c) {
- printk(KERN_WARNING "cciss: unable to get memory");
- return IO_ERROR;
- }
- status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
- scsi3addr, cmd_type);
- if (status == IO_OK)
- status = sendcmd_core(hba[ctlr], c);
- cmd_free(hba[ctlr], c, 1);
- return status;
-}
-
-/*
* Map (physical) PCI mem into (virtual) kernel space
*/
static void __iomem *remap_pci_mem(ulong base, ulong size)
@@ -3255,9 +3075,13 @@ static void do_cciss_request(struct request_queue *q)
int seg;
struct request *creq;
u64bit temp64;
- struct scatterlist tmp_sg[MAXSGENTRIES];
+ struct scatterlist *tmp_sg;
+ SGDescriptor_struct *curr_sg;
drive_info_struct *drv;
int i, dir;
+ int nseg = 0;
+ int sg_index = 0;
+ int chained = 0;
/* We call start_io here in case there is a command waiting on the
* queue that has not been sent.
@@ -3270,13 +3094,14 @@ static void do_cciss_request(struct request_queue *q)
if (!creq)
goto startio;
- BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);
+ BUG_ON(creq->nr_phys_segments > h->maxsgentries);
if ((c = cmd_alloc(h, 1)) == NULL)
goto full;
blk_start_request(creq);
+ tmp_sg = h->scatter_list[c->cmdindex];
spin_unlock_irq(q->queue_lock);
c->cmd_type = CMD_RWREQ;
@@ -3305,7 +3130,7 @@ static void do_cciss_request(struct request_queue *q)
(int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
#endif /* CCISS_DEBUG */
- sg_init_table(tmp_sg, MAXSGENTRIES);
+ sg_init_table(tmp_sg, h->maxsgentries);
seg = blk_rq_map_sg(q, creq, tmp_sg);
/* get the DMA records for the setup */
@@ -3314,25 +3139,70 @@ static void do_cciss_request(struct request_queue *q)
else
dir = PCI_DMA_TODEVICE;
+ curr_sg = c->SG;
+ sg_index = 0;
+ chained = 0;
+
for (i = 0; i < seg; i++) {
- c->SG[i].Len = tmp_sg[i].length;
+ if (((sg_index+1) == (h->max_cmd_sgentries)) &&
+ !chained && ((seg - i) > 1)) {
+ nseg = seg - i;
+ curr_sg[sg_index].Len = (nseg) *
+ sizeof(SGDescriptor_struct);
+ curr_sg[sg_index].Ext = CCISS_SG_CHAIN;
+
+ /* Point to next chain block. */
+ curr_sg = h->cmd_sg_list[c->cmdindex]->sgchain;
+ sg_index = 0;
+ chained = 1;
+ }
+ curr_sg[sg_index].Len = tmp_sg[i].length;
temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
- tmp_sg[i].offset,
- tmp_sg[i].length, dir);
- c->SG[i].Addr.lower = temp64.val32.lower;
- c->SG[i].Addr.upper = temp64.val32.upper;
- c->SG[i].Ext = 0; // we are not chaining
+ tmp_sg[i].offset,
+ tmp_sg[i].length, dir);
+ curr_sg[sg_index].Addr.lower = temp64.val32.lower;
+ curr_sg[sg_index].Addr.upper = temp64.val32.upper;
+ curr_sg[sg_index].Ext = 0; /* we are not chaining */
+
+ ++sg_index;
+ }
+
+ if (chained) {
+ int len;
+ curr_sg = c->SG;
+ sg_index = h->max_cmd_sgentries - 1;
+ len = curr_sg[sg_index].Len;
+ /* Setup pointer to next chain block.
+ * Fill out last element in current chain
+ * block with address of next chain block.
+ */
+ temp64.val = pci_map_single(h->pdev,
+ h->cmd_sg_list[c->cmdindex]->sgchain,
+ len, dir);
+
+ h->cmd_sg_list[c->cmdindex]->sg_chain_dma = temp64.val;
+ curr_sg[sg_index].Addr.lower = temp64.val32.lower;
+ curr_sg[sg_index].Addr.upper = temp64.val32.upper;
+
+ pci_dma_sync_single_for_device(h->pdev,
+ h->cmd_sg_list[c->cmdindex]->sg_chain_dma,
+ len, dir);
}
+
/* track how many SG entries we are using */
if (seg > h->maxSG)
h->maxSG = seg;
#ifdef CCISS_DEBUG
- printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n",
- blk_rq_sectors(creq), seg);
+ printk(KERN_DEBUG "cciss: Submitting %ld sectors in %d segments "
+ "chained[%d]\n",
+ blk_rq_sectors(creq), seg, chained);
#endif /* CCISS_DEBUG */
- c->Header.SGList = c->Header.SGTotal = seg;
+ c->Header.SGList = c->Header.SGTotal = seg + chained;
+ if (seg > h->max_cmd_sgentries)
+ c->Header.SGList = h->max_cmd_sgentries;
+
if (likely(blk_fs_request(creq))) {
if(h->cciss_read == CCISS_READ_10) {
c->Request.CDB[1] = 0;
@@ -3513,28 +3383,33 @@ static int add_to_scan_list(struct ctlr_info *h)
* @h: Pointer to the controller.
*
* Removes the controller from the rescan queue if present. Blocks if
- * the controller is currently conducting a rescan.
+ * the controller is currently conducting a rescan. The controller
+ * can be in one of three states:
+ * 1. Doesn't need a scan
+ * 2. On the scan list, but not scanning yet (we remove it)
+ * 3. Busy scanning (and not on the list). In this case we want to wait for
+ * the scan to complete to make sure the scanning thread for this
+ * controller is completely idle.
**/
static void remove_from_scan_list(struct ctlr_info *h)
{
struct ctlr_info *test_h, *tmp_h;
- int scanning = 0;
mutex_lock(&scan_mutex);
list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) {
- if (test_h == h) {
+ if (test_h == h) { /* state 2. */
list_del(&h->scan_list);
complete_all(&h->scan_wait);
mutex_unlock(&scan_mutex);
return;
}
}
- if (&h->busy_scanning)
- scanning = 0;
- mutex_unlock(&scan_mutex);
-
- if (scanning)
+ if (h->busy_scanning) { /* state 3. */
+ mutex_unlock(&scan_mutex);
wait_for_completion(&h->scan_wait);
+ } else { /* state 1, nothing to do. */
+ mutex_unlock(&scan_mutex);
+ }
}
/**
@@ -3573,13 +3448,11 @@ static int scan_thread(void *data)
h->busy_scanning = 1;
mutex_unlock(&scan_mutex);
- if (h) {
- rebuild_lun_table(h, 0, 0);
- complete_all(&h->scan_wait);
- mutex_lock(&scan_mutex);
- h->busy_scanning = 0;
- mutex_unlock(&scan_mutex);
- }
+ rebuild_lun_table(h, 0, 0);
+ complete_all(&h->scan_wait);
+ mutex_lock(&scan_mutex);
+ h->busy_scanning = 0;
+ mutex_unlock(&scan_mutex);
}
}
@@ -3605,8 +3478,22 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
case REPORT_LUNS_CHANGED:
printk(KERN_WARNING "cciss%d: report LUN data "
"changed\n", h->ctlr);
- add_to_scan_list(h);
- wake_up_process(cciss_scan_thread);
+ /*
+ * Here, we could call add_to_scan_list and wake up the scan thread,
+ * except that it's quite likely that we will get more than one
+ * REPORT_LUNS_CHANGED condition in quick succession, which means
+ * that those which occur after the first one will likely happen
+ * *during* the scan_thread's rescan. And the rescan code is not
+ * robust enough to restart in the middle, undoing what it has already
+ * done, and it's not clear that it's even possible to do this, since
+ * part of what it does is notify the block layer, which starts
+ * doing it's own i/o to read partition tables and so on, and the
+ * driver doesn't have visibility to know what might need undoing.
+ * In any event, if possible, it is horribly complicated to get right
+ * so we just don't do it for now.
+ *
+ * Note: this REPORT_LUNS_CHANGED condition only occurs on the MSA2012.
+ */
return 1;
break;
case POWER_OR_RESET:
@@ -3888,6 +3775,23 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
* leave a little room for ioctl calls.
*/
c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
+ c->maxsgentries = readl(&(c->cfgtable->MaxSGElements));
+
+ /*
+ * Limit native command to 32 s/g elements to save dma'able memory.
+ * Howvever spec says if 0, use 31
+ */
+
+ c->max_cmd_sgentries = 31;
+ if (c->maxsgentries > 512) {
+ c->max_cmd_sgentries = 32;
+ c->chainsize = c->maxsgentries - c->max_cmd_sgentries + 1;
+ c->maxsgentries -= 1; /* account for chain pointer */
+ } else {
+ c->maxsgentries = 31; /* Default to traditional value */
+ c->chainsize = 0; /* traditional */
+ }
+
c->product_name = products[prod_index].product_name;
c->access = *(products[prod_index].access);
c->nr_cmds = c->max_commands - 4;
@@ -4214,6 +4118,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
{
int i;
int j = 0;
+ int k = 0;
int rc;
int dac, return_code;
InquiryData_struct *inq_buff;
@@ -4317,6 +4222,53 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
printk(KERN_ERR "cciss: out of memory");
goto clean4;
}
+
+ /* Need space for temp scatter list */
+ hba[i]->scatter_list = kmalloc(hba[i]->max_commands *
+ sizeof(struct scatterlist *),
+ GFP_KERNEL);
+ for (k = 0; k < hba[i]->nr_cmds; k++) {
+ hba[i]->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
+ hba[i]->maxsgentries,
+ GFP_KERNEL);
+ if (hba[i]->scatter_list[k] == NULL) {
+ printk(KERN_ERR "cciss%d: could not allocate "
+ "s/g lists\n", i);
+ goto clean4;
+ }
+ }
+ hba[i]->cmd_sg_list = kmalloc(sizeof(struct Cmd_sg_list *) *
+ hba[i]->nr_cmds,
+ GFP_KERNEL);
+ if (!hba[i]->cmd_sg_list) {
+ printk(KERN_ERR "cciss%d: Cannot get memory for "
+ "s/g chaining.\n", i);
+ goto clean4;
+ }
+ /* Build up chain blocks for each command */
+ if (hba[i]->chainsize > 0) {
+ for (j = 0; j < hba[i]->nr_cmds; j++) {
+ hba[i]->cmd_sg_list[j] =
+ kmalloc(sizeof(struct Cmd_sg_list),
+ GFP_KERNEL);
+ if (!hba[i]->cmd_sg_list[j]) {
+ printk(KERN_ERR "cciss%d: Cannot get memory "
+ "for chain block.\n", i);
+ goto clean4;
+ }
+ /* Need a block of chainsized s/g elements. */
+ hba[i]->cmd_sg_list[j]->sgchain =
+ kmalloc((hba[i]->chainsize *
+ sizeof(SGDescriptor_struct)),
+ GFP_KERNEL);
+ if (!hba[i]->cmd_sg_list[j]->sgchain) {
+ printk(KERN_ERR "cciss%d: Cannot get memory "
+ "for s/g chains\n", i);
+ goto clean4;
+ }
+ }
+ }
+
spin_lock_init(&hba[i]->lock);
/* Initialize the pdev driver private data.
@@ -4362,7 +4314,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
cciss_procinit(i);
- hba[i]->cciss_max_sectors = 2048;
+ hba[i]->cciss_max_sectors = 8192;
rebuild_lun_table(hba[i], 1, 0);
hba[i]->busy_initializing = 0;
@@ -4370,6 +4322,20 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
clean4:
kfree(hba[i]->cmd_pool_bits);
+ /* Free up sg elements */
+ for (k = 0; k < hba[i]->nr_cmds; k++)
+ kfree(hba[i]->scatter_list[k]);
+ kfree(hba[i]->scatter_list);
+ /* Only free up extra s/g lists if controller supports them */
+ if (hba[i]->chainsize > 0) {
+ for (j = 0; j < hba[i]->nr_cmds; j++) {
+ if (hba[i]->cmd_sg_list[j]) {
+ kfree(hba[i]->cmd_sg_list[j]->sgchain);
+ kfree(hba[i]->cmd_sg_list[j]);
+ }
+ }
+ kfree(hba[i]->cmd_sg_list);
+ }
if (hba[i]->cmd_pool)
pci_free_consistent(hba[i]->pdev,
hba[i]->nr_cmds * sizeof(CommandList_struct),
@@ -4400,30 +4366,28 @@ clean_no_release_regions:
static void cciss_shutdown(struct pci_dev *pdev)
{
- ctlr_info_t *tmp_ptr;
- int i;
- char flush_buf[4];
+ ctlr_info_t *h;
+ char *flush_buf;
int return_code;
- tmp_ptr = pci_get_drvdata(pdev);
- if (tmp_ptr == NULL)
- return;
- i = tmp_ptr->ctlr;
- if (hba[i] == NULL)
+ h = pci_get_drvdata(pdev);
+ flush_buf = kzalloc(4, GFP_KERNEL);
+ if (!flush_buf) {
+ printk(KERN_WARNING
+ "cciss:%d cache not flushed, out of memory.\n",
+ h->ctlr);
return;
-
- /* Turn board interrupts off and send the flush cache command */
- /* sendcmd will turn off interrupt, and send the flush...
- * To write all data in the battery backed cache to disks */
- memset(flush_buf, 0, 4);
- return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0,
- CTLR_LUNID, TYPE_CMD);
- if (return_code == IO_OK) {
- printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
- } else {
- printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
}
- free_irq(hba[i]->intr[2], hba[i]);
+ /* write all data in the battery backed cache to disk */
+ memset(flush_buf, 0, 4);
+ return_code = sendcmd_withirq(CCISS_CACHE_FLUSH, h->ctlr, flush_buf,
+ 4, 0, CTLR_LUNID, TYPE_CMD);
+ kfree(flush_buf);
+ if (return_code != IO_OK)
+ printk(KERN_WARNING "cciss%d: Error flushing cache\n",
+ h->ctlr);
+ h->access.set_intr_mask(h, CCISS_INTR_OFF);
+ free_irq(h->intr[2], h);
}
static void __devexit cciss_remove_one(struct pci_dev *pdev)
@@ -4485,6 +4449,20 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
kfree(hba[i]->cmd_pool_bits);
+ /* Free up sg elements */
+ for (j = 0; j < hba[i]->nr_cmds; j++)
+ kfree(hba[i]->scatter_list[j]);
+ kfree(hba[i]->scatter_list);
+ /* Only free up extra s/g lists if controller supports them */
+ if (hba[i]->chainsize > 0) {
+ for (j = 0; j < hba[i]->nr_cmds; j++) {
+ if (hba[i]->cmd_sg_list[j]) {
+ kfree(hba[i]->cmd_sg_list[j]->sgchain);
+ kfree(hba[i]->cmd_sg_list[j]);
+ }
+ }
+ kfree(hba[i]->cmd_sg_list);
+ }
/*
* Deliberately omit pci_disable_device(): it does something nasty to
* Smart Array controllers that pci_enable_device does not undo
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 31524cf42c77..1d95db254069 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -55,7 +55,13 @@ typedef struct _drive_info_struct
char device_initialized; /* indicates whether dev is initialized */
} drive_info_struct;
-struct ctlr_info
+struct Cmd_sg_list {
+ SGDescriptor_struct *sgchain;
+ dma_addr_t sg_chain_dma;
+ int chain_block_size;
+};
+
+struct ctlr_info
{
int ctlr;
char devname[8];
@@ -75,6 +81,16 @@ struct ctlr_info
int num_luns;
int highest_lun;
int usage_count; /* number of opens all all minor devices */
+ /* Need space for temp sg list
+ * number of scatter/gathers supported
+ * number of scatter/gathers in chained block
+ */
+ struct scatterlist **scatter_list;
+ int maxsgentries;
+ int chainsize;
+ int max_cmd_sgentries;
+ struct Cmd_sg_list **cmd_sg_list;
+
# define DOORBELL_INT 0
# define PERF_MODE_INT 1
# define SIMPLE_MODE_INT 2
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index dbaed1ea0da3..b50a9b261b85 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -7,7 +7,8 @@
//general boundary defintions
#define SENSEINFOBYTES 32//note that this value may vary between host implementations
-#define MAXSGENTRIES 31
+#define MAXSGENTRIES 32
+#define CCISS_SG_CHAIN 0x80000000
#define MAXREPLYQS 256
//Command Status value
@@ -319,6 +320,10 @@ typedef struct _CfgTable_struct {
BYTE ServerName[16];
DWORD HeartBeat;
DWORD SCSI_Prefetch;
+ DWORD MaxSGElements;
+ DWORD MaxLogicalUnits;
+ DWORD MaxPhysicalDrives;
+ DWORD MaxPhysicalDrivesPerLogicalUnit;
} CfgTable_struct;
#pragma pack()
#endif // CCISS_CMD_H
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 3315268b4ec7..5d0e46dc3632 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -755,7 +755,7 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
cp,
ei->ScsiStatus);
#endif
- cmd->result |= (ei->ScsiStatus < 1);
+ cmd->result |= (ei->ScsiStatus << 1);
}
else { /* scsi status is zero??? How??? */
@@ -1547,7 +1547,7 @@ cciss_engage_scsi(int ctlr)
if (sa->registered) {
printk("cciss%d: SCSI subsystem already engaged.\n", ctlr);
spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
- return ENXIO;
+ return -ENXIO;
}
sa->registered = 1;
spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
new file mode 100644
index 000000000000..f4acd04ebeef
--- /dev/null
+++ b/drivers/block/drbd/Kconfig
@@ -0,0 +1,71 @@
+#
+# DRBD device driver configuration
+#
+
+comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
+ depends on !PROC_FS || !INET || !CONNECTOR
+
+config BLK_DEV_DRBD
+ tristate "DRBD Distributed Replicated Block Device support"
+ depends on PROC_FS && INET && CONNECTOR
+ select LRU_CACHE
+ default n
+ help
+
+ NOTE: In order to authenticate connections you have to select
+ CRYPTO_HMAC and a hash function as well.
+
+ DRBD is a shared-nothing, synchronously replicated block device. It
+ is designed to serve as a building block for high availability
+ clusters and in this context, is a "drop-in" replacement for shared
+ storage. Simplistically, you could see it as a network RAID 1.
+
+ Each minor device has a role, which can be 'primary' or 'secondary'.
+ On the node with the primary device the application is supposed to
+ run and to access the device (/dev/drbdX). Every write is sent to
+ the local 'lower level block device' and, across the network, to the
+ node with the device in 'secondary' state. The secondary device
+ simply writes the data to its lower level block device.
+
+ DRBD can also be used in dual-Primary mode (device writable on both
+ nodes), which means it can exhibit shared disk semantics in a
+ shared-nothing cluster. Needless to say, on top of dual-Primary
+ DRBD utilizing a cluster file system is necessary to maintain for
+ cache coherency.
+
+ For automatic failover you need a cluster manager (e.g. heartbeat).
+ See also: http://www.drbd.org/, http://www.linux-ha.org
+
+ If unsure, say N.
+
+config DRBD_FAULT_INJECTION
+ bool "DRBD fault injection"
+ depends on BLK_DEV_DRBD
+ help
+
+ Say Y here if you want to simulate IO errors, in order to test DRBD's
+ behavior.
+
+ The actual simulation of IO errors is done by writing 3 values to
+ /sys/module/drbd/parameters/
+
+ enable_faults: bitmask of...
+ 1 meta data write
+ 2 read
+ 4 resync data write
+ 8 read
+ 16 data write
+ 32 data read
+ 64 read ahead
+ 128 kmalloc of bitmap
+ 256 allocation of EE (epoch_entries)
+
+ fault_devs: bitmask of minor numbers
+ fault_rate: frequency in percent
+
+ Example: Simulate data write errors on /dev/drbd0 with a probability of 5%.
+ echo 16 > /sys/module/drbd/parameters/enable_faults
+ echo 1 > /sys/module/drbd/parameters/fault_devs
+ echo 5 > /sys/module/drbd/parameters/fault_rate
+
+ If unsure, say N.
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
new file mode 100644
index 000000000000..0d3f337ff5ff
--- /dev/null
+++ b/drivers/block/drbd/Makefile
@@ -0,0 +1,5 @@
+drbd-y := drbd_bitmap.o drbd_proc.o
+drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
+drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
+
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
new file mode 100644
index 000000000000..17956ff6a08d
--- /dev/null
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -0,0 +1,1424 @@
+/*
+ drbd_actlog.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_wrappers.h"
+
+/* We maintain a trivial check sum in our on disk activity log.
+ * With that we can ensure correct operation even when the storage
+ * device might do a partial (last) sector write while loosing power.
+ */
+struct __packed al_transaction {
+ u32 magic;
+ u32 tr_number;
+ struct __packed {
+ u32 pos;
+ u32 extent; } updates[1 + AL_EXTENTS_PT];
+ u32 xor_sum;
+};
+
+struct update_odbm_work {
+ struct drbd_work w;
+ unsigned int enr;
+};
+
+struct update_al_work {
+ struct drbd_work w;
+ struct lc_element *al_ext;
+ struct completion event;
+ unsigned int enr;
+ /* if old_enr != LC_FREE, write corresponding bitmap sector, too */
+ unsigned int old_enr;
+};
+
+struct drbd_atodb_wait {
+ atomic_t count;
+ struct completion io_done;
+ struct drbd_conf *mdev;
+ int error;
+};
+
+
+int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
+
+static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev,
+ struct page *page, sector_t sector,
+ int rw, int size)
+{
+ struct bio *bio;
+ struct drbd_md_io md_io;
+ int ok;
+
+ md_io.mdev = mdev;
+ init_completion(&md_io.event);
+ md_io.error = 0;
+
+ if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
+ rw |= (1 << BIO_RW_BARRIER);
+ rw |= ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO));
+
+ retry:
+ bio = bio_alloc(GFP_NOIO, 1);
+ bio->bi_bdev = bdev->md_bdev;
+ bio->bi_sector = sector;
+ ok = (bio_add_page(bio, page, size, 0) == size);
+ if (!ok)
+ goto out;
+ bio->bi_private = &md_io;
+ bio->bi_end_io = drbd_md_io_complete;
+ bio->bi_rw = rw;
+
+ if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+ bio_endio(bio, -EIO);
+ else
+ submit_bio(rw, bio);
+ wait_for_completion(&md_io.event);
+ ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
+
+ /* check for unsupported barrier op.
+ * would rather check on EOPNOTSUPP, but that is not reliable.
+ * don't try again for ANY return value != 0 */
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) {
+ /* Try again with no barrier */
+ dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
+ set_bit(MD_NO_BARRIER, &mdev->flags);
+ rw &= ~(1 << BIO_RW_BARRIER);
+ bio_put(bio);
+ goto retry;
+ }
+ out:
+ bio_put(bio);
+ return ok;
+}
+
+int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+ sector_t sector, int rw)
+{
+ int logical_block_size, mask, ok;
+ int offset = 0;
+ struct page *iop = mdev->md_io_page;
+
+ D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+
+ BUG_ON(!bdev->md_bdev);
+
+ logical_block_size = bdev_logical_block_size(bdev->md_bdev);
+ if (logical_block_size == 0)
+ logical_block_size = MD_SECTOR_SIZE;
+
+ /* in case logical_block_size != 512 [ s390 only? ] */
+ if (logical_block_size != MD_SECTOR_SIZE) {
+ mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
+ D_ASSERT(mask == 1 || mask == 3 || mask == 7);
+ D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
+ offset = sector & mask;
+ sector = sector & ~mask;
+ iop = mdev->md_io_tmpp;
+
+ if (rw & WRITE) {
+ /* these are GFP_KERNEL pages, pre-allocated
+ * on device initialization */
+ void *p = page_address(mdev->md_io_page);
+ void *hp = page_address(mdev->md_io_tmpp);
+
+ ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
+ READ, logical_block_size);
+
+ if (unlikely(!ok)) {
+ dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
+ "READ [logical_block_size!=512]) failed!\n",
+ (unsigned long long)sector);
+ return 0;
+ }
+
+ memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
+ }
+ }
+
+ if (sector < drbd_md_first_sector(bdev) ||
+ sector > drbd_md_last_sector(bdev))
+ dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
+ current->comm, current->pid, __func__,
+ (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+
+ ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
+ if (unlikely(!ok)) {
+ dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
+ (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+ return 0;
+ }
+
+ if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
+ void *p = page_address(mdev->md_io_page);
+ void *hp = page_address(mdev->md_io_tmpp);
+
+ memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
+ }
+
+ return ok;
+}
+
+static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
+{
+ struct lc_element *al_ext;
+ struct lc_element *tmp;
+ unsigned long al_flags = 0;
+
+ spin_lock_irq(&mdev->al_lock);
+ tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
+ if (unlikely(tmp != NULL)) {
+ struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+ if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+ spin_unlock_irq(&mdev->al_lock);
+ return NULL;
+ }
+ }
+ al_ext = lc_get(mdev->act_log, enr);
+ al_flags = mdev->act_log->flags;
+ spin_unlock_irq(&mdev->al_lock);
+
+ /*
+ if (!al_ext) {
+ if (al_flags & LC_STARVING)
+ dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
+ if (al_flags & LC_DIRTY)
+ dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
+ }
+ */
+
+ return al_ext;
+}
+
+void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+ struct lc_element *al_ext;
+ struct update_al_work al_work;
+
+ D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
+
+ wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));
+
+ if (al_ext->lc_number != enr) {
+ /* drbd_al_write_transaction(mdev,al_ext,enr);
+ * recurses into generic_make_request(), which
+ * disallows recursion, bios being serialized on the
+ * current->bio_tail list now.
+ * we have to delegate updates to the activity log
+ * to the worker thread. */
+ init_completion(&al_work.event);
+ al_work.al_ext = al_ext;
+ al_work.enr = enr;
+ al_work.old_enr = al_ext->lc_number;
+ al_work.w.cb = w_al_write_transaction;
+ drbd_queue_work_front(&mdev->data.work, &al_work.w);
+ wait_for_completion(&al_work.event);
+
+ mdev->al_writ_cnt++;
+
+ spin_lock_irq(&mdev->al_lock);
+ lc_changed(mdev->act_log, al_ext);
+ spin_unlock_irq(&mdev->al_lock);
+ wake_up(&mdev->al_wait);
+ }
+}
+
+void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+ struct lc_element *extent;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->al_lock, flags);
+
+ extent = lc_find(mdev->act_log, enr);
+
+ if (!extent) {
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
+ return;
+ }
+
+ if (lc_put(mdev->act_log, extent) == 0)
+ wake_up(&mdev->al_wait);
+
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+}
+
+int
+w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct update_al_work *aw = container_of(w, struct update_al_work, w);
+ struct lc_element *updated = aw->al_ext;
+ const unsigned int new_enr = aw->enr;
+ const unsigned int evicted = aw->old_enr;
+ struct al_transaction *buffer;
+ sector_t sector;
+ int i, n, mx;
+ unsigned int extent_nr;
+ u32 xor_sum = 0;
+
+ if (!get_ldev(mdev)) {
+ dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n");
+ complete(&((struct update_al_work *)w)->event);
+ return 1;
+ }
+ /* do we have to do a bitmap write, first?
+ * TODO reduce maximum latency:
+ * submit both bios, then wait for both,
+ * instead of doing two synchronous sector writes. */
+ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
+ drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
+
+ mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */
+ buffer = (struct al_transaction *)page_address(mdev->md_io_page);
+
+ buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
+ buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
+
+ n = lc_index_of(mdev->act_log, updated);
+
+ buffer->updates[0].pos = cpu_to_be32(n);
+ buffer->updates[0].extent = cpu_to_be32(new_enr);
+
+ xor_sum ^= new_enr;
+
+ mx = min_t(int, AL_EXTENTS_PT,
+ mdev->act_log->nr_elements - mdev->al_tr_cycle);
+ for (i = 0; i < mx; i++) {
+ unsigned idx = mdev->al_tr_cycle + i;
+ extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
+ buffer->updates[i+1].pos = cpu_to_be32(idx);
+ buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
+ xor_sum ^= extent_nr;
+ }
+ for (; i < AL_EXTENTS_PT; i++) {
+ buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
+ buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
+ xor_sum ^= LC_FREE;
+ }
+ mdev->al_tr_cycle += AL_EXTENTS_PT;
+ if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
+ mdev->al_tr_cycle = 0;
+
+ buffer->xor_sum = cpu_to_be32(xor_sum);
+
+ sector = mdev->ldev->md.md_offset
+ + mdev->ldev->md.al_offset + mdev->al_tr_pos;
+
+ if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
+ drbd_chk_io_error(mdev, 1, TRUE);
+
+ if (++mdev->al_tr_pos >
+ div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
+ mdev->al_tr_pos = 0;
+
+ D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
+ mdev->al_tr_number++;
+
+ mutex_unlock(&mdev->md_io_mutex);
+
+ complete(&((struct update_al_work *)w)->event);
+ put_ldev(mdev);
+
+ return 1;
+}
+
+/**
+ * drbd_al_read_tr() - Read a single transaction from the on disk activity log
+ * @mdev: DRBD device.
+ * @bdev: Block device to read form.
+ * @b: pointer to an al_transaction.
+ * @index: On disk slot of the transaction to read.
+ *
+ * Returns -1 on IO error, 0 on checksum error and 1 upon success.
+ */
+static int drbd_al_read_tr(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev,
+ struct al_transaction *b,
+ int index)
+{
+ sector_t sector;
+ int rv, i;
+ u32 xor_sum = 0;
+
+ sector = bdev->md.md_offset + bdev->md.al_offset + index;
+
+ /* Dont process error normally,
+ * as this is done before disk is attached! */
+ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
+ return -1;
+
+ rv = (be32_to_cpu(b->magic) == DRBD_MAGIC);
+
+ for (i = 0; i < AL_EXTENTS_PT + 1; i++)
+ xor_sum ^= be32_to_cpu(b->updates[i].extent);
+ rv &= (xor_sum == be32_to_cpu(b->xor_sum));
+
+ return rv;
+}
+
+/**
+ * drbd_al_read_log() - Restores the activity log from its on disk representation.
+ * @mdev: DRBD device.
+ * @bdev: Block device to read form.
+ *
+ * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
+ */
+int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+ struct al_transaction *buffer;
+ int i;
+ int rv;
+ int mx;
+ int active_extents = 0;
+ int transactions = 0;
+ int found_valid = 0;
+ int from = 0;
+ int to = 0;
+ u32 from_tnr = 0;
+ u32 to_tnr = 0;
+ u32 cnr;
+
+ mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
+
+ /* lock out all other meta data io for now,
+ * and make sure the page is mapped.
+ */
+ mutex_lock(&mdev->md_io_mutex);
+ buffer = page_address(mdev->md_io_page);
+
+ /* Find the valid transaction in the log */
+ for (i = 0; i <= mx; i++) {
+ rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+ if (rv == 0)
+ continue;
+ if (rv == -1) {
+ mutex_unlock(&mdev->md_io_mutex);
+ return 0;
+ }
+ cnr = be32_to_cpu(buffer->tr_number);
+
+ if (++found_valid == 1) {
+ from = i;
+ to = i;
+ from_tnr = cnr;
+ to_tnr = cnr;
+ continue;
+ }
+ if ((int)cnr - (int)from_tnr < 0) {
+ D_ASSERT(from_tnr - cnr + i - from == mx+1);
+ from = i;
+ from_tnr = cnr;
+ }
+ if ((int)cnr - (int)to_tnr > 0) {
+ D_ASSERT(cnr - to_tnr == i - to);
+ to = i;
+ to_tnr = cnr;
+ }
+ }
+
+ if (!found_valid) {
+ dev_warn(DEV, "No usable activity log found.\n");
+ mutex_unlock(&mdev->md_io_mutex);
+ return 1;
+ }
+
+ /* Read the valid transactions.
+ * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
+ i = from;
+ while (1) {
+ int j, pos;
+ unsigned int extent_nr;
+ unsigned int trn;
+
+ rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+ ERR_IF(rv == 0) goto cancel;
+ if (rv == -1) {
+ mutex_unlock(&mdev->md_io_mutex);
+ return 0;
+ }
+
+ trn = be32_to_cpu(buffer->tr_number);
+
+ spin_lock_irq(&mdev->al_lock);
+
+ /* This loop runs backwards because in the cyclic
+ elements there might be an old version of the
+ updated element (in slot 0). So the element in slot 0
+ can overwrite old versions. */
+ for (j = AL_EXTENTS_PT; j >= 0; j--) {
+ pos = be32_to_cpu(buffer->updates[j].pos);
+ extent_nr = be32_to_cpu(buffer->updates[j].extent);
+
+ if (extent_nr == LC_FREE)
+ continue;
+
+ lc_set(mdev->act_log, extent_nr, pos);
+ active_extents++;
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ transactions++;
+
+cancel:
+ if (i == to)
+ break;
+ i++;
+ if (i > mx)
+ i = 0;
+ }
+
+ mdev->al_tr_number = to_tnr+1;
+ mdev->al_tr_pos = to;
+ if (++mdev->al_tr_pos >
+ div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
+ mdev->al_tr_pos = 0;
+
+ /* ok, we are done with it */
+ mutex_unlock(&mdev->md_io_mutex);
+
+ dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
+ transactions, active_extents);
+
+ return 1;
+}
+
+static void atodb_endio(struct bio *bio, int error)
+{
+ struct drbd_atodb_wait *wc = bio->bi_private;
+ struct drbd_conf *mdev = wc->mdev;
+ struct page *page;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?! */
+ if (!error && !uptodate)
+ error = -EIO;
+
+ drbd_chk_io_error(mdev, error, TRUE);
+ if (error && wc->error == 0)
+ wc->error = error;
+
+ if (atomic_dec_and_test(&wc->count))
+ complete(&wc->io_done);
+
+ page = bio->bi_io_vec[0].bv_page;
+ put_page(page);
+ bio_put(bio);
+ mdev->bm_writ_cnt++;
+ put_ldev(mdev);
+}
+
+#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+/* activity log to on disk bitmap -- prepare bio unless that sector
+ * is already covered by previously prepared bios */
+static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
+ struct bio **bios,
+ unsigned int enr,
+ struct drbd_atodb_wait *wc) __must_hold(local)
+{
+ struct bio *bio;
+ struct page *page;
+ sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+ + mdev->ldev->md.bm_offset;
+ unsigned int page_offset = PAGE_SIZE;
+ int offset;
+ int i = 0;
+ int err = -ENOMEM;
+
+ /* Check if that enr is already covered by an already created bio.
+ * Caution, bios[] is not NULL terminated,
+ * but only initialized to all NULL.
+ * For completely scattered activity log,
+ * the last invocation iterates over all bios,
+ * and finds the last NULL entry.
+ */
+ while ((bio = bios[i])) {
+ if (bio->bi_sector == on_disk_sector)
+ return 0;
+ i++;
+ }
+ /* bios[i] == NULL, the next not yet used slot */
+
+ /* GFP_KERNEL, we are not in the write-out path */
+ bio = bio_alloc(GFP_KERNEL, 1);
+ if (bio == NULL)
+ return -ENOMEM;
+
+ if (i > 0) {
+ const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
+ page_offset = prev_bv->bv_offset + prev_bv->bv_len;
+ page = prev_bv->bv_page;
+ }
+ if (page_offset == PAGE_SIZE) {
+ page = alloc_page(__GFP_HIGHMEM);
+ if (page == NULL)
+ goto out_bio_put;
+ page_offset = 0;
+ } else {
+ get_page(page);
+ }
+
+ offset = S2W(enr);
+ drbd_bm_get_lel(mdev, offset,
+ min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset),
+ kmap(page) + page_offset);
+ kunmap(page);
+
+ bio->bi_private = wc;
+ bio->bi_end_io = atodb_endio;
+ bio->bi_bdev = mdev->ldev->md_bdev;
+ bio->bi_sector = on_disk_sector;
+
+ if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
+ goto out_put_page;
+
+ atomic_inc(&wc->count);
+ /* we already know that we may do this...
+ * get_ldev_if_state(mdev,D_ATTACHING);
+ * just get the extra reference, so that the local_cnt reflects
+ * the number of pending IO requests DRBD at its backing device.
+ */
+ atomic_inc(&mdev->local_cnt);
+
+ bios[i] = bio;
+
+ return 0;
+
+out_put_page:
+ err = -EINVAL;
+ put_page(page);
+out_bio_put:
+ bio_put(bio);
+ return err;
+}
+
+/**
+ * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents
+ * @mdev: DRBD device.
+ *
+ * Called when we detach (unconfigure) local storage,
+ * or when we go from R_PRIMARY to R_SECONDARY role.
+ */
+void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
+{
+ int i, nr_elements;
+ unsigned int enr;
+ struct bio **bios;
+ struct drbd_atodb_wait wc;
+
+ ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
+ return; /* sorry, I don't have any act_log etc... */
+
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+ nr_elements = mdev->act_log->nr_elements;
+
+ /* GFP_KERNEL, we are not in anyone's write-out path */
+ bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
+ if (!bios)
+ goto submit_one_by_one;
+
+ atomic_set(&wc.count, 0);
+ init_completion(&wc.io_done);
+ wc.mdev = mdev;
+ wc.error = 0;
+
+ for (i = 0; i < nr_elements; i++) {
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+ if (enr == LC_FREE)
+ continue;
+ /* next statement also does atomic_inc wc.count and local_cnt */
+ if (atodb_prepare_unless_covered(mdev, bios,
+ enr/AL_EXT_PER_BM_SECT,
+ &wc))
+ goto free_bios_submit_one_by_one;
+ }
+
+ /* unnecessary optimization? */
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+
+ /* all prepared, submit them */
+ for (i = 0; i < nr_elements; i++) {
+ if (bios[i] == NULL)
+ break;
+ if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
+ bios[i]->bi_rw = WRITE;
+ bio_endio(bios[i], -EIO);
+ } else {
+ submit_bio(WRITE, bios[i]);
+ }
+ }
+
+ drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
+
+ /* always (try to) flush bitmap to stable storage */
+ drbd_md_flush(mdev);
+
+ /* In case we did not submit a single IO do not wait for
+ * them to complete. ( Because we would wait forever here. )
+ *
+ * In case we had IOs and they are already complete, there
+ * is not point in waiting anyways.
+ * Therefore this if () ... */
+ if (atomic_read(&wc.count))
+ wait_for_completion(&wc.io_done);
+
+ put_ldev(mdev);
+
+ kfree(bios);
+ return;
+
+ free_bios_submit_one_by_one:
+ /* free everything by calling the endio callback directly. */
+ for (i = 0; i < nr_elements && bios[i]; i++)
+ bio_endio(bios[i], 0);
+
+ kfree(bios);
+
+ submit_one_by_one:
+ dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
+
+ for (i = 0; i < mdev->act_log->nr_elements; i++) {
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+ if (enr == LC_FREE)
+ continue;
+ /* Really slow: if we have al-extents 16..19 active,
+ * sector 4 will be written four times! Synchronous! */
+ drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
+ }
+
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+ put_ldev(mdev);
+}
+
+/**
+ * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
+ * @mdev: DRBD device.
+ */
+void drbd_al_apply_to_bm(struct drbd_conf *mdev)
+{
+ unsigned int enr;
+ unsigned long add = 0;
+ char ppb[10];
+ int i;
+
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+ for (i = 0; i < mdev->act_log->nr_elements; i++) {
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+ if (enr == LC_FREE)
+ continue;
+ add += drbd_bm_ALe_set_all(mdev, enr);
+ }
+
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+
+ dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
+ ppsize(ppb, Bit2KB(add)));
+}
+
+static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
+{
+ int rv;
+
+ spin_lock_irq(&mdev->al_lock);
+ rv = (al_ext->refcnt == 0);
+ if (likely(rv))
+ lc_del(mdev->act_log, al_ext);
+ spin_unlock_irq(&mdev->al_lock);
+
+ return rv;
+}
+
+/**
+ * drbd_al_shrink() - Removes all active extents form the activity log
+ * @mdev: DRBD device.
+ *
+ * Removes all active extents form the activity log, waiting until
+ * the reference count of each entry dropped to 0 first, of course.
+ *
+ * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
+ */
+void drbd_al_shrink(struct drbd_conf *mdev)
+{
+ struct lc_element *al_ext;
+ int i;
+
+ D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+
+ for (i = 0; i < mdev->act_log->nr_elements; i++) {
+ al_ext = lc_element_by_index(mdev->act_log, i);
+ if (al_ext->lc_number == LC_FREE)
+ continue;
+ wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
+ }
+
+ wake_up(&mdev->al_wait);
+}
+
+static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
+
+ if (!get_ldev(mdev)) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
+ kfree(udw);
+ return 1;
+ }
+
+ drbd_bm_write_sect(mdev, udw->enr);
+ put_ldev(mdev);
+
+ kfree(udw);
+
+ if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
+ switch (mdev->state.conn) {
+ case C_SYNC_SOURCE: case C_SYNC_TARGET:
+ case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
+ drbd_resync_finished(mdev);
+ default:
+ /* nothing to do */
+ break;
+ }
+ }
+ drbd_bcast_sync_progress(mdev);
+
+ return 1;
+}
+
+
+/* ATTENTION. The AL's extents are 4MB each, while the extents in the
+ * resync LRU-cache are 16MB each.
+ * The caller of this function has to hold an get_ldev() reference.
+ *
+ * TODO will be obsoleted once we have a caching lru of the on disk bitmap
+ */
+static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
+ int count, int success)
+{
+ struct lc_element *e;
+ struct update_odbm_work *udw;
+
+ unsigned int enr;
+
+ D_ASSERT(atomic_read(&mdev->local_cnt));
+
+ /* I simply assume that a sector/size pair never crosses
+ * a 16 MB extent border. (Currently this is true...) */
+ enr = BM_SECT_TO_EXT(sector);
+
+ e = lc_get(mdev->resync, enr);
+ if (e) {
+ struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
+ if (ext->lce.lc_number == enr) {
+ if (success)
+ ext->rs_left -= count;
+ else
+ ext->rs_failed += count;
+ if (ext->rs_left < ext->rs_failed) {
+ dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
+ "rs_failed=%d count=%d\n",
+ (unsigned long long)sector,
+ ext->lce.lc_number, ext->rs_left,
+ ext->rs_failed, count);
+ dump_stack();
+
+ lc_put(mdev->resync, &ext->lce);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return;
+ }
+ } else {
+ /* Normally this element should be in the cache,
+ * since drbd_rs_begin_io() pulled it already in.
+ *
+ * But maybe an application write finished, and we set
+ * something outside the resync lru_cache in sync.
+ */
+ int rs_left = drbd_bm_e_weight(mdev, enr);
+ if (ext->flags != 0) {
+ dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
+ " -> %d[%u;00]\n",
+ ext->lce.lc_number, ext->rs_left,
+ ext->flags, enr, rs_left);
+ ext->flags = 0;
+ }
+ if (ext->rs_failed) {
+ dev_warn(DEV, "Kicking resync_lru element enr=%u "
+ "out with rs_failed=%d\n",
+ ext->lce.lc_number, ext->rs_failed);
+ set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
+ }
+ ext->rs_left = rs_left;
+ ext->rs_failed = success ? 0 : count;
+ lc_changed(mdev->resync, &ext->lce);
+ }
+ lc_put(mdev->resync, &ext->lce);
+ /* no race, we are within the al_lock! */
+
+ if (ext->rs_left == ext->rs_failed) {
+ ext->rs_failed = 0;
+
+ udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
+ if (udw) {
+ udw->enr = ext->lce.lc_number;
+ udw->w.cb = w_update_odbm;
+ drbd_queue_work_front(&mdev->data.work, &udw->w);
+ } else {
+ dev_warn(DEV, "Could not kmalloc an udw\n");
+ set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
+ }
+ }
+ } else {
+ dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
+ mdev->resync_locked,
+ mdev->resync->nr_elements,
+ mdev->resync->flags);
+ }
+}
+
+/* clear the bit corresponding to the piece of storage in question:
+ * size byte of data starting from sector. Only clear a bits of the affected
+ * one ore more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
+ *
+ */
+void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
+ const char *file, const unsigned int line)
+{
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count = 0;
+ sector_t esector, nr_sectors;
+ int wake_up = 0;
+ unsigned long flags;
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+ dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
+ (unsigned long long)sector, size);
+ return;
+ }
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ ERR_IF(sector >= nr_sectors) return;
+ ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+ /* we clear it (in sync).
+ * round up start sector, round down end sector. we make sure we only
+ * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ return;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+
+ if (sbnr > ebnr)
+ return;
+
+ /*
+ * ok, (capacity & 7) != 0 sometimes, but who cares...
+ * we count rs_{total,left} in bits, not sectors.
+ */
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
+ if (count) {
+ /* we need the lock for drbd_try_clear_on_disk_bm */
+ if (jiffies - mdev->rs_mark_time > HZ*10) {
+ /* should be rolling marks,
+ * but we estimate only anyways. */
+ if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
+ mdev->state.conn != C_PAUSED_SYNC_T &&
+ mdev->state.conn != C_PAUSED_SYNC_S) {
+ mdev->rs_mark_time = jiffies;
+ mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+ }
+ }
+ if (get_ldev(mdev)) {
+ drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+ put_ldev(mdev);
+ }
+ /* just wake_up unconditional now, various lc_chaged(),
+ * lc_put() in drbd_try_clear_on_disk_bm(). */
+ wake_up = 1;
+ }
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ if (wake_up)
+ wake_up(&mdev->al_wait);
+}
+
+/*
+ * this is intended to set one request worth of data out of sync.
+ * affects at least 1 bit,
+ * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
+ *
+ * called by tl_clear and drbd_send_dblock (==drbd_make_request).
+ * so this can be _any_ process.
+ */
+void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+ const char *file, const unsigned int line)
+{
+ unsigned long sbnr, ebnr, lbnr, flags;
+ sector_t esector, nr_sectors;
+ unsigned int enr, count;
+ struct lc_element *e;
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+ dev_err(DEV, "sector: %llus, size: %d\n",
+ (unsigned long long)sector, size);
+ return;
+ }
+
+ if (!get_ldev(mdev))
+ return; /* no disk, no metadata, no bitmap to set bits in */
+
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ ERR_IF(sector >= nr_sectors)
+ goto out;
+ ERR_IF(esector >= nr_sectors)
+ esector = (nr_sectors-1);
+
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+ /* we set it out of sync,
+ * we do not need to round anything here */
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+
+ /* ok, (capacity & 7) != 0 sometimes, but who cares...
+ * we count rs_{total,left} in bits, not sectors. */
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ count = drbd_bm_set_bits(mdev, sbnr, ebnr);
+
+ enr = BM_SECT_TO_EXT(sector);
+ e = lc_find(mdev->resync, enr);
+ if (e)
+ lc_entry(e, struct bm_extent, lce)->rs_left += count;
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+
+out:
+ put_ldev(mdev);
+}
+
+static
+struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
+{
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ int wakeup = 0;
+ unsigned long rs_flags;
+
+ spin_lock_irq(&mdev->al_lock);
+ if (mdev->resync_locked > mdev->resync->nr_elements/2) {
+ spin_unlock_irq(&mdev->al_lock);
+ return NULL;
+ }
+ e = lc_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext) {
+ if (bm_ext->lce.lc_number != enr) {
+ bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
+ bm_ext->rs_failed = 0;
+ lc_changed(mdev->resync, &bm_ext->lce);
+ wakeup = 1;
+ }
+ if (bm_ext->lce.refcnt == 1)
+ mdev->resync_locked++;
+ set_bit(BME_NO_WRITES, &bm_ext->flags);
+ }
+ rs_flags = mdev->resync->flags;
+ spin_unlock_irq(&mdev->al_lock);
+ if (wakeup)
+ wake_up(&mdev->al_wait);
+
+ if (!bm_ext) {
+ if (rs_flags & LC_STARVING)
+ dev_warn(DEV, "Have to wait for element"
+ " (resync LRU too small?)\n");
+ BUG_ON(rs_flags & LC_DIRTY);
+ }
+
+ return bm_ext;
+}
+
+static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
+{
+ struct lc_element *al_ext;
+ int rv = 0;
+
+ spin_lock_irq(&mdev->al_lock);
+ if (unlikely(enr == mdev->act_log->new_number))
+ rv = 1;
+ else {
+ al_ext = lc_find(mdev->act_log, enr);
+ if (al_ext) {
+ if (al_ext->refcnt)
+ rv = 1;
+ }
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ /*
+ if (unlikely(rv)) {
+ dev_info(DEV, "Delaying sync read until app's write is done\n");
+ }
+ */
+ return rv;
+}
+
+/**
+ * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ *
+ * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
+ */
+int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = BM_SECT_TO_EXT(sector);
+ struct bm_extent *bm_ext;
+ int i, sig;
+
+ sig = wait_event_interruptible(mdev->al_wait,
+ (bm_ext = _bme_get(mdev, enr)));
+ if (sig)
+ return 0;
+
+ if (test_bit(BME_LOCKED, &bm_ext->flags))
+ return 1;
+
+ for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
+ sig = wait_event_interruptible(mdev->al_wait,
+ !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
+ if (sig) {
+ spin_lock_irq(&mdev->al_lock);
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ mdev->resync_locked--;
+ wake_up(&mdev->al_wait);
+ }
+ spin_unlock_irq(&mdev->al_lock);
+ return 0;
+ }
+ }
+
+ set_bit(BME_LOCKED, &bm_ext->flags);
+
+ return 1;
+}
+
+/**
+ * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ *
+ * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
+ * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
+ * if there is still application IO going on in this area.
+ */
+int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = BM_SECT_TO_EXT(sector);
+ const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ int i;
+
+ spin_lock_irq(&mdev->al_lock);
+ if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
+ /* in case you have very heavy scattered io, it may
+ * stall the syncer undefined if we give up the ref count
+ * when we try again and requeue.
+ *
+ * if we don't give up the refcount, but the next time
+ * we are scheduled this extent has been "synced" by new
+ * application writes, we'd miss the lc_put on the
+ * extent we keep the refcount on.
+ * so we remembered which extent we had to try again, and
+ * if the next requested one is something else, we do
+ * the lc_put here...
+ * we also have to wake_up
+ */
+ e = lc_find(mdev->resync, mdev->resync_wenr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext) {
+ D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ mdev->resync_wenr = LC_FREE;
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0)
+ mdev->resync_locked--;
+ wake_up(&mdev->al_wait);
+ } else {
+ dev_alert(DEV, "LOGIC BUG\n");
+ }
+ }
+ /* TRY. */
+ e = lc_try_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext) {
+ if (test_bit(BME_LOCKED, &bm_ext->flags))
+ goto proceed;
+ if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
+ mdev->resync_locked++;
+ } else {
+ /* we did set the BME_NO_WRITES,
+ * but then could not set BME_LOCKED,
+ * so we tried again.
+ * drop the extra reference. */
+ bm_ext->lce.refcnt--;
+ D_ASSERT(bm_ext->lce.refcnt > 0);
+ }
+ goto check_al;
+ } else {
+ /* do we rather want to try later? */
+ if (mdev->resync_locked > mdev->resync->nr_elements-3)
+ goto try_again;
+ /* Do or do not. There is no try. -- Yoda */
+ e = lc_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (!bm_ext) {
+ const unsigned long rs_flags = mdev->resync->flags;
+ if (rs_flags & LC_STARVING)
+ dev_warn(DEV, "Have to wait for element"
+ " (resync LRU too small?)\n");
+ BUG_ON(rs_flags & LC_DIRTY);
+ goto try_again;
+ }
+ if (bm_ext->lce.lc_number != enr) {
+ bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
+ bm_ext->rs_failed = 0;
+ lc_changed(mdev->resync, &bm_ext->lce);
+ wake_up(&mdev->al_wait);
+ D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
+ }
+ set_bit(BME_NO_WRITES, &bm_ext->flags);
+ D_ASSERT(bm_ext->lce.refcnt == 1);
+ mdev->resync_locked++;
+ goto check_al;
+ }
+check_al:
+ for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
+ if (unlikely(al_enr+i == mdev->act_log->new_number))
+ goto try_again;
+ if (lc_is_used(mdev->act_log, al_enr+i))
+ goto try_again;
+ }
+ set_bit(BME_LOCKED, &bm_ext->flags);
+proceed:
+ mdev->resync_wenr = LC_FREE;
+ spin_unlock_irq(&mdev->al_lock);
+ return 0;
+
+try_again:
+ if (bm_ext)
+ mdev->resync_wenr = enr;
+ spin_unlock_irq(&mdev->al_lock);
+ return -EAGAIN;
+}
+
+void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = BM_SECT_TO_EXT(sector);
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ e = lc_find(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (!bm_ext) {
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
+ return;
+ }
+
+ if (bm_ext->lce.refcnt == 0) {
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
+ "but refcnt is 0!?\n",
+ (unsigned long long)sector, enr);
+ return;
+ }
+
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
+ clear_bit(BME_LOCKED, &bm_ext->flags);
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ mdev->resync_locked--;
+ wake_up(&mdev->al_wait);
+ }
+
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+}
+
+/**
+ * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
+ * @mdev: DRBD device.
+ */
+void drbd_rs_cancel_all(struct drbd_conf *mdev)
+{
+ spin_lock_irq(&mdev->al_lock);
+
+ if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
+ lc_reset(mdev->resync);
+ put_ldev(mdev);
+ }
+ mdev->resync_locked = 0;
+ mdev->resync_wenr = LC_FREE;
+ spin_unlock_irq(&mdev->al_lock);
+ wake_up(&mdev->al_wait);
+}
+
+/**
+ * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
+ * @mdev: DRBD device.
+ *
+ * Returns 0 upon success, -EAGAIN if at least one reference count was
+ * not zero.
+ */
+int drbd_rs_del_all(struct drbd_conf *mdev)
+{
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ int i;
+
+ spin_lock_irq(&mdev->al_lock);
+
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ /* ok, ->resync is there. */
+ for (i = 0; i < mdev->resync->nr_elements; i++) {
+ e = lc_element_by_index(mdev->resync, i);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext->lce.lc_number == LC_FREE)
+ continue;
+ if (bm_ext->lce.lc_number == mdev->resync_wenr) {
+ dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
+ " got 'synced' by application io\n",
+ mdev->resync_wenr);
+ D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ mdev->resync_wenr = LC_FREE;
+ lc_put(mdev->resync, &bm_ext->lce);
+ }
+ if (bm_ext->lce.refcnt != 0) {
+ dev_info(DEV, "Retrying drbd_rs_del_all() later. "
+ "refcnt=%d\n", bm_ext->lce.refcnt);
+ put_ldev(mdev);
+ spin_unlock_irq(&mdev->al_lock);
+ return -EAGAIN;
+ }
+ D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
+ lc_del(mdev->resync, &bm_ext->lce);
+ }
+ D_ASSERT(mdev->resync->used == 0);
+ put_ldev(mdev);
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ return 0;
+}
+
+/**
+ * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ * @size: Size of failed IO operation, in byte.
+ */
+void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count;
+ sector_t esector, nr_sectors;
+ int wake_up = 0;
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+ dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
+ (unsigned long long)sector, size);
+ return;
+ }
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ ERR_IF(sector >= nr_sectors) return;
+ ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+ /*
+ * round up start sector, round down end sector. we make sure we only
+ * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ return;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+
+ if (sbnr > ebnr)
+ return;
+
+ /*
+ * ok, (capacity & 7) != 0 sometimes, but who cares...
+ * we count rs_{total,left} in bits, not sectors.
+ */
+ spin_lock_irq(&mdev->al_lock);
+ count = drbd_bm_count_bits(mdev, sbnr, ebnr);
+ if (count) {
+ mdev->rs_failed += count;
+
+ if (get_ldev(mdev)) {
+ drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
+ put_ldev(mdev);
+ }
+
+ /* just wake_up unconditional now, various lc_chaged(),
+ * lc_put() in drbd_try_clear_on_disk_bm(). */
+ wake_up = 1;
+ }
+ spin_unlock_irq(&mdev->al_lock);
+ if (wake_up)
+ wake_up(&mdev->al_wait);
+}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
new file mode 100644
index 000000000000..b61057e77882
--- /dev/null
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -0,0 +1,1327 @@
+/*
+ drbd_bitmap.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/bitops.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/drbd.h>
+#include <asm/kmap_types.h>
+#include "drbd_int.h"
+
+/* OPAQUE outside this file!
+ * interface defined in drbd_int.h
+
+ * convention:
+ * function name drbd_bm_... => used elsewhere, "public".
+ * function name bm_... => internal to implementation, "private".
+
+ * Note that since find_first_bit returns int, at the current granularity of
+ * the bitmap (4KB per byte), this implementation "only" supports up to
+ * 1<<(32+12) == 16 TB...
+ */
+
+/*
+ * NOTE
+ * Access to the *bm_pages is protected by bm_lock.
+ * It is safe to read the other members within the lock.
+ *
+ * drbd_bm_set_bits is called from bio_endio callbacks,
+ * We may be called with irq already disabled,
+ * so we need spin_lock_irqsave().
+ * And we need the kmap_atomic.
+ */
+struct drbd_bitmap {
+ struct page **bm_pages;
+ spinlock_t bm_lock;
+ /* WARNING unsigned long bm_*:
+ * 32bit number of bit offset is just enough for 512 MB bitmap.
+ * it will blow up if we make the bitmap bigger...
+ * not that it makes much sense to have a bitmap that large,
+ * rather change the granularity to 16k or 64k or something.
+ * (that implies other problems, however...)
+ */
+ unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */
+ unsigned long bm_bits;
+ size_t bm_words;
+ size_t bm_number_of_pages;
+ sector_t bm_dev_capacity;
+ struct semaphore bm_change; /* serializes resize operations */
+
+ atomic_t bm_async_io;
+ wait_queue_head_t bm_io_wait;
+
+ unsigned long bm_flags;
+
+ /* debugging aid, in case we are still racy somewhere */
+ char *bm_why;
+ struct task_struct *bm_task;
+};
+
+/* definition of bits in bm_flags */
+#define BM_LOCKED 0
+#define BM_MD_IO_ERROR 1
+#define BM_P_VMALLOCED 2
+
+static int bm_is_locked(struct drbd_bitmap *b)
+{
+ return test_bit(BM_LOCKED, &b->bm_flags);
+}
+
+#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
+static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ if (!__ratelimit(&drbd_ratelimit_state))
+ return;
+ dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
+ current == mdev->receiver.task ? "receiver" :
+ current == mdev->asender.task ? "asender" :
+ current == mdev->worker.task ? "worker" : current->comm,
+ func, b->bm_why ?: "?",
+ b->bm_task == mdev->receiver.task ? "receiver" :
+ b->bm_task == mdev->asender.task ? "asender" :
+ b->bm_task == mdev->worker.task ? "worker" : "?");
+}
+
+void drbd_bm_lock(struct drbd_conf *mdev, char *why)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ int trylock_failed;
+
+ if (!b) {
+ dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
+ return;
+ }
+
+ trylock_failed = down_trylock(&b->bm_change);
+
+ if (trylock_failed) {
+ dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
+ current == mdev->receiver.task ? "receiver" :
+ current == mdev->asender.task ? "asender" :
+ current == mdev->worker.task ? "worker" : current->comm,
+ why, b->bm_why ?: "?",
+ b->bm_task == mdev->receiver.task ? "receiver" :
+ b->bm_task == mdev->asender.task ? "asender" :
+ b->bm_task == mdev->worker.task ? "worker" : "?");
+ down(&b->bm_change);
+ }
+ if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
+ dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
+
+ b->bm_why = why;
+ b->bm_task = current;
+}
+
+void drbd_bm_unlock(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ if (!b) {
+ dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
+ return;
+ }
+
+ if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags))
+ dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
+
+ b->bm_why = NULL;
+ b->bm_task = NULL;
+ up(&b->bm_change);
+}
+
+/* word offset to long pointer */
+static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
+{
+ struct page *page;
+ unsigned long page_nr;
+
+ /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
+ page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+ BUG_ON(page_nr >= b->bm_number_of_pages);
+ page = b->bm_pages[page_nr];
+
+ return (unsigned long *) kmap_atomic(page, km);
+}
+
+static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset)
+{
+ return __bm_map_paddr(b, offset, KM_IRQ1);
+}
+
+static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+{
+ kunmap_atomic(p_addr, km);
+};
+
+static void bm_unmap(unsigned long *p_addr)
+{
+ return __bm_unmap(p_addr, KM_IRQ1);
+}
+
+/* long word offset of _bitmap_ sector */
+#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+/* word offset from start of bitmap to word number _in_page_
+ * modulo longs per page
+#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
+ hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
+ so do it explicitly:
+ */
+#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
+
+/* Long words per page */
+#define LWPP (PAGE_SIZE/sizeof(long))
+
+/*
+ * actually most functions herein should take a struct drbd_bitmap*, not a
+ * struct drbd_conf*, but for the debug macros I like to have the mdev around
+ * to be able to report device specific.
+ */
+
+static void bm_free_pages(struct page **pages, unsigned long number)
+{
+ unsigned long i;
+ if (!pages)
+ return;
+
+ for (i = 0; i < number; i++) {
+ if (!pages[i]) {
+ printk(KERN_ALERT "drbd: bm_free_pages tried to free "
+ "a NULL pointer; i=%lu n=%lu\n",
+ i, number);
+ continue;
+ }
+ __free_page(pages[i]);
+ pages[i] = NULL;
+ }
+}
+
+static void bm_vk_free(void *ptr, int v)
+{
+ if (v)
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
+/*
+ * "have" and "want" are NUMBER OF PAGES.
+ */
+static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
+{
+ struct page **old_pages = b->bm_pages;
+ struct page **new_pages, *page;
+ unsigned int i, bytes, vmalloced = 0;
+ unsigned long have = b->bm_number_of_pages;
+
+ BUG_ON(have == 0 && old_pages != NULL);
+ BUG_ON(have != 0 && old_pages == NULL);
+
+ if (have == want)
+ return old_pages;
+
+ /* Trying kmalloc first, falling back to vmalloc.
+ * GFP_KERNEL is ok, as this is done when a lower level disk is
+ * "attached" to the drbd. Context is receiver thread or cqueue
+ * thread. As we have no disk yet, we are not in the IO path,
+ * not even the IO path of the peer. */
+ bytes = sizeof(struct page *)*want;
+ new_pages = kmalloc(bytes, GFP_KERNEL);
+ if (!new_pages) {
+ new_pages = vmalloc(bytes);
+ if (!new_pages)
+ return NULL;
+ vmalloced = 1;
+ }
+
+ memset(new_pages, 0, bytes);
+ if (want >= have) {
+ for (i = 0; i < have; i++)
+ new_pages[i] = old_pages[i];
+ for (; i < want; i++) {
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page) {
+ bm_free_pages(new_pages + have, i - have);
+ bm_vk_free(new_pages, vmalloced);
+ return NULL;
+ }
+ new_pages[i] = page;
+ }
+ } else {
+ for (i = 0; i < want; i++)
+ new_pages[i] = old_pages[i];
+ /* NOT HERE, we are outside the spinlock!
+ bm_free_pages(old_pages + want, have - want);
+ */
+ }
+
+ if (vmalloced)
+ set_bit(BM_P_VMALLOCED, &b->bm_flags);
+ else
+ clear_bit(BM_P_VMALLOCED, &b->bm_flags);
+
+ return new_pages;
+}
+
+/*
+ * called on driver init only. TODO call when a device is created.
+ * allocates the drbd_bitmap, and stores it in mdev->bitmap.
+ */
+int drbd_bm_init(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ WARN_ON(b != NULL);
+ b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
+ if (!b)
+ return -ENOMEM;
+ spin_lock_init(&b->bm_lock);
+ init_MUTEX(&b->bm_change);
+ init_waitqueue_head(&b->bm_io_wait);
+
+ mdev->bitmap = b;
+
+ return 0;
+}
+
+sector_t drbd_bm_capacity(struct drbd_conf *mdev)
+{
+ ERR_IF(!mdev->bitmap) return 0;
+ return mdev->bitmap->bm_dev_capacity;
+}
+
+/* called on driver unload. TODO: call when a device is destroyed.
+ */
+void drbd_bm_cleanup(struct drbd_conf *mdev)
+{
+ ERR_IF (!mdev->bitmap) return;
+ bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
+ bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags));
+ kfree(mdev->bitmap);
+ mdev->bitmap = NULL;
+}
+
+/*
+ * since (b->bm_bits % BITS_PER_LONG) != 0,
+ * this masks out the remaining bits.
+ * Returns the number of bits cleared.
+ */
+static int bm_clear_surplus(struct drbd_bitmap *b)
+{
+ const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
+ size_t w = b->bm_bits >> LN2_BPL;
+ int cleared = 0;
+ unsigned long *p_addr, *bm;
+
+ p_addr = bm_map_paddr(b, w);
+ bm = p_addr + MLPP(w);
+ if (w < b->bm_words) {
+ cleared = hweight_long(*bm & ~mask);
+ *bm &= mask;
+ w++; bm++;
+ }
+
+ if (w < b->bm_words) {
+ cleared += hweight_long(*bm);
+ *bm = 0;
+ }
+ bm_unmap(p_addr);
+ return cleared;
+}
+
+static void bm_set_surplus(struct drbd_bitmap *b)
+{
+ const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
+ size_t w = b->bm_bits >> LN2_BPL;
+ unsigned long *p_addr, *bm;
+
+ p_addr = bm_map_paddr(b, w);
+ bm = p_addr + MLPP(w);
+ if (w < b->bm_words) {
+ *bm |= ~mask;
+ bm++; w++;
+ }
+
+ if (w < b->bm_words) {
+ *bm = ~(0UL);
+ }
+ bm_unmap(p_addr);
+}
+
+static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
+{
+ unsigned long *p_addr, *bm, offset = 0;
+ unsigned long bits = 0;
+ unsigned long i, do_now;
+
+ while (offset < b->bm_words) {
+ i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
+ p_addr = __bm_map_paddr(b, offset, KM_USER0);
+ bm = p_addr + MLPP(offset);
+ while (i--) {
+#ifndef __LITTLE_ENDIAN
+ if (swap_endian)
+ *bm = lel_to_cpu(*bm);
+#endif
+ bits += hweight_long(*bm++);
+ }
+ __bm_unmap(p_addr, KM_USER0);
+ offset += do_now;
+ cond_resched();
+ }
+
+ return bits;
+}
+
+static unsigned long bm_count_bits(struct drbd_bitmap *b)
+{
+ return __bm_count_bits(b, 0);
+}
+
+static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
+{
+ return __bm_count_bits(b, 1);
+}
+
+/* offset and len in long words.*/
+static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
+{
+ unsigned long *p_addr, *bm;
+ size_t do_now, end;
+
+#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
+
+ end = offset + len;
+
+ if (end > b->bm_words) {
+ printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+ return;
+ }
+
+ while (offset < end) {
+ do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
+ p_addr = bm_map_paddr(b, offset);
+ bm = p_addr + MLPP(offset);
+ if (bm+do_now > p_addr + LWPP) {
+ printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+ p_addr, bm, (int)do_now);
+ break; /* breaks to after catch_oob_access_end() only! */
+ }
+ memset(bm, c, do_now * sizeof(long));
+ bm_unmap(p_addr);
+ offset += do_now;
+ }
+}
+
+/*
+ * make sure the bitmap has enough room for the attached storage,
+ * if necessary, resize.
+ * called whenever we may have changed the device size.
+ * returns -ENOMEM if we could not allocate enough memory, 0 on success.
+ * In case this is actually a resize, we copy the old bitmap into the new one.
+ * Otherwise, the bitmap is initialized to all bits set.
+ */
+int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long bits, words, owords, obits, *p_addr, *bm;
+ unsigned long want, have, onpages; /* number of pages */
+ struct page **npages, **opages = NULL;
+ int err = 0, growing;
+ int opages_vmalloced;
+
+ ERR_IF(!b) return -ENOMEM;
+
+ drbd_bm_lock(mdev, "resize");
+
+ dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
+ (unsigned long long)capacity);
+
+ if (capacity == b->bm_dev_capacity)
+ goto out;
+
+ opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags);
+
+ if (capacity == 0) {
+ spin_lock_irq(&b->bm_lock);
+ opages = b->bm_pages;
+ onpages = b->bm_number_of_pages;
+ owords = b->bm_words;
+ b->bm_pages = NULL;
+ b->bm_number_of_pages =
+ b->bm_set =
+ b->bm_bits =
+ b->bm_words =
+ b->bm_dev_capacity = 0;
+ spin_unlock_irq(&b->bm_lock);
+ bm_free_pages(opages, onpages);
+ bm_vk_free(opages, opages_vmalloced);
+ goto out;
+ }
+ bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
+
+ /* if we would use
+ words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
+ a 32bit host could present the wrong number of words
+ to a 64bit host.
+ */
+ words = ALIGN(bits, 64) >> LN2_BPL;
+
+ if (get_ldev(mdev)) {
+ D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
+ put_ldev(mdev);
+ }
+
+ /* one extra long to catch off by one errors */
+ want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+ have = b->bm_number_of_pages;
+ if (want == have) {
+ D_ASSERT(b->bm_pages != NULL);
+ npages = b->bm_pages;
+ } else {
+ if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
+ npages = NULL;
+ else
+ npages = bm_realloc_pages(b, want);
+ }
+
+ if (!npages) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ spin_lock_irq(&b->bm_lock);
+ opages = b->bm_pages;
+ owords = b->bm_words;
+ obits = b->bm_bits;
+
+ growing = bits > obits;
+ if (opages)
+ bm_set_surplus(b);
+
+ b->bm_pages = npages;
+ b->bm_number_of_pages = want;
+ b->bm_bits = bits;
+ b->bm_words = words;
+ b->bm_dev_capacity = capacity;
+
+ if (growing) {
+ bm_memset(b, owords, 0xff, words-owords);
+ b->bm_set += bits - obits;
+ }
+
+ if (want < have) {
+ /* implicit: (opages != NULL) && (opages != npages) */
+ bm_free_pages(opages + want, have - want);
+ }
+
+ p_addr = bm_map_paddr(b, words);
+ bm = p_addr + MLPP(words);
+ *bm = DRBD_MAGIC;
+ bm_unmap(p_addr);
+
+ (void)bm_clear_surplus(b);
+
+ spin_unlock_irq(&b->bm_lock);
+ if (opages != npages)
+ bm_vk_free(opages, opages_vmalloced);
+ if (!growing)
+ b->bm_set = bm_count_bits(b);
+ dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);
+
+ out:
+ drbd_bm_unlock(mdev);
+ return err;
+}
+
+/* inherently racy:
+ * if not protected by other means, return value may be out of date when
+ * leaving this function...
+ * we still need to lock it, since it is important that this returns
+ * bm_set == 0 precisely.
+ *
+ * maybe bm_set should be atomic_t ?
+ */
+static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long s;
+ unsigned long flags;
+
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ s = b->bm_set;
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+
+ return s;
+}
+
+unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
+{
+ unsigned long s;
+ /* if I don't have a disk, I don't know about out-of-sync status */
+ if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+ return 0;
+ s = _drbd_bm_total_weight(mdev);
+ put_ldev(mdev);
+ return s;
+}
+
+size_t drbd_bm_words(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ return b->bm_words;
+}
+
+unsigned long drbd_bm_bits(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return 0;
+
+ return b->bm_bits;
+}
+
+/* merge number words from buffer into the bitmap starting at offset.
+ * buffer[i] is expected to be little endian unsigned long.
+ * bitmap must be locked by drbd_bm_lock.
+ * currently only used from receive_bitmap.
+ */
+void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+ unsigned long *buffer)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr, *bm;
+ unsigned long word, bits;
+ size_t end, do_now;
+
+ end = offset + number;
+
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+ if (number == 0)
+ return;
+ WARN_ON(offset >= b->bm_words);
+ WARN_ON(end > b->bm_words);
+
+ spin_lock_irq(&b->bm_lock);
+ while (offset < end) {
+ do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
+ p_addr = bm_map_paddr(b, offset);
+ bm = p_addr + MLPP(offset);
+ offset += do_now;
+ while (do_now--) {
+ bits = hweight_long(*bm);
+ word = *bm | lel_to_cpu(*buffer++);
+ *bm++ = word;
+ b->bm_set += hweight_long(word) - bits;
+ }
+ bm_unmap(p_addr);
+ }
+ /* with 32bit <-> 64bit cross-platform connect
+ * this is only correct for current usage,
+ * where we _know_ that we are 64 bit aligned,
+ * and know that this function is used in this way, too...
+ */
+ if (end == b->bm_words)
+ b->bm_set -= bm_clear_surplus(b);
+
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* copy number words from the bitmap starting at offset into the buffer.
+ * buffer[i] will be little endian unsigned long.
+ */
+void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+ unsigned long *buffer)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr, *bm;
+ size_t end, do_now;
+
+ end = offset + number;
+
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+
+ spin_lock_irq(&b->bm_lock);
+ if ((offset >= b->bm_words) ||
+ (end > b->bm_words) ||
+ (number <= 0))
+ dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
+ (unsigned long) offset,
+ (unsigned long) number,
+ (unsigned long) b->bm_words);
+ else {
+ while (offset < end) {
+ do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
+ p_addr = bm_map_paddr(b, offset);
+ bm = p_addr + MLPP(offset);
+ offset += do_now;
+ while (do_now--)
+ *buffer++ = cpu_to_lel(*bm++);
+ bm_unmap(p_addr);
+ }
+ }
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* set all bits in the bitmap */
+void drbd_bm_set_all(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+
+ spin_lock_irq(&b->bm_lock);
+ bm_memset(b, 0, 0xff, b->bm_words);
+ (void)bm_clear_surplus(b);
+ b->bm_set = b->bm_bits;
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* clear all bits in the bitmap */
+void drbd_bm_clear_all(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+
+ spin_lock_irq(&b->bm_lock);
+ bm_memset(b, 0, 0, b->bm_words);
+ b->bm_set = 0;
+ spin_unlock_irq(&b->bm_lock);
+}
+
+static void bm_async_io_complete(struct bio *bio, int error)
+{
+ struct drbd_bitmap *b = bio->bi_private;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?!
+ * do we want to WARN() on this? */
+ if (!error && !uptodate)
+ error = -EIO;
+
+ if (error) {
+ /* doh. what now?
+ * for now, set all bits, and flag MD_IO_ERROR */
+ __set_bit(BM_MD_IO_ERROR, &b->bm_flags);
+ }
+ if (atomic_dec_and_test(&b->bm_async_io))
+ wake_up(&b->bm_io_wait);
+
+ bio_put(bio);
+}
+
+static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
+{
+ /* we are process context. we always get a bio */
+ struct bio *bio = bio_alloc(GFP_KERNEL, 1);
+ unsigned int len;
+ sector_t on_disk_sector =
+ mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
+ on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+
+ /* this might happen with very small
+ * flexible external meta data device */
+ len = min_t(unsigned int, PAGE_SIZE,
+ (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
+
+ bio->bi_bdev = mdev->ldev->md_bdev;
+ bio->bi_sector = on_disk_sector;
+ bio_add_page(bio, b->bm_pages[page_nr], len, 0);
+ bio->bi_private = b;
+ bio->bi_end_io = bm_async_io_complete;
+
+ if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
+ bio->bi_rw |= rw;
+ bio_endio(bio, -EIO);
+ } else {
+ submit_bio(rw, bio);
+ }
+}
+
+# if defined(__LITTLE_ENDIAN)
+ /* nothing to do, on disk == in memory */
+# define bm_cpu_to_lel(x) ((void)0)
+# else
+void bm_cpu_to_lel(struct drbd_bitmap *b)
+{
+ /* need to cpu_to_lel all the pages ...
+ * this may be optimized by using
+ * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
+ * the following is still not optimal, but better than nothing */
+ unsigned int i;
+ unsigned long *p_addr, *bm;
+ if (b->bm_set == 0) {
+ /* no page at all; avoid swap if all is 0 */
+ i = b->bm_number_of_pages;
+ } else if (b->bm_set == b->bm_bits) {
+ /* only the last page */
+ i = b->bm_number_of_pages - 1;
+ } else {
+ /* all pages */
+ i = 0;
+ }
+ for (; i < b->bm_number_of_pages; i++) {
+ p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
+ for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
+ *bm = cpu_to_lel(*bm);
+ kunmap_atomic(p_addr, KM_USER0);
+ }
+}
+# endif
+/* lel_to_cpu == cpu_to_lel */
+# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
+
+/*
+ * bm_rw: read/write the whole bitmap from/to its on disk location.
+ */
+static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ /* sector_t sector; */
+ int bm_words, num_pages, i;
+ unsigned long now;
+ char ppb[10];
+ int err = 0;
+
+ WARN_ON(!bm_is_locked(b));
+
+ /* no spinlock here, the drbd_bm_lock should be enough! */
+
+ bm_words = drbd_bm_words(mdev);
+ num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+ /* on disk bitmap is little endian */
+ if (rw == WRITE)
+ bm_cpu_to_lel(b);
+
+ now = jiffies;
+ atomic_set(&b->bm_async_io, num_pages);
+ __clear_bit(BM_MD_IO_ERROR, &b->bm_flags);
+
+ /* let the layers below us try to merge these bios... */
+ for (i = 0; i < num_pages; i++)
+ bm_page_io_async(mdev, b, i, rw);
+
+ drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
+ wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
+
+ if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
+ dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
+ drbd_chk_io_error(mdev, 1, TRUE);
+ err = -EIO;
+ }
+
+ now = jiffies;
+ if (rw == WRITE) {
+ /* swap back endianness */
+ bm_lel_to_cpu(b);
+ /* flush bitmap to stable storage */
+ drbd_md_flush(mdev);
+ } else /* rw == READ */ {
+ /* just read, if necessary adjust endianness */
+ b->bm_set = bm_count_bits_swap_endian(b);
+ dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
+ jiffies - now);
+ }
+ now = b->bm_set;
+
+ dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+ ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+
+ return err;
+}
+
+/**
+ * drbd_bm_read() - Read the whole bitmap from its on disk location.
+ * @mdev: DRBD device.
+ */
+int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, READ);
+}
+
+/**
+ * drbd_bm_write() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
+ */
+int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE);
+}
+
+/**
+ * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
+ * @mdev: DRBD device.
+ * @enr: Extent number in the resync lru (happens to be sector offset)
+ *
+ * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
+ * by a single sector write. Therefore enr == sector offset from the
+ * start of the bitmap.
+ */
+int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local)
+{
+ sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+ + mdev->ldev->md.bm_offset;
+ int bm_words, num_words, offset;
+ int err = 0;
+
+ mutex_lock(&mdev->md_io_mutex);
+ bm_words = drbd_bm_words(mdev);
+ offset = S2W(enr); /* word offset into bitmap */
+ num_words = min(S2W(1), bm_words - offset);
+ if (num_words < S2W(1))
+ memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE);
+ drbd_bm_get_lel(mdev, offset, num_words,
+ page_address(mdev->md_io_page));
+ if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) {
+ int i;
+ err = -EIO;
+ dev_err(DEV, "IO ERROR writing bitmap sector %lu "
+ "(meta-disk sector %llus)\n",
+ enr, (unsigned long long)on_disk_sector);
+ drbd_chk_io_error(mdev, 1, TRUE);
+ for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
+ drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i);
+ }
+ mdev->bm_writ_cnt++;
+ mutex_unlock(&mdev->md_io_mutex);
+ return err;
+}
+
+/* NOTE
+ * find_first_bit returns int, we return unsigned long.
+ * should not make much difference anyways, but ...
+ *
+ * this returns a bit number, NOT a sector!
+ */
+#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
+static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
+ const int find_zero_bit, const enum km_type km)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long i = -1UL;
+ unsigned long *p_addr;
+ unsigned long bit_offset; /* bit offset of the mapped page. */
+
+ if (bm_fo > b->bm_bits) {
+ dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
+ } else {
+ while (bm_fo < b->bm_bits) {
+ unsigned long offset;
+ bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */
+ offset = bit_offset >> LN2_BPL; /* word offset of the page */
+ p_addr = __bm_map_paddr(b, offset, km);
+
+ if (find_zero_bit)
+ i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
+ else
+ i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
+
+ __bm_unmap(p_addr, km);
+ if (i < PAGE_SIZE*8) {
+ i = bit_offset + i;
+ if (i >= b->bm_bits)
+ break;
+ goto found;
+ }
+ bm_fo = bit_offset + PAGE_SIZE*8;
+ }
+ i = -1UL;
+ }
+ found:
+ return i;
+}
+
+static unsigned long bm_find_next(struct drbd_conf *mdev,
+ unsigned long bm_fo, const int find_zero_bit)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long i = -1UL;
+
+ ERR_IF(!b) return i;
+ ERR_IF(!b->bm_pages) return i;
+
+ spin_lock_irq(&b->bm_lock);
+ if (bm_is_locked(b))
+ bm_print_lock_info(mdev);
+
+ i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+
+ spin_unlock_irq(&b->bm_lock);
+ return i;
+}
+
+unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ return bm_find_next(mdev, bm_fo, 0);
+}
+
+#if 0
+/* not yet needed for anything. */
+unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ return bm_find_next(mdev, bm_fo, 1);
+}
+#endif
+
+/* does not spin_lock_irqsave.
+ * you must take drbd_bm_lock() first */
+unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ /* WARN_ON(!bm_is_locked(mdev)); */
+ return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+}
+
+unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ /* WARN_ON(!bm_is_locked(mdev)); */
+ return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector.
+ * expected to be called for only a few bits (e - s about BITS_PER_LONG).
+ * Must hold bitmap lock already. */
+int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+ unsigned long e, int val, const enum km_type km)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr = NULL;
+ unsigned long bitnr;
+ unsigned long last_page_nr = -1UL;
+ int c = 0;
+
+ if (e >= b->bm_bits) {
+ dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
+ s, e, b->bm_bits);
+ e = b->bm_bits ? b->bm_bits -1 : 0;
+ }
+ for (bitnr = s; bitnr <= e; bitnr++) {
+ unsigned long offset = bitnr>>LN2_BPL;
+ unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+ if (page_nr != last_page_nr) {
+ if (p_addr)
+ __bm_unmap(p_addr, km);
+ p_addr = __bm_map_paddr(b, offset, km);
+ last_page_nr = page_nr;
+ }
+ if (val)
+ c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr));
+ else
+ c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
+ }
+ if (p_addr)
+ __bm_unmap(p_addr, km);
+ b->bm_set += c;
+ return c;
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector */
+int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+ const unsigned long e, int val)
+{
+ unsigned long flags;
+ struct drbd_bitmap *b = mdev->bitmap;
+ int c = 0;
+
+ ERR_IF(!b) return 1;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (bm_is_locked(b))
+ bm_print_lock_info(mdev);
+
+ c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
+
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return c;
+}
+
+/* returns number of bits changed 0 -> 1 */
+int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ return bm_change_bits_to(mdev, s, e, 1);
+}
+
+/* returns number of bits changed 1 -> 0 */
+int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ return -bm_change_bits_to(mdev, s, e, 0);
+}
+
+/* sets all bits in full words,
+ * from first_word up to, but not including, last_word */
+static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
+ int page_nr, int first_word, int last_word)
+{
+ int i;
+ int bits;
+ unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0);
+ for (i = first_word; i < last_word; i++) {
+ bits = hweight_long(paddr[i]);
+ paddr[i] = ~0UL;
+ b->bm_set += BITS_PER_LONG - bits;
+ }
+ kunmap_atomic(paddr, KM_USER0);
+}
+
+/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave.
+ * You must first drbd_bm_lock().
+ * Can be called to set the whole bitmap in one go.
+ * Sets bits from s to e _inclusive_. */
+void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ /* First set_bit from the first bit (s)
+ * up to the next long boundary (sl),
+ * then assign full words up to the last long boundary (el),
+ * then set_bit up to and including the last bit (e).
+ *
+ * Do not use memset, because we must account for changes,
+ * so we need to loop over the words with hweight() anyways.
+ */
+ unsigned long sl = ALIGN(s,BITS_PER_LONG);
+ unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
+ int first_page;
+ int last_page;
+ int page_nr;
+ int first_word;
+ int last_word;
+
+ if (e - s <= 3*BITS_PER_LONG) {
+ /* don't bother; el and sl may even be wrong. */
+ __bm_change_bits_to(mdev, s, e, 1, KM_USER0);
+ return;
+ }
+
+ /* difference is large enough that we can trust sl and el */
+
+ /* bits filling the current long */
+ if (sl)
+ __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0);
+
+ first_page = sl >> (3 + PAGE_SHIFT);
+ last_page = el >> (3 + PAGE_SHIFT);
+
+ /* MLPP: modulo longs per page */
+ /* LWPP: long words per page */
+ first_word = MLPP(sl >> LN2_BPL);
+ last_word = LWPP;
+
+ /* first and full pages, unless first page == last page */
+ for (page_nr = first_page; page_nr < last_page; page_nr++) {
+ bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
+ cond_resched();
+ first_word = 0;
+ }
+
+ /* last page (respectively only page, for first page == last page) */
+ last_word = MLPP(el >> LN2_BPL);
+ bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* possibly trailing bits.
+ * example: (e & 63) == 63, el will be e+1.
+ * if that even was the very last bit,
+ * it would trigger an assert in __bm_change_bits_to()
+ */
+ if (el <= e)
+ __bm_change_bits_to(mdev, el, e, 1, KM_USER0);
+}
+
+/* returns bit state
+ * wants bitnr, NOT sector.
+ * inherently racy... area needs to be locked by means of {al,rs}_lru
+ * 1 ... bit set
+ * 0 ... bit not set
+ * -1 ... first out of bounds access, stop testing for bits!
+ */
+int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
+{
+ unsigned long flags;
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr;
+ int i;
+
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (bm_is_locked(b))
+ bm_print_lock_info(mdev);
+ if (bitnr < b->bm_bits) {
+ unsigned long offset = bitnr>>LN2_BPL;
+ p_addr = bm_map_paddr(b, offset);
+ i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
+ bm_unmap(p_addr);
+ } else if (bitnr == b->bm_bits) {
+ i = -1;
+ } else { /* (bitnr > b->bm_bits) */
+ dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
+ i = 0;
+ }
+
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return i;
+}
+
+/* returns number of bits set in the range [s, e] */
+int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ unsigned long flags;
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr = NULL, page_nr = -1;
+ unsigned long bitnr;
+ int c = 0;
+ size_t w;
+
+ /* If this is called without a bitmap, that is a bug. But just to be
+ * robust in case we screwed up elsewhere, in that case pretend there
+ * was one dirty bit in the requested area, so we won't try to do a
+ * local read there (no bitmap probably implies no disk) */
+ ERR_IF(!b) return 1;
+ ERR_IF(!b->bm_pages) return 1;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (bm_is_locked(b))
+ bm_print_lock_info(mdev);
+ for (bitnr = s; bitnr <= e; bitnr++) {
+ w = bitnr >> LN2_BPL;
+ if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) {
+ page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3);
+ if (p_addr)
+ bm_unmap(p_addr);
+ p_addr = bm_map_paddr(b, w);
+ }
+ ERR_IF (bitnr >= b->bm_bits) {
+ dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
+ } else {
+ c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
+ }
+ }
+ if (p_addr)
+ bm_unmap(p_addr);
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return c;
+}
+
+
+/* inherently racy...
+ * return value may be already out-of-date when this function returns.
+ * but the general usage is that this is only use during a cstate when bits are
+ * only cleared, not set, and typically only care for the case when the return
+ * value is zero, or we already "locked" this "bitmap extent" by other means.
+ *
+ * enr is bm-extent number, since we chose to name one sector (512 bytes)
+ * worth of the bitmap a "bitmap extent".
+ *
+ * TODO
+ * I think since we use it like a reference count, we should use the real
+ * reference count of some bitmap extent element from some lru instead...
+ *
+ */
+int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ int count, s, e;
+ unsigned long flags;
+ unsigned long *p_addr, *bm;
+
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (bm_is_locked(b))
+ bm_print_lock_info(mdev);
+
+ s = S2W(enr);
+ e = min((size_t)S2W(enr+1), b->bm_words);
+ count = 0;
+ if (s < b->bm_words) {
+ int n = e-s;
+ p_addr = bm_map_paddr(b, s);
+ bm = p_addr + MLPP(s);
+ while (n--)
+ count += hweight_long(*bm++);
+ bm_unmap(p_addr);
+ } else {
+ dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
+ }
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return count;
+}
+
+/* set all bits covered by the AL-extent al_enr */
+unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr, *bm;
+ unsigned long weight;
+ int count, s, e, i, do_now;
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irq(&b->bm_lock);
+ if (bm_is_locked(b))
+ bm_print_lock_info(mdev);
+ weight = b->bm_set;
+
+ s = al_enr * BM_WORDS_PER_AL_EXT;
+ e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
+ /* assert that s and e are on the same page */
+ D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
+ == s >> (PAGE_SHIFT - LN2_BPL + 3));
+ count = 0;
+ if (s < b->bm_words) {
+ i = do_now = e-s;
+ p_addr = bm_map_paddr(b, s);
+ bm = p_addr + MLPP(s);
+ while (i--) {
+ count += hweight_long(*bm);
+ *bm = -1UL;
+ bm++;
+ }
+ bm_unmap(p_addr);
+ b->bm_set += do_now*BITS_PER_LONG - count;
+ if (e == b->bm_words)
+ b->bm_set -= bm_clear_surplus(b);
+ } else {
+ dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s);
+ }
+ weight = b->bm_set - weight;
+ spin_unlock_irq(&b->bm_lock);
+ return weight;
+}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
new file mode 100644
index 000000000000..2312d782fe99
--- /dev/null
+++ b/drivers/block/drbd/drbd_int.h
@@ -0,0 +1,2252 @@
+/*
+ drbd_int.h
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#ifndef _DRBD_INT_H
+#define _DRBD_INT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/crypto.h>
+#include <linux/ratelimit.h>
+#include <linux/tcp.h>
+#include <linux/mutex.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <net/tcp.h>
+#include <linux/lru_cache.h>
+
+#ifdef __CHECKER__
+# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
+# define __protected_read_by(x) __attribute__((require_context(x,1,999,"read")))
+# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
+# define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call")))
+#else
+# define __protected_by(x)
+# define __protected_read_by(x)
+# define __protected_write_by(x)
+# define __must_hold(x)
+#endif
+
+#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0)
+
+/* module parameter, defined in drbd_main.c */
+extern unsigned int minor_count;
+extern int disable_sendpage;
+extern int allow_oos;
+extern unsigned int cn_idx;
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+extern int enable_faults;
+extern int fault_rate;
+extern int fault_devs;
+#endif
+
+extern char usermode_helper[];
+
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* I don't remember why XCPU ...
+ * This is used to wake the asender,
+ * and to interrupt sending the sending task
+ * on disconnect.
+ */
+#define DRBD_SIG SIGXCPU
+
+/* This is used to stop/restart our threads.
+ * Cannot use SIGTERM nor SIGKILL, since these
+ * are sent out by init on runlevel changes
+ * I choose SIGHUP for now.
+ */
+#define DRBD_SIGKILL SIGHUP
+
+/* All EEs on the free list should have ID_VACANT (== 0)
+ * freshly allocated EEs get !ID_VACANT (== 1)
+ * so if it says "cannot dereference null pointer at adress 0x00000001",
+ * it is most likely one of these :( */
+
+#define ID_IN_SYNC (4711ULL)
+#define ID_OUT_OF_SYNC (4712ULL)
+
+#define ID_SYNCER (-1ULL)
+#define ID_VACANT 0
+#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+
+struct drbd_conf;
+
+
+/* to shorten dev_warn(DEV, "msg"); and relatives statements */
+#define DEV (disk_to_dev(mdev->vdisk))
+
+#define D_ASSERT(exp) if (!(exp)) \
+ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
+
+#define ERR_IF(exp) if (({ \
+ int _b = (exp) != 0; \
+ if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \
+ __func__, #exp, __FILE__, __LINE__); \
+ _b; \
+ }))
+
+/* Defines to control fault insertion */
+enum {
+ DRBD_FAULT_MD_WR = 0, /* meta data write */
+ DRBD_FAULT_MD_RD = 1, /* read */
+ DRBD_FAULT_RS_WR = 2, /* resync */
+ DRBD_FAULT_RS_RD = 3,
+ DRBD_FAULT_DT_WR = 4, /* data */
+ DRBD_FAULT_DT_RD = 5,
+ DRBD_FAULT_DT_RA = 6, /* data read ahead */
+ DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */
+ DRBD_FAULT_AL_EE = 8, /* alloc ee */
+
+ DRBD_FAULT_MAX,
+};
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+extern unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
+static inline int
+drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
+ return fault_rate &&
+ (enable_faults & (1<<type)) &&
+ _drbd_insert_fault(mdev, type);
+}
+#define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t)))
+
+#else
+#define FAULT_ACTIVE(_m, _t) (0)
+#endif
+
+/* integer division, round _UP_ to the next integer */
+#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
+/* usual integer division */
+#define div_floor(A, B) ((A)/(B))
+
+/* drbd_meta-data.c (still in drbd_main.c) */
+/* 4th incarnation of the disk layout. */
+#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
+
+extern struct drbd_conf **minor_table;
+extern struct ratelimit_state drbd_ratelimit_state;
+
+/* on the wire */
+enum drbd_packets {
+ /* receiver (data socket) */
+ P_DATA = 0x00,
+ P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */
+ P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */
+ P_BARRIER = 0x03,
+ P_BITMAP = 0x04,
+ P_BECOME_SYNC_TARGET = 0x05,
+ P_BECOME_SYNC_SOURCE = 0x06,
+ P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */
+ P_DATA_REQUEST = 0x08, /* Used to ask for a data block */
+ P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */
+ P_SYNC_PARAM = 0x0a,
+ P_PROTOCOL = 0x0b,
+ P_UUIDS = 0x0c,
+ P_SIZES = 0x0d,
+ P_STATE = 0x0e,
+ P_SYNC_UUID = 0x0f,
+ P_AUTH_CHALLENGE = 0x10,
+ P_AUTH_RESPONSE = 0x11,
+ P_STATE_CHG_REQ = 0x12,
+
+ /* asender (meta socket */
+ P_PING = 0x13,
+ P_PING_ACK = 0x14,
+ P_RECV_ACK = 0x15, /* Used in protocol B */
+ P_WRITE_ACK = 0x16, /* Used in protocol C */
+ P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
+ P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */
+ P_NEG_ACK = 0x19, /* Sent if local disk is unusable */
+ P_NEG_DREPLY = 0x1a, /* Local disk is broken... */
+ P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */
+ P_BARRIER_ACK = 0x1c,
+ P_STATE_CHG_REPLY = 0x1d,
+
+ /* "new" commands, no longer fitting into the ordering scheme above */
+
+ P_OV_REQUEST = 0x1e, /* data socket */
+ P_OV_REPLY = 0x1f,
+ P_OV_RESULT = 0x20, /* meta socket */
+ P_CSUM_RS_REQUEST = 0x21, /* data socket */
+ P_RS_IS_IN_SYNC = 0x22, /* meta socket */
+ P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
+ P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */
+
+ P_MAX_CMD = 0x25,
+ P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
+ P_MAX_OPT_CMD = 0x101,
+
+ /* special command ids for handshake */
+
+ P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */
+ P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */
+
+ P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */
+};
+
+static inline const char *cmdname(enum drbd_packets cmd)
+{
+ /* THINK may need to become several global tables
+ * when we want to support more than
+ * one PRO_VERSION */
+ static const char *cmdnames[] = {
+ [P_DATA] = "Data",
+ [P_DATA_REPLY] = "DataReply",
+ [P_RS_DATA_REPLY] = "RSDataReply",
+ [P_BARRIER] = "Barrier",
+ [P_BITMAP] = "ReportBitMap",
+ [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
+ [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
+ [P_UNPLUG_REMOTE] = "UnplugRemote",
+ [P_DATA_REQUEST] = "DataRequest",
+ [P_RS_DATA_REQUEST] = "RSDataRequest",
+ [P_SYNC_PARAM] = "SyncParam",
+ [P_SYNC_PARAM89] = "SyncParam89",
+ [P_PROTOCOL] = "ReportProtocol",
+ [P_UUIDS] = "ReportUUIDs",
+ [P_SIZES] = "ReportSizes",
+ [P_STATE] = "ReportState",
+ [P_SYNC_UUID] = "ReportSyncUUID",
+ [P_AUTH_CHALLENGE] = "AuthChallenge",
+ [P_AUTH_RESPONSE] = "AuthResponse",
+ [P_PING] = "Ping",
+ [P_PING_ACK] = "PingAck",
+ [P_RECV_ACK] = "RecvAck",
+ [P_WRITE_ACK] = "WriteAck",
+ [P_RS_WRITE_ACK] = "RSWriteAck",
+ [P_DISCARD_ACK] = "DiscardAck",
+ [P_NEG_ACK] = "NegAck",
+ [P_NEG_DREPLY] = "NegDReply",
+ [P_NEG_RS_DREPLY] = "NegRSDReply",
+ [P_BARRIER_ACK] = "BarrierAck",
+ [P_STATE_CHG_REQ] = "StateChgRequest",
+ [P_STATE_CHG_REPLY] = "StateChgReply",
+ [P_OV_REQUEST] = "OVRequest",
+ [P_OV_REPLY] = "OVReply",
+ [P_OV_RESULT] = "OVResult",
+ [P_MAX_CMD] = NULL,
+ };
+
+ if (cmd == P_HAND_SHAKE_M)
+ return "HandShakeM";
+ if (cmd == P_HAND_SHAKE_S)
+ return "HandShakeS";
+ if (cmd == P_HAND_SHAKE)
+ return "HandShake";
+ if (cmd >= P_MAX_CMD)
+ return "Unknown";
+ return cmdnames[cmd];
+}
+
+/* for sending/receiving the bitmap,
+ * possibly in some encoding scheme */
+struct bm_xfer_ctx {
+ /* "const"
+ * stores total bits and long words
+ * of the bitmap, so we don't need to
+ * call the accessor functions over and again. */
+ unsigned long bm_bits;
+ unsigned long bm_words;
+ /* during xfer, current position within the bitmap */
+ unsigned long bit_offset;
+ unsigned long word_offset;
+
+ /* statistics; index: (h->command == P_BITMAP) */
+ unsigned packets[2];
+ unsigned bytes[2];
+};
+
+extern void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+ const char *direction, struct bm_xfer_ctx *c);
+
+static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
+{
+ /* word_offset counts "native long words" (32 or 64 bit),
+ * aligned at 64 bit.
+ * Encoded packet may end at an unaligned bit offset.
+ * In case a fallback clear text packet is transmitted in
+ * between, we adjust this offset back to the last 64bit
+ * aligned "native long word", which makes coding and decoding
+ * the plain text bitmap much more convenient. */
+#if BITS_PER_LONG == 64
+ c->word_offset = c->bit_offset >> 6;
+#elif BITS_PER_LONG == 32
+ c->word_offset = c->bit_offset >> 5;
+ c->word_offset &= ~(1UL);
+#else
+# error "unsupported BITS_PER_LONG"
+#endif
+}
+
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
+/* This is the layout for a packet on the wire.
+ * The byteorder is the network byte order.
+ * (except block_id and barrier fields.
+ * these are pointers to local structs
+ * and have no relevance for the partner,
+ * which just echoes them as received.)
+ *
+ * NOTE that the payload starts at a long aligned offset,
+ * regardless of 32 or 64 bit arch!
+ */
+struct p_header {
+ u32 magic;
+ u16 command;
+ u16 length; /* bytes of data after this header */
+ u8 payload[0];
+} __packed;
+/* 8 bytes. packet FIXED for the next century! */
+
+/*
+ * short commands, packets without payload, plain p_header:
+ * P_PING
+ * P_PING_ACK
+ * P_BECOME_SYNC_TARGET
+ * P_BECOME_SYNC_SOURCE
+ * P_UNPLUG_REMOTE
+ */
+
+/*
+ * commands with out-of-struct payload:
+ * P_BITMAP (no additional fields)
+ * P_DATA, P_DATA_REPLY (see p_data)
+ * P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
+ */
+
+/* these defines must not be changed without changing the protocol version */
+#define DP_HARDBARRIER 1
+#define DP_RW_SYNC 2
+#define DP_MAY_SET_IN_SYNC 4
+
+struct p_data {
+ struct p_header head;
+ u64 sector; /* 64 bits sector number */
+ u64 block_id; /* to identify the request in protocol B&C */
+ u32 seq_num;
+ u32 dp_flags;
+} __packed;
+
+/*
+ * commands which share a struct:
+ * p_block_ack:
+ * P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
+ * P_DISCARD_ACK (proto C, two-primaries conflict detection)
+ * p_block_req:
+ * P_DATA_REQUEST, P_RS_DATA_REQUEST
+ */
+struct p_block_ack {
+ struct p_header head;
+ u64 sector;
+ u64 block_id;
+ u32 blksize;
+ u32 seq_num;
+} __packed;
+
+
+struct p_block_req {
+ struct p_header head;
+ u64 sector;
+ u64 block_id;
+ u32 blksize;
+ u32 pad; /* to multiple of 8 Byte */
+} __packed;
+
+/*
+ * commands with their own struct for additional fields:
+ * P_HAND_SHAKE
+ * P_BARRIER
+ * P_BARRIER_ACK
+ * P_SYNC_PARAM
+ * ReportParams
+ */
+
+struct p_handshake {
+ struct p_header head; /* 8 bytes */
+ u32 protocol_min;
+ u32 feature_flags;
+ u32 protocol_max;
+
+ /* should be more than enough for future enhancements
+ * for now, feature_flags and the reserverd array shall be zero.
+ */
+
+ u32 _pad;
+ u64 reserverd[7];
+} __packed;
+/* 80 bytes, FIXED for the next century */
+
+struct p_barrier {
+ struct p_header head;
+ u32 barrier; /* barrier number _handle_ only */
+ u32 pad; /* to multiple of 8 Byte */
+} __packed;
+
+struct p_barrier_ack {
+ struct p_header head;
+ u32 barrier;
+ u32 set_size;
+} __packed;
+
+struct p_rs_param {
+ struct p_header head;
+ u32 rate;
+
+ /* Since protocol version 88 and higher. */
+ char verify_alg[0];
+} __packed;
+
+struct p_rs_param_89 {
+ struct p_header head;
+ u32 rate;
+ /* protocol version 89: */
+ char verify_alg[SHARED_SECRET_MAX];
+ char csums_alg[SHARED_SECRET_MAX];
+} __packed;
+
+struct p_protocol {
+ struct p_header head;
+ u32 protocol;
+ u32 after_sb_0p;
+ u32 after_sb_1p;
+ u32 after_sb_2p;
+ u32 want_lose;
+ u32 two_primaries;
+
+ /* Since protocol version 87 and higher. */
+ char integrity_alg[0];
+
+} __packed;
+
+struct p_uuids {
+ struct p_header head;
+ u64 uuid[UI_EXTENDED_SIZE];
+} __packed;
+
+struct p_rs_uuid {
+ struct p_header head;
+ u64 uuid;
+} __packed;
+
+struct p_sizes {
+ struct p_header head;
+ u64 d_size; /* size of disk */
+ u64 u_size; /* user requested size */
+ u64 c_size; /* current exported size */
+ u32 max_segment_size; /* Maximal size of a BIO */
+ u32 queue_order_type;
+} __packed;
+
+struct p_state {
+ struct p_header head;
+ u32 state;
+} __packed;
+
+struct p_req_state {
+ struct p_header head;
+ u32 mask;
+ u32 val;
+} __packed;
+
+struct p_req_state_reply {
+ struct p_header head;
+ u32 retcode;
+} __packed;
+
+struct p_drbd06_param {
+ u64 size;
+ u32 state;
+ u32 blksize;
+ u32 protocol;
+ u32 version;
+ u32 gen_cnt[5];
+ u32 bit_map_gen[5];
+} __packed;
+
+struct p_discard {
+ struct p_header head;
+ u64 block_id;
+ u32 seq_num;
+ u32 pad;
+} __packed;
+
+/* Valid values for the encoding field.
+ * Bump proto version when changing this. */
+enum drbd_bitmap_code {
+ /* RLE_VLI_Bytes = 0,
+ * and other bit variants had been defined during
+ * algorithm evaluation. */
+ RLE_VLI_Bits = 2,
+};
+
+struct p_compressed_bm {
+ struct p_header head;
+ /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
+ * (encoding & 0x80): polarity (set/unset) of first runlength
+ * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
+ * used to pad up to head.length bytes
+ */
+ u8 encoding;
+
+ u8 code[0];
+} __packed;
+
+/* DCBP: Drbd Compressed Bitmap Packet ... */
+static inline enum drbd_bitmap_code
+DCBP_get_code(struct p_compressed_bm *p)
+{
+ return (enum drbd_bitmap_code)(p->encoding & 0x0f);
+}
+
+static inline void
+DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
+{
+ BUG_ON(code & ~0xf);
+ p->encoding = (p->encoding & ~0xf) | code;
+}
+
+static inline int
+DCBP_get_start(struct p_compressed_bm *p)
+{
+ return (p->encoding & 0x80) != 0;
+}
+
+static inline void
+DCBP_set_start(struct p_compressed_bm *p, int set)
+{
+ p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
+}
+
+static inline int
+DCBP_get_pad_bits(struct p_compressed_bm *p)
+{
+ return (p->encoding >> 4) & 0x7;
+}
+
+static inline void
+DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
+{
+ BUG_ON(n & ~0x7);
+ p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
+}
+
+/* one bitmap packet, including the p_header,
+ * should fit within one _architecture independend_ page.
+ * so we need to use the fixed size 4KiB page size
+ * most architechtures have used for a long time.
+ */
+#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header))
+#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
+#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
+#if (PAGE_SIZE < 4096)
+/* drbd_send_bitmap / receive_bitmap would break horribly */
+#error "PAGE_SIZE too small"
+#endif
+
+union p_polymorph {
+ struct p_header header;
+ struct p_handshake handshake;
+ struct p_data data;
+ struct p_block_ack block_ack;
+ struct p_barrier barrier;
+ struct p_barrier_ack barrier_ack;
+ struct p_rs_param_89 rs_param_89;
+ struct p_protocol protocol;
+ struct p_sizes sizes;
+ struct p_uuids uuids;
+ struct p_state state;
+ struct p_req_state req_state;
+ struct p_req_state_reply req_state_reply;
+ struct p_block_req block_req;
+} __packed;
+
+/**********************************************************************/
+enum drbd_thread_state {
+ None,
+ Running,
+ Exiting,
+ Restarting
+};
+
+struct drbd_thread {
+ spinlock_t t_lock;
+ struct task_struct *task;
+ struct completion stop;
+ enum drbd_thread_state t_state;
+ int (*function) (struct drbd_thread *);
+ struct drbd_conf *mdev;
+ int reset_cpu_mask;
+};
+
+static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
+{
+ /* THINK testing the t_state seems to be uncritical in all cases
+ * (but thread_{start,stop}), so we can read it *without* the lock.
+ * --lge */
+
+ smp_rmb();
+ return thi->t_state;
+}
+
+
+/*
+ * Having this as the first member of a struct provides sort of "inheritance".
+ * "derived" structs can be "drbd_queue_work()"ed.
+ * The callback should know and cast back to the descendant struct.
+ * drbd_request and drbd_epoch_entry are descendants of drbd_work.
+ */
+struct drbd_work;
+typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
+struct drbd_work {
+ struct list_head list;
+ drbd_work_cb cb;
+};
+
+struct drbd_tl_epoch;
+struct drbd_request {
+ struct drbd_work w;
+ struct drbd_conf *mdev;
+
+ /* if local IO is not allowed, will be NULL.
+ * if local IO _is_ allowed, holds the locally submitted bio clone,
+ * or, after local IO completion, the ERR_PTR(error).
+ * see drbd_endio_pri(). */
+ struct bio *private_bio;
+
+ struct hlist_node colision;
+ sector_t sector;
+ unsigned int size;
+ unsigned int epoch; /* barrier_nr */
+
+ /* barrier_nr: used to check on "completion" whether this req was in
+ * the current epoch, and we therefore have to close it,
+ * starting a new epoch...
+ */
+
+ /* up to here, the struct layout is identical to drbd_epoch_entry;
+ * we might be able to use that to our advantage... */
+
+ struct list_head tl_requests; /* ring list in the transfer log */
+ struct bio *master_bio; /* master bio pointer */
+ unsigned long rq_state; /* see comments above _req_mod() */
+ int seq_num;
+ unsigned long start_time;
+};
+
+struct drbd_tl_epoch {
+ struct drbd_work w;
+ struct list_head requests; /* requests before */
+ struct drbd_tl_epoch *next; /* pointer to the next barrier */
+ unsigned int br_number; /* the barriers identifier. */
+ int n_req; /* number of requests attached before this barrier */
+};
+
+struct drbd_request;
+
+/* These Tl_epoch_entries may be in one of 6 lists:
+ active_ee .. data packet being written
+ sync_ee .. syncer block being written
+ done_ee .. block written, need to send P_WRITE_ACK
+ read_ee .. [RS]P_DATA_REQUEST being read
+*/
+
+struct drbd_epoch {
+ struct list_head list;
+ unsigned int barrier_nr;
+ atomic_t epoch_size; /* increased on every request added. */
+ atomic_t active; /* increased on every req. added, and dec on every finished. */
+ unsigned long flags;
+};
+
+/* drbd_epoch flag bits */
+enum {
+ DE_BARRIER_IN_NEXT_EPOCH_ISSUED,
+ DE_BARRIER_IN_NEXT_EPOCH_DONE,
+ DE_CONTAINS_A_BARRIER,
+ DE_HAVE_BARRIER_NUMBER,
+ DE_IS_FINISHING,
+};
+
+enum epoch_event {
+ EV_PUT,
+ EV_GOT_BARRIER_NR,
+ EV_BARRIER_DONE,
+ EV_BECAME_LAST,
+ EV_CLEANUP = 32, /* used as flag */
+};
+
+struct drbd_epoch_entry {
+ struct drbd_work w;
+ struct drbd_conf *mdev;
+ struct bio *private_bio;
+ struct hlist_node colision;
+ sector_t sector;
+ unsigned int size;
+ struct drbd_epoch *epoch;
+
+ /* up to here, the struct layout is identical to drbd_request;
+ * we might be able to use that to our advantage... */
+
+ unsigned int flags;
+ u64 block_id;
+};
+
+struct drbd_wq_barrier {
+ struct drbd_work w;
+ struct completion done;
+};
+
+struct digest_info {
+ int digest_size;
+ void *digest;
+};
+
+/* ee flag bits */
+enum {
+ __EE_CALL_AL_COMPLETE_IO,
+ __EE_CONFLICT_PENDING,
+ __EE_MAY_SET_IN_SYNC,
+ __EE_IS_BARRIER,
+};
+#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
+#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING)
+#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
+#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
+
+/* global flag bits */
+enum {
+ CREATE_BARRIER, /* next P_DATA is preceeded by a P_BARRIER */
+ SIGNAL_ASENDER, /* whether asender wants to be interrupted */
+ SEND_PING, /* whether asender should send a ping asap */
+
+ STOP_SYNC_TIMER, /* tell timer to cancel itself */
+ UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
+ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
+ MD_DIRTY, /* current uuids and flags not yet on disk */
+ DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */
+ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */
+ CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */
+ CL_ST_CHG_SUCCESS,
+ CL_ST_CHG_FAIL,
+ CRASHED_PRIMARY, /* This node was a crashed primary.
+ * Gets cleared when the state.conn
+ * goes into C_CONNECTED state. */
+ WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */
+ NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */
+ CONSIDER_RESYNC,
+
+ MD_NO_BARRIER, /* meta data device does not support barriers,
+ so don't even try */
+ SUSPEND_IO, /* suspend application io */
+ BITMAP_IO, /* suspend application io;
+ once no more io in flight, start bitmap io */
+ BITMAP_IO_QUEUED, /* Started bitmap IO */
+ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
+ NET_CONGESTED, /* The data socket is congested */
+
+ CONFIG_PENDING, /* serialization of (re)configuration requests.
+ * if set, also prevents the device from dying */
+ DEVICE_DYING, /* device became unconfigured,
+ * but worker thread is still handling the cleanup.
+ * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
+ * while this is set. */
+ RESIZE_PENDING, /* Size change detected locally, waiting for the response from
+ * the peer, if it changed there as well. */
+};
+
+struct drbd_bitmap; /* opaque for drbd_conf */
+
+/* TODO sort members for performance
+ * MAYBE group them further */
+
+/* THINK maybe we actually want to use the default "event/%s" worker threads
+ * or similar in linux 2.6, which uses per cpu data and threads.
+ *
+ * To be general, this might need a spin_lock member.
+ * For now, please use the mdev->req_lock to protect list_head,
+ * see drbd_queue_work below.
+ */
+struct drbd_work_queue {
+ struct list_head q;
+ struct semaphore s; /* producers up it, worker down()s it */
+ spinlock_t q_lock; /* to protect the list. */
+};
+
+struct drbd_socket {
+ struct drbd_work_queue work;
+ struct mutex mutex;
+ struct socket *socket;
+ /* this way we get our
+ * send/receive buffers off the stack */
+ union p_polymorph sbuf;
+ union p_polymorph rbuf;
+};
+
+struct drbd_md {
+ u64 md_offset; /* sector offset to 'super' block */
+
+ u64 la_size_sect; /* last agreed size, unit sectors */
+ u64 uuid[UI_SIZE];
+ u64 device_uuid;
+ u32 flags;
+ u32 md_size_sect;
+
+ s32 al_offset; /* signed relative sector offset to al area */
+ s32 bm_offset; /* signed relative sector offset to bitmap */
+
+ /* u32 al_nr_extents; important for restoring the AL
+ * is stored into sync_conf.al_extents, which in turn
+ * gets applied to act_log->nr_elements
+ */
+};
+
+/* for sync_conf and other types... */
+#define NL_PACKET(name, number, fields) struct name { fields };
+#define NL_INTEGER(pn,pr,member) int member;
+#define NL_INT64(pn,pr,member) __u64 member;
+#define NL_BIT(pn,pr,member) unsigned member:1;
+#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
+#include "linux/drbd_nl.h"
+
+struct drbd_backing_dev {
+ struct block_device *backing_bdev;
+ struct block_device *md_bdev;
+ struct file *lo_file;
+ struct file *md_file;
+ struct drbd_md md;
+ struct disk_conf dc; /* The user provided config... */
+ sector_t known_size; /* last known size of that backing device */
+};
+
+struct drbd_md_io {
+ struct drbd_conf *mdev;
+ struct completion event;
+ int error;
+};
+
+struct bm_io_work {
+ struct drbd_work w;
+ char *why;
+ int (*io_fn)(struct drbd_conf *mdev);
+ void (*done)(struct drbd_conf *mdev, int rv);
+};
+
+enum write_ordering_e {
+ WO_none,
+ WO_drain_io,
+ WO_bdev_flush,
+ WO_bio_barrier
+};
+
+struct drbd_conf {
+ /* things that are stored as / read from meta data on disk */
+ unsigned long flags;
+
+ /* configured by drbdsetup */
+ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
+ struct syncer_conf sync_conf;
+ struct drbd_backing_dev *ldev __protected_by(local);
+
+ sector_t p_size; /* partner's disk size */
+ struct request_queue *rq_queue;
+ struct block_device *this_bdev;
+ struct gendisk *vdisk;
+
+ struct drbd_socket data; /* data/barrier/cstate/parameter packets */
+ struct drbd_socket meta; /* ping/ack (metadata) packets */
+ int agreed_pro_version; /* actually used protocol version */
+ unsigned long last_received; /* in jiffies, either socket */
+ unsigned int ko_count;
+ struct drbd_work resync_work,
+ unplug_work,
+ md_sync_work;
+ struct timer_list resync_timer;
+ struct timer_list md_sync_timer;
+
+ /* Used after attach while negotiating new disk state. */
+ union drbd_state new_state_tmp;
+
+ union drbd_state state;
+ wait_queue_head_t misc_wait;
+ wait_queue_head_t state_wait; /* upon each state change. */
+ unsigned int send_cnt;
+ unsigned int recv_cnt;
+ unsigned int read_cnt;
+ unsigned int writ_cnt;
+ unsigned int al_writ_cnt;
+ unsigned int bm_writ_cnt;
+ atomic_t ap_bio_cnt; /* Requests we need to complete */
+ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
+ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
+ atomic_t unacked_cnt; /* Need to send replys for */
+ atomic_t local_cnt; /* Waiting for local completion */
+ atomic_t net_cnt; /* Users of net_conf */
+ spinlock_t req_lock;
+ struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
+ struct drbd_tl_epoch *newest_tle;
+ struct drbd_tl_epoch *oldest_tle;
+ struct list_head out_of_sequence_requests;
+ struct hlist_head *tl_hash;
+ unsigned int tl_hash_s;
+
+ /* blocks to sync in this run [unit BM_BLOCK_SIZE] */
+ unsigned long rs_total;
+ /* number of sync IOs that failed in this run */
+ unsigned long rs_failed;
+ /* Syncer's start time [unit jiffies] */
+ unsigned long rs_start;
+ /* cumulated time in PausedSyncX state [unit jiffies] */
+ unsigned long rs_paused;
+ /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
+ unsigned long rs_mark_left;
+ /* marks's time [unit jiffies] */
+ unsigned long rs_mark_time;
+ /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */
+ unsigned long rs_same_csum;
+
+ /* where does the admin want us to start? (sector) */
+ sector_t ov_start_sector;
+ /* where are we now? (sector) */
+ sector_t ov_position;
+ /* Start sector of out of sync range (to merge printk reporting). */
+ sector_t ov_last_oos_start;
+ /* size of out-of-sync range in sectors. */
+ sector_t ov_last_oos_size;
+ unsigned long ov_left; /* in bits */
+ struct crypto_hash *csums_tfm;
+ struct crypto_hash *verify_tfm;
+
+ struct drbd_thread receiver;
+ struct drbd_thread worker;
+ struct drbd_thread asender;
+ struct drbd_bitmap *bitmap;
+ unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
+
+ /* Used to track operations of resync... */
+ struct lru_cache *resync;
+ /* Number of locked elements in resync LRU */
+ unsigned int resync_locked;
+ /* resync extent number waiting for application requests */
+ unsigned int resync_wenr;
+
+ int open_cnt;
+ u64 *p_uuid;
+ struct drbd_epoch *current_epoch;
+ spinlock_t epoch_lock;
+ unsigned int epochs;
+ enum write_ordering_e write_ordering;
+ struct list_head active_ee; /* IO in progress */
+ struct list_head sync_ee; /* IO in progress */
+ struct list_head done_ee; /* send ack */
+ struct list_head read_ee; /* IO in progress */
+ struct list_head net_ee; /* zero-copy network send in progress */
+ struct hlist_head *ee_hash; /* is proteced by req_lock! */
+ unsigned int ee_hash_s;
+
+ /* this one is protected by ee_lock, single thread */
+ struct drbd_epoch_entry *last_write_w_barrier;
+
+ int next_barrier_nr;
+ struct hlist_head *app_reads_hash; /* is proteced by req_lock */
+ struct list_head resync_reads;
+ atomic_t pp_in_use;
+ wait_queue_head_t ee_wait;
+ struct page *md_io_page; /* one page buffer for md_io */
+ struct page *md_io_tmpp; /* for logical_block_size != 512 */
+ struct mutex md_io_mutex; /* protects the md_io_buffer */
+ spinlock_t al_lock;
+ wait_queue_head_t al_wait;
+ struct lru_cache *act_log; /* activity log */
+ unsigned int al_tr_number;
+ int al_tr_cycle;
+ int al_tr_pos; /* position of the next transaction in the journal */
+ struct crypto_hash *cram_hmac_tfm;
+ struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
+ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
+ void *int_dig_out;
+ void *int_dig_in;
+ void *int_dig_vv;
+ wait_queue_head_t seq_wait;
+ atomic_t packet_seq;
+ unsigned int peer_seq;
+ spinlock_t peer_seq_lock;
+ unsigned int minor;
+ unsigned long comm_bm_set; /* communicated number of set bits. */
+ cpumask_var_t cpu_mask;
+ struct bm_io_work bm_io_work;
+ u64 ed_uuid; /* UUID of the exposed data */
+ struct mutex state_mutex;
+ char congestion_reason; /* Why we where congested... */
+};
+
+static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
+{
+ struct drbd_conf *mdev;
+
+ mdev = minor < minor_count ? minor_table[minor] : NULL;
+
+ return mdev;
+}
+
+static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
+{
+ return mdev->minor;
+}
+
+/* returns 1 if it was successfull,
+ * returns 0 if there was no data socket.
+ * so wherever you are going to use the data.socket, e.g. do
+ * if (!drbd_get_data_sock(mdev))
+ * return 0;
+ * CODE();
+ * drbd_put_data_sock(mdev);
+ */
+static inline int drbd_get_data_sock(struct drbd_conf *mdev)
+{
+ mutex_lock(&mdev->data.mutex);
+ /* drbd_disconnect() could have called drbd_free_sock()
+ * while we were waiting in down()... */
+ if (unlikely(mdev->data.socket == NULL)) {
+ mutex_unlock(&mdev->data.mutex);
+ return 0;
+ }
+ return 1;
+}
+
+static inline void drbd_put_data_sock(struct drbd_conf *mdev)
+{
+ mutex_unlock(&mdev->data.mutex);
+}
+
+/*
+ * function declarations
+ *************************/
+
+/* drbd_main.c */
+
+enum chg_state_flags {
+ CS_HARD = 1,
+ CS_VERBOSE = 2,
+ CS_WAIT_COMPLETE = 4,
+ CS_SERIALIZE = 8,
+ CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
+};
+
+extern void drbd_init_set_defaults(struct drbd_conf *mdev);
+extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+ union drbd_state mask, union drbd_state val);
+extern void drbd_force_state(struct drbd_conf *, union drbd_state,
+ union drbd_state);
+extern int _drbd_request_state(struct drbd_conf *, union drbd_state,
+ union drbd_state, enum chg_state_flags);
+extern int __drbd_set_state(struct drbd_conf *, union drbd_state,
+ enum chg_state_flags, struct completion *done);
+extern void print_st_err(struct drbd_conf *, union drbd_state,
+ union drbd_state, int);
+extern int drbd_thread_start(struct drbd_thread *thi);
+extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
+#ifdef CONFIG_SMP
+extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev);
+extern void drbd_calc_cpu_mask(struct drbd_conf *mdev);
+#else
+#define drbd_thread_current_set_cpu(A) ({})
+#define drbd_calc_cpu_mask(A) ({})
+#endif
+extern void drbd_free_resources(struct drbd_conf *mdev);
+extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+ unsigned int set_size);
+extern void tl_clear(struct drbd_conf *mdev);
+extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
+extern void drbd_free_sock(struct drbd_conf *mdev);
+extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+ void *buf, size_t size, unsigned msg_flags);
+extern int drbd_send_protocol(struct drbd_conf *mdev);
+extern int drbd_send_uuids(struct drbd_conf *mdev);
+extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
+extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
+extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply);
+extern int _drbd_send_state(struct drbd_conf *mdev);
+extern int drbd_send_state(struct drbd_conf *mdev);
+extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
+ enum drbd_packets cmd, struct p_header *h,
+ size_t size, unsigned msg_flags);
+#define USE_DATA_SOCKET 1
+#define USE_META_SOCKET 0
+extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
+ enum drbd_packets cmd, struct p_header *h,
+ size_t size);
+extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
+ char *data, size_t size);
+extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
+extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
+ u32 set_size);
+extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct drbd_epoch_entry *e);
+extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_block_req *rp);
+extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_data *dp);
+extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+ sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct drbd_epoch_entry *e);
+extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
+extern int _drbd_send_barrier(struct drbd_conf *mdev,
+ struct drbd_tl_epoch *barrier);
+extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
+ sector_t sector, int size, u64 block_id);
+extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
+ sector_t sector,int size,
+ void *digest, int digest_size,
+ enum drbd_packets cmd);
+extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size);
+
+extern int drbd_send_bitmap(struct drbd_conf *mdev);
+extern int _drbd_send_bitmap(struct drbd_conf *mdev);
+extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode);
+extern void drbd_free_bc(struct drbd_backing_dev *ldev);
+extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
+
+/* drbd_meta-data.c (still in drbd_main.c) */
+extern void drbd_md_sync(struct drbd_conf *mdev);
+extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
+/* maybe define them below as inline? */
+extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local);
+extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
+extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
+extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
+extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
+extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ void (*done)(struct drbd_conf *, int),
+ char *why);
+extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
+extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
+extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
+
+
+/* Meta data layout
+ We reserve a 128MB Block (4k aligned)
+ * either at the end of the backing device
+ * or on a seperate meta data device. */
+
+#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */
+/* The following numbers are sectors */
+#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */
+#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */
+/* Allows up to about 3.8TB */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
+
+/* Since the smalles IO unit is usually 512 byte */
+#define MD_SECTOR_SHIFT 9
+#define MD_SECTOR_SIZE (1<<MD_SECTOR_SHIFT)
+
+/* activity log */
+#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */
+#define AL_EXTENT_SHIFT 22 /* One extent represents 4M Storage */
+#define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
+
+#if BITS_PER_LONG == 32
+#define LN2_BPL 5
+#define cpu_to_lel(A) cpu_to_le32(A)
+#define lel_to_cpu(A) le32_to_cpu(A)
+#elif BITS_PER_LONG == 64
+#define LN2_BPL 6
+#define cpu_to_lel(A) cpu_to_le64(A)
+#define lel_to_cpu(A) le64_to_cpu(A)
+#else
+#error "LN2 of BITS_PER_LONG unknown!"
+#endif
+
+/* resync bitmap */
+/* 16MB sized 'bitmap extent' to track syncer usage */
+struct bm_extent {
+ int rs_left; /* number of bits set (out of sync) in this extent. */
+ int rs_failed; /* number of failed resync requests in this extent. */
+ unsigned long flags;
+ struct lc_element lce;
+};
+
+#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */
+#define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */
+
+/* drbd_bitmap.c */
+/*
+ * We need to store one bit for a block.
+ * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
+ * Bit 0 ==> local node thinks this block is binary identical on both nodes
+ * Bit 1 ==> local node thinks this block needs to be synced.
+ */
+
+#define BM_BLOCK_SHIFT 12 /* 4k per bit */
+#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
+/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
+ * per sector of on disk bitmap */
+#define BM_EXT_SHIFT (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3) /* = 24 */
+#define BM_EXT_SIZE (1<<BM_EXT_SHIFT)
+
+#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
+#error "HAVE YOU FIXED drbdmeta AS WELL??"
+#endif
+
+/* thus many _storage_ sectors are described by one bit */
+#define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SHIFT-9))
+#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9))
+#define BM_SECT_PER_BIT BM_BIT_TO_SECT(1)
+
+/* bit to represented kilo byte conversion */
+#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10))
+
+/* in which _bitmap_ extent (resp. sector) the bit for a certain
+ * _storage_ sector is located in */
+#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
+
+/* how much _storage_ sectors we have per bitmap sector */
+#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
+#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
+
+/* in one sector of the bitmap, we have this many activity_log extents. */
+#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
+#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+
+#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
+#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
+
+/* the extent in "PER_EXTENT" below is an activity log extent
+ * we need that many (long words/bytes) to store the bitmap
+ * of one AL_EXTENT_SIZE chunk of storage.
+ * we can store the bitmap for that many AL_EXTENTS within
+ * one sector of the _on_disk_ bitmap:
+ * bit 0 bit 37 bit 38 bit (512*8)-1
+ * ...|........|........|.. // ..|........|
+ * sect. 0 `296 `304 ^(512*8*8)-1
+ *
+#define BM_WORDS_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
+#define BM_BYTES_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 ) // 128
+#define BM_EXT_PER_SECT ( 512 / BM_BYTES_PER_EXTENT ) // 4
+ */
+
+#define DRBD_MAX_SECTORS_32 (0xffffffffLU)
+#define DRBD_MAX_SECTORS_BM \
+ ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL<<(BM_EXT_SHIFT-9)))
+#if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32
+#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
+#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM
+#elif !defined(CONFIG_LBD) && BITS_PER_LONG == 32
+#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32
+#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
+#else
+#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
+/* 16 TB in units of sectors */
+#if BITS_PER_LONG == 32
+/* adjust by one page worth of bitmap,
+ * so we won't wrap around in drbd_bm_find_next_bit.
+ * you should use 64bit OS for that much storage, anyways. */
+#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
+#else
+#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32)
+#endif
+#endif
+
+/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
+ * With a value of 6 all IO in one 32K block make it to the same slot of the
+ * hash table. */
+#define HT_SHIFT 6
+#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
+
+/* Number of elements in the app_reads_hash */
+#define APP_R_HSIZE 15
+
+extern int drbd_bm_init(struct drbd_conf *mdev);
+extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors);
+extern void drbd_bm_cleanup(struct drbd_conf *mdev);
+extern void drbd_bm_set_all(struct drbd_conf *mdev);
+extern void drbd_bm_clear_all(struct drbd_conf *mdev);
+extern int drbd_bm_set_bits(
+ struct drbd_conf *mdev, unsigned long s, unsigned long e);
+extern int drbd_bm_clear_bits(
+ struct drbd_conf *mdev, unsigned long s, unsigned long e);
+/* bm_set_bits variant for use while holding drbd_bm_lock */
+extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
+ const unsigned long s, const unsigned long e);
+extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
+extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
+extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local);
+extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
+extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
+ unsigned long al_enr);
+extern size_t drbd_bm_words(struct drbd_conf *mdev);
+extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
+extern sector_t drbd_bm_capacity(struct drbd_conf *mdev);
+extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
+/* bm_find_next variants for use while you hold drbd_bm_lock() */
+extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
+extern int drbd_bm_rs_done(struct drbd_conf *mdev);
+/* for receive_bitmap */
+extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
+ size_t number, unsigned long *buffer);
+/* for _drbd_send_bitmap and drbd_bm_write_sect */
+extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
+ size_t number, unsigned long *buffer);
+
+extern void drbd_bm_lock(struct drbd_conf *mdev, char *why);
+extern void drbd_bm_unlock(struct drbd_conf *mdev);
+
+extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
+/* drbd_main.c */
+
+extern struct kmem_cache *drbd_request_cache;
+extern struct kmem_cache *drbd_ee_cache; /* epoch entries */
+extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
+extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
+extern mempool_t *drbd_request_mempool;
+extern mempool_t *drbd_ee_mempool;
+
+extern struct page *drbd_pp_pool; /* drbd's page pool */
+extern spinlock_t drbd_pp_lock;
+extern int drbd_pp_vacant;
+extern wait_queue_head_t drbd_pp_wait;
+
+extern rwlock_t global_state_lock;
+
+extern struct drbd_conf *drbd_new_device(unsigned int minor);
+extern void drbd_free_mdev(struct drbd_conf *mdev);
+
+extern int proc_details;
+
+/* drbd_req */
+extern int drbd_make_request_26(struct request_queue *q, struct bio *bio);
+extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
+extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
+extern int is_valid_ar_handle(struct drbd_request *, sector_t);
+
+
+/* drbd_nl.c */
+extern void drbd_suspend_io(struct drbd_conf *mdev);
+extern void drbd_resume_io(struct drbd_conf *mdev);
+extern char *ppsize(char *buf, unsigned long long size);
+extern sector_t drbd_new_dev_size(struct drbd_conf *,
+ struct drbd_backing_dev *);
+enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
+extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local);
+extern void resync_after_online_grow(struct drbd_conf *);
+extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
+extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
+ int force);
+enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
+
+/* drbd_worker.c */
+extern int drbd_worker(struct drbd_thread *thi);
+extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
+extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
+extern void resume_next_sg(struct drbd_conf *mdev);
+extern void suspend_other_sg(struct drbd_conf *mdev);
+extern int drbd_resync_finished(struct drbd_conf *mdev);
+/* maybe rather drbd_main.c ? */
+extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev, sector_t sector, int rw);
+extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
+
+static inline void ov_oos_print(struct drbd_conf *mdev)
+{
+ if (mdev->ov_last_oos_size) {
+ dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
+ (unsigned long long)mdev->ov_last_oos_start,
+ (unsigned long)mdev->ov_last_oos_size);
+ }
+ mdev->ov_last_oos_size=0;
+}
+
+
+extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
+/* worker callbacks */
+extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
+extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
+extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
+extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
+
+extern void resync_timer_fn(unsigned long data);
+
+/* drbd_receiver.c */
+extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
+extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
+ u64 id,
+ sector_t sector,
+ unsigned int data_size,
+ gfp_t gfp_mask) __must_hold(local);
+extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e);
+extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+ struct list_head *head);
+extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+ struct list_head *head);
+extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
+extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
+extern void drbd_flush_workqueue(struct drbd_conf *mdev);
+
+/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
+ * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
+static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int optlen)
+{
+ int err;
+ if (level == SOL_SOCKET)
+ err = sock_setsockopt(sock, level, optname, optval, optlen);
+ else
+ err = sock->ops->setsockopt(sock, level, optname, optval,
+ optlen);
+ return err;
+}
+
+static inline void drbd_tcp_cork(struct socket *sock)
+{
+ int __user val = 1;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
+ (char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_uncork(struct socket *sock)
+{
+ int __user val = 0;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
+ (char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_nodelay(struct socket *sock)
+{
+ int __user val = 1;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
+ (char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_quickack(struct socket *sock)
+{
+ int __user val = 1;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
+ (char __user *)&val, sizeof(val));
+}
+
+void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo);
+
+/* drbd_proc.c */
+extern struct proc_dir_entry *drbd_proc;
+extern struct file_operations drbd_proc_fops;
+extern const char *drbd_conn_str(enum drbd_conns s);
+extern const char *drbd_role_str(enum drbd_role s);
+
+/* drbd_actlog.c */
+extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
+extern int drbd_rs_del_all(struct drbd_conf *mdev);
+extern void drbd_rs_failed_io(struct drbd_conf *mdev,
+ sector_t sector, int size);
+extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
+extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
+ int size, const char *file, const unsigned int line);
+#define drbd_set_in_sync(mdev, sector, size) \
+ __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
+extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+ int size, const char *file, const unsigned int line);
+#define drbd_set_out_of_sync(mdev, sector, size) \
+ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
+extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
+extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev);
+extern void drbd_al_shrink(struct drbd_conf *mdev);
+
+
+/* drbd_nl.c */
+
+void drbd_nl_cleanup(void);
+int __init drbd_nl_init(void);
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
+void drbd_bcast_sync_progress(struct drbd_conf *mdev);
+void drbd_bcast_ee(struct drbd_conf *mdev,
+ const char *reason, const int dgs,
+ const char* seen_hash, const char* calc_hash,
+ const struct drbd_epoch_entry* e);
+
+
+/**
+ * DOC: DRBD State macros
+ *
+ * These macros are used to express state changes in easily readable form.
+ *
+ * The NS macros expand to a mask and a value, that can be bit ored onto the
+ * current state as soon as the spinlock (req_lock) was taken.
+ *
+ * The _NS macros are used for state functions that get called with the
+ * spinlock. These macros expand directly to the new state value.
+ *
+ * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
+ * to express state changes that affect more than one aspect of the state.
+ *
+ * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
+ * Means that the network connection was established and that the peer
+ * is in secondary role.
+ */
+#define role_MASK R_MASK
+#define peer_MASK R_MASK
+#define disk_MASK D_MASK
+#define pdsk_MASK D_MASK
+#define conn_MASK C_MASK
+#define susp_MASK 1
+#define user_isp_MASK 1
+#define aftr_isp_MASK 1
+
+#define NS(T, S) \
+ ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
+ ({ union drbd_state val; val.i = 0; val.T = (S); val; })
+#define NS2(T1, S1, T2, S2) \
+ ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+ mask.T2 = T2##_MASK; mask; }), \
+ ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+ val.T2 = (S2); val; })
+#define NS3(T1, S1, T2, S2, T3, S3) \
+ ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+ mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
+ ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+ val.T2 = (S2); val.T3 = (S3); val; })
+
+#define _NS(D, T, S) \
+ D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
+#define _NS2(D, T1, S1, T2, S2) \
+ D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
+ __ns.T2 = (S2); __ns; })
+#define _NS3(D, T1, S1, T2, S2, T3, S3) \
+ D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
+ __ns.T2 = (S2); __ns.T3 = (S3); __ns; })
+
+/*
+ * inline helper functions
+ *************************/
+
+static inline void drbd_state_lock(struct drbd_conf *mdev)
+{
+ wait_event(mdev->misc_wait,
+ !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+}
+
+static inline void drbd_state_unlock(struct drbd_conf *mdev)
+{
+ clear_bit(CLUSTER_ST_CHANGE, &mdev->flags);
+ wake_up(&mdev->misc_wait);
+}
+
+static inline int _drbd_set_state(struct drbd_conf *mdev,
+ union drbd_state ns, enum chg_state_flags flags,
+ struct completion *done)
+{
+ int rv;
+
+ read_lock(&global_state_lock);
+ rv = __drbd_set_state(mdev, ns, flags, done);
+ read_unlock(&global_state_lock);
+
+ return rv;
+}
+
+/**
+ * drbd_request_state() - Reqest a state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is verbose
+ * quite verbose in case the state change is not possible, and all those
+ * state changes are globally serialized.
+ */
+static inline int drbd_request_state(struct drbd_conf *mdev,
+ union drbd_state mask,
+ union drbd_state val)
+{
+ return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
+}
+
+#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+{
+ switch (mdev->ldev->dc.on_io_error) {
+ case EP_PASS_ON:
+ if (!forcedetach) {
+ if (printk_ratelimit())
+ dev_err(DEV, "Local IO failed in %s."
+ "Passing error on...\n", where);
+ break;
+ }
+ /* NOTE fall through to detach case if forcedetach set */
+ case EP_DETACH:
+ case EP_CALL_HELPER:
+ if (mdev->state.disk > D_FAILED) {
+ _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
+ dev_err(DEV, "Local IO failed in %s."
+ "Detaching...\n", where);
+ }
+ break;
+ }
+}
+
+/**
+ * drbd_chk_io_error: Handle the on_io_error setting, should be called from all io completion handlers
+ * @mdev: DRBD device.
+ * @error: Error code passed to the IO completion callback
+ * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
+ *
+ * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
+ */
+#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
+static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
+ int error, int forcedetach, const char *where)
+{
+ if (error) {
+ unsigned long flags;
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ __drbd_chk_io_error_(mdev, forcedetach, where);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+ }
+}
+
+
+/**
+ * drbd_md_first_sector() - Returns the first sector number of the meta data area
+ * @bdev: Meta data block device.
+ *
+ * BTW, for internal meta data, this happens to be the maximum capacity
+ * we could agree upon with our peer node.
+ */
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+{
+ switch (bdev->dc.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ return bdev->md.md_offset + bdev->md.bm_offset;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ default:
+ return bdev->md.md_offset;
+ }
+}
+
+/**
+ * drbd_md_last_sector() - Return the last sector number of the meta data area
+ * @bdev: Meta data block device.
+ */
+static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
+{
+ switch (bdev->dc.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ return bdev->md.md_offset + MD_AL_OFFSET - 1;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ default:
+ return bdev->md.md_offset + bdev->md.md_size_sect;
+ }
+}
+
+/* Returns the number of 512 byte sectors of the device */
+static inline sector_t drbd_get_capacity(struct block_device *bdev)
+{
+ /* return bdev ? get_capacity(bdev->bd_disk) : 0; */
+ return bdev ? bdev->bd_inode->i_size >> 9 : 0;
+}
+
+/**
+ * drbd_get_max_capacity() - Returns the capacity we announce to out peer
+ * @bdev: Meta data block device.
+ *
+ * returns the capacity we announce to out peer. we clip ourselves at the
+ * various MAX_SECTORS, because if we don't, current implementation will
+ * oops sooner or later
+ */
+static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
+{
+ sector_t s;
+ switch (bdev->dc.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ s = drbd_get_capacity(bdev->backing_bdev)
+ ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+ drbd_md_first_sector(bdev))
+ : 0;
+ break;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+ drbd_get_capacity(bdev->backing_bdev));
+ /* clip at maximum size the meta device can support */
+ s = min_t(sector_t, s,
+ BM_EXT_TO_SECT(bdev->md.md_size_sect
+ - bdev->md.bm_offset));
+ break;
+ default:
+ s = min_t(sector_t, DRBD_MAX_SECTORS,
+ drbd_get_capacity(bdev->backing_bdev));
+ }
+ return s;
+}
+
+/**
+ * drbd_md_ss__() - Return the sector number of our meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Meta data block device.
+ */
+static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev)
+{
+ switch (bdev->dc.meta_dev_idx) {
+ default: /* external, some index */
+ return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
+ case DRBD_MD_INDEX_INTERNAL:
+ /* with drbd08, internal meta data is always "flexible" */
+ case DRBD_MD_INDEX_FLEX_INT:
+ /* sizeof(struct md_on_disk_07) == 4k
+ * position: last 4k aligned block of 4k size */
+ if (!bdev->backing_bdev) {
+ if (__ratelimit(&drbd_ratelimit_state)) {
+ dev_err(DEV, "bdev->backing_bdev==NULL\n");
+ dump_stack();
+ }
+ return 0;
+ }
+ return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
+ - MD_AL_OFFSET;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ return 0;
+ }
+}
+
+static inline void
+_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+ list_add_tail(&w->list, &q->q);
+ up(&q->s);
+}
+
+static inline void
+drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&q->q_lock, flags);
+ list_add(&w->list, &q->q);
+ up(&q->s); /* within the spinlock,
+ see comment near end of drbd_worker() */
+ spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&q->q_lock, flags);
+ list_add_tail(&w->list, &q->q);
+ up(&q->s); /* within the spinlock,
+ see comment near end of drbd_worker() */
+ spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void wake_asender(struct drbd_conf *mdev)
+{
+ if (test_bit(SIGNAL_ASENDER, &mdev->flags))
+ force_sig(DRBD_SIG, mdev->asender.task);
+}
+
+static inline void request_ping(struct drbd_conf *mdev)
+{
+ set_bit(SEND_PING, &mdev->flags);
+ wake_asender(mdev);
+}
+
+static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
+ enum drbd_packets cmd)
+{
+ struct p_header h;
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping(struct drbd_conf *mdev)
+{
+ struct p_header h;
+ return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
+{
+ struct p_header h;
+ return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
+}
+
+static inline void drbd_thread_stop(struct drbd_thread *thi)
+{
+ _drbd_thread_stop(thi, FALSE, TRUE);
+}
+
+static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
+{
+ _drbd_thread_stop(thi, FALSE, FALSE);
+}
+
+static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
+{
+ _drbd_thread_stop(thi, TRUE, FALSE);
+}
+
+/* counts how many answer packets packets we expect from our peer,
+ * for either explicit application requests,
+ * or implicit barrier packets as necessary.
+ * increased:
+ * w_send_barrier
+ * _req_mod(req, queue_for_net_write or queue_for_net_read);
+ * it is much easier and equally valid to count what we queue for the
+ * worker, even before it actually was queued or send.
+ * (drbd_make_request_common; recovery path on read io-error)
+ * decreased:
+ * got_BarrierAck (respective tl_clear, tl_clear_barrier)
+ * _req_mod(req, data_received)
+ * [from receive_DataReply]
+ * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked)
+ * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
+ * for some reason it is NOT decreased in got_NegAck,
+ * but in the resulting cleanup code from report_params.
+ * we should try to remember the reason for that...
+ * _req_mod(req, send_failed or send_canceled)
+ * _req_mod(req, connection_lost_while_pending)
+ * [from tl_clear_barrier]
+ */
+static inline void inc_ap_pending(struct drbd_conf *mdev)
+{
+ atomic_inc(&mdev->ap_pending_cnt);
+}
+
+#define ERR_IF_CNT_IS_NEGATIVE(which) \
+ if (atomic_read(&mdev->which) < 0) \
+ dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \
+ __func__ , __LINE__ , \
+ atomic_read(&mdev->which))
+
+#define dec_ap_pending(mdev) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \
+ wake_up(&mdev->misc_wait); \
+ ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0)
+
+/* counts how many resync-related answers we still expect from the peer
+ * increase decrease
+ * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY)
+ * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK whith ID_SYNCER)
+ * (or P_NEG_ACK with ID_SYNCER)
+ */
+static inline void inc_rs_pending(struct drbd_conf *mdev)
+{
+ atomic_inc(&mdev->rs_pending_cnt);
+}
+
+#define dec_rs_pending(mdev) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ atomic_dec(&mdev->rs_pending_cnt); \
+ ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0)
+
+/* counts how many answers we still need to send to the peer.
+ * increased on
+ * receive_Data unless protocol A;
+ * we need to send a P_RECV_ACK (proto B)
+ * or P_WRITE_ACK (proto C)
+ * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK
+ * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA
+ * receive_Barrier_* we need to send a P_BARRIER_ACK
+ */
+static inline void inc_unacked(struct drbd_conf *mdev)
+{
+ atomic_inc(&mdev->unacked_cnt);
+}
+
+#define dec_unacked(mdev) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ atomic_dec(&mdev->unacked_cnt); \
+ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
+
+#define sub_unacked(mdev, n) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ atomic_sub(n, &mdev->unacked_cnt); \
+ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
+
+
+static inline void put_net_conf(struct drbd_conf *mdev)
+{
+ if (atomic_dec_and_test(&mdev->net_cnt))
+ wake_up(&mdev->misc_wait);
+}
+
+/**
+ * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there
+ * @mdev: DRBD device.
+ *
+ * You have to call put_net_conf() when finished working with mdev->net_conf.
+ */
+static inline int get_net_conf(struct drbd_conf *mdev)
+{
+ int have_net_conf;
+
+ atomic_inc(&mdev->net_cnt);
+ have_net_conf = mdev->state.conn >= C_UNCONNECTED;
+ if (!have_net_conf)
+ put_net_conf(mdev);
+ return have_net_conf;
+}
+
+/**
+ * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev
+ * @M: DRBD device.
+ *
+ * You have to call put_ldev() when finished working with mdev->ldev.
+ */
+#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
+#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
+
+static inline void put_ldev(struct drbd_conf *mdev)
+{
+ __release(local);
+ if (atomic_dec_and_test(&mdev->local_cnt))
+ wake_up(&mdev->misc_wait);
+ D_ASSERT(atomic_read(&mdev->local_cnt) >= 0);
+}
+
+#ifndef __CHECKER__
+static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
+{
+ int io_allowed;
+
+ atomic_inc(&mdev->local_cnt);
+ io_allowed = (mdev->state.disk >= mins);
+ if (!io_allowed)
+ put_ldev(mdev);
+ return io_allowed;
+}
+#else
+extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins);
+#endif
+
+/* you must have an "get_ldev" reference */
+static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
+ unsigned long *bits_left, unsigned int *per_mil_done)
+{
+ /*
+ * this is to break it at compile time when we change that
+ * (we may feel 4TB maximum storage per drbd is not enough)
+ */
+ typecheck(unsigned long, mdev->rs_total);
+
+ /* note: both rs_total and rs_left are in bits, i.e. in
+ * units of BM_BLOCK_SIZE.
+ * for the percentage, we don't care. */
+
+ *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+ /* >> 10 to prevent overflow,
+ * +1 to prevent division by zero */
+ if (*bits_left > mdev->rs_total) {
+ /* doh. maybe a logic bug somewhere.
+ * may also be just a race condition
+ * between this and a disconnect during sync.
+ * for now, just prevent in-kernel buffer overflow.
+ */
+ smp_rmb();
+ dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
+ drbd_conn_str(mdev->state.conn),
+ *bits_left, mdev->rs_total, mdev->rs_failed);
+ *per_mil_done = 0;
+ } else {
+ /* make sure the calculation happens in long context */
+ unsigned long tmp = 1000UL -
+ (*bits_left >> 10)*1000UL
+ / ((mdev->rs_total >> 10) + 1UL);
+ *per_mil_done = tmp;
+ }
+}
+
+
+/* this throttles on-the-fly application requests
+ * according to max_buffers settings;
+ * maybe re-implement using semaphores? */
+static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
+{
+ int mxb = 1000000; /* arbitrary limit on open requests */
+ if (get_net_conf(mdev)) {
+ mxb = mdev->net_conf->max_buffers;
+ put_net_conf(mdev);
+ }
+ return mxb;
+}
+
+static inline int drbd_state_is_stable(union drbd_state s)
+{
+
+ /* DO NOT add a default clause, we want the compiler to warn us
+ * for any newly introduced state we may have forgotten to add here */
+
+ switch ((enum drbd_conns)s.conn) {
+ /* new io only accepted when there is no connection, ... */
+ case C_STANDALONE:
+ case C_WF_CONNECTION:
+ /* ... or there is a well established connection. */
+ case C_CONNECTED:
+ case C_SYNC_SOURCE:
+ case C_SYNC_TARGET:
+ case C_VERIFY_S:
+ case C_VERIFY_T:
+ case C_PAUSED_SYNC_S:
+ case C_PAUSED_SYNC_T:
+ /* maybe stable, look at the disk state */
+ break;
+
+ /* no new io accepted during tansitional states
+ * like handshake or teardown */
+ case C_DISCONNECTING:
+ case C_UNCONNECTED:
+ case C_TIMEOUT:
+ case C_BROKEN_PIPE:
+ case C_NETWORK_FAILURE:
+ case C_PROTOCOL_ERROR:
+ case C_TEAR_DOWN:
+ case C_WF_REPORT_PARAMS:
+ case C_STARTING_SYNC_S:
+ case C_STARTING_SYNC_T:
+ case C_WF_BITMAP_S:
+ case C_WF_BITMAP_T:
+ case C_WF_SYNC_UUID:
+ case C_MASK:
+ /* not "stable" */
+ return 0;
+ }
+
+ switch ((enum drbd_disk_state)s.disk) {
+ case D_DISKLESS:
+ case D_INCONSISTENT:
+ case D_OUTDATED:
+ case D_CONSISTENT:
+ case D_UP_TO_DATE:
+ /* disk state is stable as well. */
+ break;
+
+ /* no new io accepted during tansitional states */
+ case D_ATTACHING:
+ case D_FAILED:
+ case D_NEGOTIATING:
+ case D_UNKNOWN:
+ case D_MASK:
+ /* not "stable" */
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
+{
+ int mxb = drbd_get_max_buffers(mdev);
+
+ if (mdev->state.susp)
+ return 0;
+ if (test_bit(SUSPEND_IO, &mdev->flags))
+ return 0;
+
+ /* to avoid potential deadlock or bitmap corruption,
+ * in various places, we only allow new application io
+ * to start during "stable" states. */
+
+ /* no new io accepted when attaching or detaching the disk */
+ if (!drbd_state_is_stable(mdev->state))
+ return 0;
+
+ /* since some older kernels don't have atomic_add_unless,
+ * and we are within the spinlock anyways, we have this workaround. */
+ if (atomic_read(&mdev->ap_bio_cnt) > mxb)
+ return 0;
+ if (test_bit(BITMAP_IO, &mdev->flags))
+ return 0;
+ return 1;
+}
+
+/* I'd like to use wait_event_lock_irq,
+ * but I'm not sure when it got introduced,
+ * and not sure when it has 3 or 4 arguments */
+static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
+{
+ /* compare with after_state_ch,
+ * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
+ DEFINE_WAIT(wait);
+
+ /* we wait here
+ * as long as the device is suspended
+ * until the bitmap is no longer on the fly during connection
+ * handshake as long as we would exeed the max_buffer limit.
+ *
+ * to avoid races with the reconnect code,
+ * we need to atomic_inc within the spinlock. */
+
+ spin_lock_irq(&mdev->req_lock);
+ while (!__inc_ap_bio_cond(mdev)) {
+ prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&mdev->req_lock);
+ schedule();
+ finish_wait(&mdev->misc_wait, &wait);
+ spin_lock_irq(&mdev->req_lock);
+ }
+ atomic_add(one_or_two, &mdev->ap_bio_cnt);
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+static inline void dec_ap_bio(struct drbd_conf *mdev)
+{
+ int mxb = drbd_get_max_buffers(mdev);
+ int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
+
+ D_ASSERT(ap_bio >= 0);
+ /* this currently does wake_up for every dec_ap_bio!
+ * maybe rather introduce some type of hysteresis?
+ * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
+ if (ap_bio < mxb)
+ wake_up(&mdev->misc_wait);
+ if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
+ if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+ drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+ }
+}
+
+static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
+{
+ mdev->ed_uuid = val;
+}
+
+static inline int seq_cmp(u32 a, u32 b)
+{
+ /* we assume wrap around at 32bit.
+ * for wrap around at 24bit (old atomic_t),
+ * we'd have to
+ * a <<= 8; b <<= 8;
+ */
+ return (s32)(a) - (s32)(b);
+}
+#define seq_lt(a, b) (seq_cmp((a), (b)) < 0)
+#define seq_gt(a, b) (seq_cmp((a), (b)) > 0)
+#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0)
+#define seq_le(a, b) (seq_cmp((a), (b)) <= 0)
+/* CAUTION: please no side effects in arguments! */
+#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b)))
+
+static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq)
+{
+ unsigned int m;
+ spin_lock(&mdev->peer_seq_lock);
+ m = seq_max(mdev->peer_seq, new_seq);
+ mdev->peer_seq = m;
+ spin_unlock(&mdev->peer_seq_lock);
+ if (m == new_seq)
+ wake_up(&mdev->seq_wait);
+}
+
+static inline void drbd_update_congested(struct drbd_conf *mdev)
+{
+ struct sock *sk = mdev->data.socket->sk;
+ if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
+ set_bit(NET_CONGESTED, &mdev->flags);
+}
+
+static inline int drbd_queue_order_type(struct drbd_conf *mdev)
+{
+ /* sorry, we currently have no working implementation
+ * of distributed TCQ stuff */
+#ifndef QUEUE_ORDERED_NONE
+#define QUEUE_ORDERED_NONE 0
+#endif
+ return QUEUE_ORDERED_NONE;
+}
+
+static inline void drbd_blk_run_queue(struct request_queue *q)
+{
+ if (q && q->unplug_fn)
+ q->unplug_fn(q);
+}
+
+static inline void drbd_kick_lo(struct drbd_conf *mdev)
+{
+ if (get_ldev(mdev)) {
+ drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
+ put_ldev(mdev);
+ }
+}
+
+static inline void drbd_md_flush(struct drbd_conf *mdev)
+{
+ int r;
+
+ if (test_bit(MD_NO_BARRIER, &mdev->flags))
+ return;
+
+ r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
+ if (r) {
+ set_bit(MD_NO_BARRIER, &mdev->flags);
+ dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
+ }
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
new file mode 100644
index 000000000000..157d1e4343c2
--- /dev/null
+++ b/drivers/block/drbd/drbd_main.c
@@ -0,0 +1,3699 @@
+/*
+ drbd.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
+ from Logicworks, Inc. for making SDP replication support possible.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/drbd.h>
+#include <asm/uaccess.h>
+#include <asm/types.h>
+#include <net/sock.h>
+#include <linux/ctype.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/vmalloc.h>
+
+#include <linux/drbd_limits.h>
+#include "drbd_int.h"
+#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
+
+#include "drbd_vli.h"
+
+struct after_state_chg_work {
+ struct drbd_work w;
+ union drbd_state os;
+ union drbd_state ns;
+ enum chg_state_flags flags;
+ struct completion *done;
+};
+
+int drbdd_init(struct drbd_thread *);
+int drbd_worker(struct drbd_thread *);
+int drbd_asender(struct drbd_thread *);
+
+int drbd_init(void);
+static int drbd_open(struct block_device *bdev, fmode_t mode);
+static int drbd_release(struct gendisk *gd, fmode_t mode);
+static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, enum chg_state_flags flags);
+static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void md_sync_timer_fn(unsigned long data);
+static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+
+MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
+ "Lars Ellenberg <lars@linbit.com>");
+MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
+MODULE_VERSION(REL_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
+
+#include <linux/moduleparam.h>
+/* allow_open_on_secondary */
+MODULE_PARM_DESC(allow_oos, "DONT USE!");
+/* thanks to these macros, if compiled into the kernel (not-module),
+ * this becomes the boot parameter drbd.minor_count */
+module_param(minor_count, uint, 0444);
+module_param(disable_sendpage, bool, 0644);
+module_param(allow_oos, bool, 0);
+module_param(cn_idx, uint, 0444);
+module_param(proc_details, int, 0644);
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+int enable_faults;
+int fault_rate;
+static int fault_count;
+int fault_devs;
+/* bitmap of enabled faults */
+module_param(enable_faults, int, 0664);
+/* fault rate % value - applies to all enabled faults */
+module_param(fault_rate, int, 0664);
+/* count of faults inserted */
+module_param(fault_count, int, 0664);
+/* bitmap of devices to insert faults on */
+module_param(fault_devs, int, 0644);
+#endif
+
+/* module parameter, defined */
+unsigned int minor_count = 32;
+int disable_sendpage;
+int allow_oos;
+unsigned int cn_idx = CN_IDX_DRBD;
+int proc_details; /* Detail level in proc drbd*/
+
+/* Module parameter for setting the user mode helper program
+ * to run. Default is /sbin/drbdadm */
+char usermode_helper[80] = "/sbin/drbdadm";
+
+module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
+
+/* in 2.6.x, our device mapping and config info contains our virtual gendisks
+ * as member "struct gendisk *vdisk;"
+ */
+struct drbd_conf **minor_table;
+
+struct kmem_cache *drbd_request_cache;
+struct kmem_cache *drbd_ee_cache; /* epoch entries */
+struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
+struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
+mempool_t *drbd_request_mempool;
+mempool_t *drbd_ee_mempool;
+
+/* I do not use a standard mempool, because:
+ 1) I want to hand out the pre-allocated objects first.
+ 2) I want to be able to interrupt sleeping allocation with a signal.
+ Note: This is a single linked list, the next pointer is the private
+ member of struct page.
+ */
+struct page *drbd_pp_pool;
+spinlock_t drbd_pp_lock;
+int drbd_pp_vacant;
+wait_queue_head_t drbd_pp_wait;
+
+DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
+
+static struct block_device_operations drbd_ops = {
+ .owner = THIS_MODULE,
+ .open = drbd_open,
+ .release = drbd_release,
+};
+
+#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
+
+#ifdef __CHECKER__
+/* When checking with sparse, and this is an inline function, sparse will
+ give tons of false positives. When this is a real functions sparse works.
+ */
+int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
+{
+ int io_allowed;
+
+ atomic_inc(&mdev->local_cnt);
+ io_allowed = (mdev->state.disk >= mins);
+ if (!io_allowed) {
+ if (atomic_dec_and_test(&mdev->local_cnt))
+ wake_up(&mdev->misc_wait);
+ }
+ return io_allowed;
+}
+
+#endif
+
+/**
+ * DOC: The transfer log
+ *
+ * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
+ * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
+ * of the list. There is always at least one &struct drbd_tl_epoch object.
+ *
+ * Each &struct drbd_tl_epoch has a circular double linked list of requests
+ * attached.
+ */
+static int tl_init(struct drbd_conf *mdev)
+{
+ struct drbd_tl_epoch *b;
+
+ /* during device minor initialization, we may well use GFP_KERNEL */
+ b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
+ if (!b)
+ return 0;
+ INIT_LIST_HEAD(&b->requests);
+ INIT_LIST_HEAD(&b->w.list);
+ b->next = NULL;
+ b->br_number = 4711;
+ b->n_req = 0;
+ b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
+
+ mdev->oldest_tle = b;
+ mdev->newest_tle = b;
+ INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
+
+ mdev->tl_hash = NULL;
+ mdev->tl_hash_s = 0;
+
+ return 1;
+}
+
+static void tl_cleanup(struct drbd_conf *mdev)
+{
+ D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
+ D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+ kfree(mdev->oldest_tle);
+ mdev->oldest_tle = NULL;
+ kfree(mdev->unused_spare_tle);
+ mdev->unused_spare_tle = NULL;
+ kfree(mdev->tl_hash);
+ mdev->tl_hash = NULL;
+ mdev->tl_hash_s = 0;
+}
+
+/**
+ * _tl_add_barrier() - Adds a barrier to the transfer log
+ * @mdev: DRBD device.
+ * @new: Barrier to be added before the current head of the TL.
+ *
+ * The caller must hold the req_lock.
+ */
+void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
+{
+ struct drbd_tl_epoch *newest_before;
+
+ INIT_LIST_HEAD(&new->requests);
+ INIT_LIST_HEAD(&new->w.list);
+ new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
+ new->next = NULL;
+ new->n_req = 0;
+
+ newest_before = mdev->newest_tle;
+ /* never send a barrier number == 0, because that is special-cased
+ * when using TCQ for our write ordering code */
+ new->br_number = (newest_before->br_number+1) ?: 1;
+ if (mdev->newest_tle != new) {
+ mdev->newest_tle->next = new;
+ mdev->newest_tle = new;
+ }
+}
+
+/**
+ * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
+ * @mdev: DRBD device.
+ * @barrier_nr: Expected identifier of the DRBD write barrier packet.
+ * @set_size: Expected number of requests before that barrier.
+ *
+ * In case the passed barrier_nr or set_size does not match the oldest
+ * &struct drbd_tl_epoch objects this function will cause a termination
+ * of the connection.
+ */
+void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+ unsigned int set_size)
+{
+ struct drbd_tl_epoch *b, *nob; /* next old barrier */
+ struct list_head *le, *tle;
+ struct drbd_request *r;
+
+ spin_lock_irq(&mdev->req_lock);
+
+ b = mdev->oldest_tle;
+
+ /* first some paranoia code */
+ if (b == NULL) {
+ dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
+ barrier_nr);
+ goto bail;
+ }
+ if (b->br_number != barrier_nr) {
+ dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
+ barrier_nr, b->br_number);
+ goto bail;
+ }
+ if (b->n_req != set_size) {
+ dev_err(DEV, "BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n",
+ barrier_nr, set_size, b->n_req);
+ goto bail;
+ }
+
+ /* Clean up list of requests processed during current epoch */
+ list_for_each_safe(le, tle, &b->requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(r, barrier_acked);
+ }
+ /* There could be requests on the list waiting for completion
+ of the write to the local disk. To avoid corruptions of
+ slab's data structures we have to remove the lists head.
+
+ Also there could have been a barrier ack out of sequence, overtaking
+ the write acks - which would be a bug and violating write ordering.
+ To not deadlock in case we lose connection while such requests are
+ still pending, we need some way to find them for the
+ _req_mode(connection_lost_while_pending).
+
+ These have been list_move'd to the out_of_sequence_requests list in
+ _req_mod(, barrier_acked) above.
+ */
+ list_del_init(&b->requests);
+
+ nob = b->next;
+ if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+ _tl_add_barrier(mdev, b);
+ if (nob)
+ mdev->oldest_tle = nob;
+ /* if nob == NULL b was the only barrier, and becomes the new
+ barrier. Therefore mdev->oldest_tle points already to b */
+ } else {
+ D_ASSERT(nob != NULL);
+ mdev->oldest_tle = nob;
+ kfree(b);
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+ dec_ap_pending(mdev);
+
+ return;
+
+bail:
+ spin_unlock_irq(&mdev->req_lock);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+}
+
+
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev: DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage covered
+ * by the requests on the transfer gets marked as our of sync. Called from the
+ * receiver thread and the worker thread.
+ */
+void tl_clear(struct drbd_conf *mdev)
+{
+ struct drbd_tl_epoch *b, *tmp;
+ struct list_head *le, *tle;
+ struct drbd_request *r;
+ int new_initial_bnr = net_random();
+
+ spin_lock_irq(&mdev->req_lock);
+
+ b = mdev->oldest_tle;
+ while (b) {
+ list_for_each_safe(le, tle, &b->requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ /* It would be nice to complete outside of spinlock.
+ * But this is easier for now. */
+ _req_mod(r, connection_lost_while_pending);
+ }
+ tmp = b->next;
+
+ /* there could still be requests on that ring list,
+ * in case local io is still pending */
+ list_del(&b->requests);
+
+ /* dec_ap_pending corresponding to queue_barrier.
+ * the newest barrier may not have been queued yet,
+ * in which case w.cb is still NULL. */
+ if (b->w.cb != NULL)
+ dec_ap_pending(mdev);
+
+ if (b == mdev->newest_tle) {
+ /* recycle, but reinit! */
+ D_ASSERT(tmp == NULL);
+ INIT_LIST_HEAD(&b->requests);
+ INIT_LIST_HEAD(&b->w.list);
+ b->w.cb = NULL;
+ b->br_number = new_initial_bnr;
+ b->n_req = 0;
+
+ mdev->oldest_tle = b;
+ break;
+ }
+ kfree(b);
+ b = tmp;
+ }
+
+ /* we expect this list to be empty. */
+ D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+
+ /* but just in case, clean it up anyways! */
+ list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ /* It would be nice to complete outside of spinlock.
+ * But this is easier for now. */
+ _req_mod(r, connection_lost_while_pending);
+ }
+
+ /* ensure bit indicating barrier is required is clear */
+ clear_bit(CREATE_BARRIER, &mdev->flags);
+
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+/**
+ * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * @mdev: DRBD device.
+ * @os: old (current) state.
+ * @ns: new (wanted) state.
+ */
+static int cl_wide_st_chg(struct drbd_conf *mdev,
+ union drbd_state os, union drbd_state ns)
+{
+ return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
+ ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
+ (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+ (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
+ (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
+ (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
+ (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
+}
+
+int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+ union drbd_state mask, union drbd_state val)
+{
+ unsigned long flags;
+ union drbd_state os, ns;
+ int rv;
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ rv = _drbd_set_state(mdev, ns, f, NULL);
+ ns = mdev->state;
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ return rv;
+}
+
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ */
+void drbd_force_state(struct drbd_conf *mdev,
+ union drbd_state mask, union drbd_state val)
+{
+ drbd_change_state(mdev, CS_HARD, mask, val);
+}
+
+static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
+static int is_valid_state_transition(struct drbd_conf *,
+ union drbd_state, union drbd_state);
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, int *warn_sync_abort);
+int drbd_send_state_req(struct drbd_conf *,
+ union drbd_state, union drbd_state);
+
+static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
+ union drbd_state mask, union drbd_state val)
+{
+ union drbd_state os, ns;
+ unsigned long flags;
+ int rv;
+
+ if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
+ return SS_CW_SUCCESS;
+
+ if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
+ return SS_CW_FAILED_BY_PEER;
+
+ rv = 0;
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ ns = sanitize_state(mdev, os, ns, NULL);
+
+ if (!cl_wide_st_chg(mdev, os, ns))
+ rv = SS_CW_NO_NEED;
+ if (!rv) {
+ rv = is_valid_state(mdev, ns);
+ if (rv == SS_SUCCESS) {
+ rv = is_valid_state_transition(mdev, ns, os);
+ if (rv == SS_SUCCESS)
+ rv = 0; /* cont waiting, otherwise fail. */
+ }
+ }
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ return rv;
+}
+
+/**
+ * drbd_req_state() - Perform an eventually cluster wide state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
+ */
+static int drbd_req_state(struct drbd_conf *mdev,
+ union drbd_state mask, union drbd_state val,
+ enum chg_state_flags f)
+{
+ struct completion done;
+ unsigned long flags;
+ union drbd_state os, ns;
+ int rv;
+
+ init_completion(&done);
+
+ if (f & CS_SERIALIZE)
+ mutex_lock(&mdev->state_mutex);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ ns = sanitize_state(mdev, os, ns, NULL);
+
+ if (cl_wide_st_chg(mdev, os, ns)) {
+ rv = is_valid_state(mdev, ns);
+ if (rv == SS_SUCCESS)
+ rv = is_valid_state_transition(mdev, ns, os);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (rv < SS_SUCCESS) {
+ if (f & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ goto abort;
+ }
+
+ drbd_state_lock(mdev);
+ if (!drbd_send_state_req(mdev, mask, val)) {
+ drbd_state_unlock(mdev);
+ rv = SS_CW_FAILED_BY_PEER;
+ if (f & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ goto abort;
+ }
+
+ wait_event(mdev->state_wait,
+ (rv = _req_st_cond(mdev, mask, val)));
+
+ if (rv < SS_SUCCESS) {
+ drbd_state_unlock(mdev);
+ if (f & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ goto abort;
+ }
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ rv = _drbd_set_state(mdev, ns, f, &done);
+ drbd_state_unlock(mdev);
+ } else {
+ rv = _drbd_set_state(mdev, ns, f, &done);
+ }
+
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
+ D_ASSERT(current != mdev->worker.task);
+ wait_for_completion(&done);
+ }
+
+abort:
+ if (f & CS_SERIALIZE)
+ mutex_unlock(&mdev->state_mutex);
+
+ return rv;
+}
+
+/**
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
+ */
+int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val, enum chg_state_flags f)
+{
+ int rv;
+
+ wait_event(mdev->state_wait,
+ (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
+
+ return rv;
+}
+
+static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
+{
+ dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
+ name,
+ drbd_conn_str(ns.conn),
+ drbd_role_str(ns.role),
+ drbd_role_str(ns.peer),
+ drbd_disk_str(ns.disk),
+ drbd_disk_str(ns.pdsk),
+ ns.susp ? 's' : 'r',
+ ns.aftr_isp ? 'a' : '-',
+ ns.peer_isp ? 'p' : '-',
+ ns.user_isp ? 'u' : '-'
+ );
+}
+
+void print_st_err(struct drbd_conf *mdev,
+ union drbd_state os, union drbd_state ns, int err)
+{
+ if (err == SS_IN_TRANSIENT_STATE)
+ return;
+ dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
+ print_st(mdev, " state", os);
+ print_st(mdev, "wanted", ns);
+}
+
+
+#define drbd_peer_str drbd_role_str
+#define drbd_pdsk_str drbd_disk_str
+
+#define drbd_susp_str(A) ((A) ? "1" : "0")
+#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
+#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
+#define drbd_user_isp_str(A) ((A) ? "1" : "0")
+
+#define PSC(A) \
+ ({ if (ns.A != os.A) { \
+ pbp += sprintf(pbp, #A "( %s -> %s ) ", \
+ drbd_##A##_str(os.A), \
+ drbd_##A##_str(ns.A)); \
+ } })
+
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev: DRBD device.
+ * @ns: State to consider.
+ */
+static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+{
+ /* See drbd_state_sw_errors in drbd_strings.c */
+
+ enum drbd_fencing_p fp;
+ int rv = SS_SUCCESS;
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ if (get_net_conf(mdev)) {
+ if (!mdev->net_conf->two_primaries &&
+ ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
+ rv = SS_TWO_PRIMARIES;
+ put_net_conf(mdev);
+ }
+
+ if (rv <= 0)
+ /* already found a reason to abort */;
+ else if (ns.role == R_SECONDARY && mdev->open_cnt)
+ rv = SS_DEVICE_IN_USE;
+
+ else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
+ rv = SS_NO_UP_TO_DATE_DISK;
+
+ else if (fp >= FP_RESOURCE &&
+ ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
+ rv = SS_PRIMARY_NOP;
+
+ else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
+ rv = SS_NO_UP_TO_DATE_DISK;
+
+ else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
+ rv = SS_NO_LOCAL_DISK;
+
+ else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
+ rv = SS_NO_REMOTE_DISK;
+
+ else if ((ns.conn == C_CONNECTED ||
+ ns.conn == C_WF_BITMAP_S ||
+ ns.conn == C_SYNC_SOURCE ||
+ ns.conn == C_PAUSED_SYNC_S) &&
+ ns.disk == D_OUTDATED)
+ rv = SS_CONNECTED_OUTDATES;
+
+ else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+ (mdev->sync_conf.verify_alg[0] == 0))
+ rv = SS_NO_VERIFY_ALG;
+
+ else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+ mdev->agreed_pro_version < 88)
+ rv = SS_NOT_SUPPORTED;
+
+ return rv;
+}
+
+/**
+ * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @os: old state.
+ */
+static int is_valid_state_transition(struct drbd_conf *mdev,
+ union drbd_state ns, union drbd_state os)
+{
+ int rv = SS_SUCCESS;
+
+ if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
+ os.conn > C_CONNECTED)
+ rv = SS_RESYNC_RUNNING;
+
+ if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
+ rv = SS_ALREADY_STANDALONE;
+
+ if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
+ rv = SS_IS_DISKLESS;
+
+ if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
+ rv = SS_NO_NET_CONFIG;
+
+ if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
+ rv = SS_LOWER_THAN_OUTDATED;
+
+ if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
+ rv = SS_IN_TRANSIENT_STATE;
+
+ if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
+ rv = SS_IN_TRANSIENT_STATE;
+
+ if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
+ rv = SS_NEED_CONNECTION;
+
+ if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+ ns.conn != os.conn && os.conn > C_CONNECTED)
+ rv = SS_RESYNC_RUNNING;
+
+ if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
+ os.conn < C_CONNECTED)
+ rv = SS_NEED_CONNECTION;
+
+ return rv;
+}
+
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @warn_sync_abort:
+ *
+ * When we loose connection, we have to set the state of the peers disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
+ */
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, int *warn_sync_abort)
+{
+ enum drbd_fencing_p fp;
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ /* Disallow Network errors to configure a device's network part */
+ if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
+ os.conn <= C_DISCONNECTING)
+ ns.conn = os.conn;
+
+ /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow */
+ if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
+ ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING)
+ ns.conn = os.conn;
+
+ /* After C_DISCONNECTING only C_STANDALONE may follow */
+ if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
+ ns.conn = os.conn;
+
+ if (ns.conn < C_CONNECTED) {
+ ns.peer_isp = 0;
+ ns.peer = R_UNKNOWN;
+ if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
+ ns.pdsk = D_UNKNOWN;
+ }
+
+ /* Clear the aftr_isp when becoming unconfigured */
+ if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
+ ns.aftr_isp = 0;
+
+ if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS)
+ ns.pdsk = D_UNKNOWN;
+
+ /* Abort resync if a disk fails/detaches */
+ if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
+ (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
+ if (warn_sync_abort)
+ *warn_sync_abort = 1;
+ ns.conn = C_CONNECTED;
+ }
+
+ if (ns.conn >= C_CONNECTED &&
+ ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
+ (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
+ switch (ns.conn) {
+ case C_WF_BITMAP_T:
+ case C_PAUSED_SYNC_T:
+ ns.disk = D_OUTDATED;
+ break;
+ case C_CONNECTED:
+ case C_WF_BITMAP_S:
+ case C_SYNC_SOURCE:
+ case C_PAUSED_SYNC_S:
+ ns.disk = D_UP_TO_DATE;
+ break;
+ case C_SYNC_TARGET:
+ ns.disk = D_INCONSISTENT;
+ dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
+ break;
+ }
+ if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
+ dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
+ }
+
+ if (ns.conn >= C_CONNECTED &&
+ (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
+ switch (ns.conn) {
+ case C_CONNECTED:
+ case C_WF_BITMAP_T:
+ case C_PAUSED_SYNC_T:
+ case C_SYNC_TARGET:
+ ns.pdsk = D_UP_TO_DATE;
+ break;
+ case C_WF_BITMAP_S:
+ case C_PAUSED_SYNC_S:
+ ns.pdsk = D_OUTDATED;
+ break;
+ case C_SYNC_SOURCE:
+ ns.pdsk = D_INCONSISTENT;
+ dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
+ break;
+ }
+ if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
+ dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
+ }
+
+ /* Connection breaks down before we finished "Negotiating" */
+ if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
+ get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
+ ns.disk = mdev->new_state_tmp.disk;
+ ns.pdsk = mdev->new_state_tmp.pdsk;
+ } else {
+ dev_alert(DEV, "Connection lost while negotiating, no data!\n");
+ ns.disk = D_DISKLESS;
+ ns.pdsk = D_UNKNOWN;
+ }
+ put_ldev(mdev);
+ }
+
+ if (fp == FP_STONITH &&
+ (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
+ !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
+ ns.susp = 1;
+
+ if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
+ if (ns.conn == C_SYNC_SOURCE)
+ ns.conn = C_PAUSED_SYNC_S;
+ if (ns.conn == C_SYNC_TARGET)
+ ns.conn = C_PAUSED_SYNC_T;
+ } else {
+ if (ns.conn == C_PAUSED_SYNC_S)
+ ns.conn = C_SYNC_SOURCE;
+ if (ns.conn == C_PAUSED_SYNC_T)
+ ns.conn = C_SYNC_TARGET;
+ }
+
+ return ns;
+}
+
+/* helper for __drbd_set_state */
+static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
+{
+ if (cs == C_VERIFY_T) {
+ /* starting online verify from an arbitrary position
+ * does not fit well into the existing protocol.
+ * on C_VERIFY_T, we initialize ov_left and friends
+ * implicitly in receive_DataRequest once the
+ * first P_OV_REQUEST is received */
+ mdev->ov_start_sector = ~(sector_t)0;
+ } else {
+ unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
+ if (bit >= mdev->rs_total)
+ mdev->ov_start_sector =
+ BM_BIT_TO_SECT(mdev->rs_total - 1);
+ mdev->ov_position = mdev->ov_start_sector;
+ }
+}
+
+/**
+ * __drbd_set_state() - Set a new DRBD state
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @flags: Flags
+ * @done: Optional completion, that will get completed after the after_state_ch() finished
+ *
+ * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ */
+int __drbd_set_state(struct drbd_conf *mdev,
+ union drbd_state ns, enum chg_state_flags flags,
+ struct completion *done)
+{
+ union drbd_state os;
+ int rv = SS_SUCCESS;
+ int warn_sync_abort = 0;
+ struct after_state_chg_work *ascw;
+
+ os = mdev->state;
+
+ ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
+
+ if (ns.i == os.i)
+ return SS_NOTHING_TO_DO;
+
+ if (!(flags & CS_HARD)) {
+ /* pre-state-change checks ; only look at ns */
+ /* See drbd_state_sw_errors in drbd_strings.c */
+
+ rv = is_valid_state(mdev, ns);
+ if (rv < SS_SUCCESS) {
+ /* If the old state was illegal as well, then let
+ this happen...*/
+
+ if (is_valid_state(mdev, os) == rv) {
+ dev_err(DEV, "Considering state change from bad state. "
+ "Error would be: '%s'\n",
+ drbd_set_st_err_str(rv));
+ print_st(mdev, "old", os);
+ print_st(mdev, "new", ns);
+ rv = is_valid_state_transition(mdev, ns, os);
+ }
+ } else
+ rv = is_valid_state_transition(mdev, ns, os);
+ }
+
+ if (rv < SS_SUCCESS) {
+ if (flags & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ return rv;
+ }
+
+ if (warn_sync_abort)
+ dev_warn(DEV, "Resync aborted.\n");
+
+ {
+ char *pbp, pb[300];
+ pbp = pb;
+ *pbp = 0;
+ PSC(role);
+ PSC(peer);
+ PSC(conn);
+ PSC(disk);
+ PSC(pdsk);
+ PSC(susp);
+ PSC(aftr_isp);
+ PSC(peer_isp);
+ PSC(user_isp);
+ dev_info(DEV, "%s\n", pb);
+ }
+
+ /* solve the race between becoming unconfigured,
+ * worker doing the cleanup, and
+ * admin reconfiguring us:
+ * on (re)configure, first set CONFIG_PENDING,
+ * then wait for a potentially exiting worker,
+ * start the worker, and schedule one no_op.
+ * then proceed with configuration.
+ */
+ if (ns.disk == D_DISKLESS &&
+ ns.conn == C_STANDALONE &&
+ ns.role == R_SECONDARY &&
+ !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
+ set_bit(DEVICE_DYING, &mdev->flags);
+
+ mdev->state.i = ns.i;
+ wake_up(&mdev->misc_wait);
+ wake_up(&mdev->state_wait);
+
+ /* post-state-change actions */
+ if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) {
+ set_bit(STOP_SYNC_TIMER, &mdev->flags);
+ mod_timer(&mdev->resync_timer, jiffies);
+ }
+
+ /* aborted verify run. log the last position */
+ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
+ ns.conn < C_CONNECTED) {
+ mdev->ov_start_sector =
+ BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left);
+ dev_info(DEV, "Online Verify reached sector %llu\n",
+ (unsigned long long)mdev->ov_start_sector);
+ }
+
+ if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
+ (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
+ dev_info(DEV, "Syncer continues.\n");
+ mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time;
+ if (ns.conn == C_SYNC_TARGET) {
+ if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))
+ mod_timer(&mdev->resync_timer, jiffies);
+ /* This if (!test_bit) is only needed for the case
+ that a device that has ceased to used its timer,
+ i.e. it is already in drbd_resync_finished() gets
+ paused and resumed. */
+ }
+ }
+
+ if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
+ (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
+ dev_info(DEV, "Resync suspended\n");
+ mdev->rs_mark_time = jiffies;
+ if (ns.conn == C_PAUSED_SYNC_T)
+ set_bit(STOP_SYNC_TIMER, &mdev->flags);
+ }
+
+ if (os.conn == C_CONNECTED &&
+ (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
+ mdev->ov_position = 0;
+ mdev->rs_total =
+ mdev->rs_mark_left = drbd_bm_bits(mdev);
+ if (mdev->agreed_pro_version >= 90)
+ set_ov_position(mdev, ns.conn);
+ else
+ mdev->ov_start_sector = 0;
+ mdev->ov_left = mdev->rs_total
+ - BM_SECT_TO_BIT(mdev->ov_position);
+ mdev->rs_start =
+ mdev->rs_mark_time = jiffies;
+ mdev->ov_last_oos_size = 0;
+ mdev->ov_last_oos_start = 0;
+
+ if (ns.conn == C_VERIFY_S) {
+ dev_info(DEV, "Starting Online Verify from sector %llu\n",
+ (unsigned long long)mdev->ov_position);
+ mod_timer(&mdev->resync_timer, jiffies);
+ }
+ }
+
+ if (get_ldev(mdev)) {
+ u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
+ MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
+ MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
+
+ if (test_bit(CRASHED_PRIMARY, &mdev->flags))
+ mdf |= MDF_CRASHED_PRIMARY;
+ if (mdev->state.role == R_PRIMARY ||
+ (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
+ mdf |= MDF_PRIMARY_IND;
+ if (mdev->state.conn > C_WF_REPORT_PARAMS)
+ mdf |= MDF_CONNECTED_IND;
+ if (mdev->state.disk > D_INCONSISTENT)
+ mdf |= MDF_CONSISTENT;
+ if (mdev->state.disk > D_OUTDATED)
+ mdf |= MDF_WAS_UP_TO_DATE;
+ if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
+ mdf |= MDF_PEER_OUT_DATED;
+ if (mdf != mdev->ldev->md.flags) {
+ mdev->ldev->md.flags = mdf;
+ drbd_md_mark_dirty(mdev);
+ }
+ if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
+ drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
+ put_ldev(mdev);
+ }
+
+ /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
+ if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
+ os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
+ set_bit(CONSIDER_RESYNC, &mdev->flags);
+
+ /* Receiver should clean up itself */
+ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
+ drbd_thread_stop_nowait(&mdev->receiver);
+
+ /* Now the receiver finished cleaning up itself, it should die */
+ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
+ drbd_thread_stop_nowait(&mdev->receiver);
+
+ /* Upon network failure, we need to restart the receiver. */
+ if (os.conn > C_TEAR_DOWN &&
+ ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
+ drbd_thread_restart_nowait(&mdev->receiver);
+
+ ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
+ if (ascw) {
+ ascw->os = os;
+ ascw->ns = ns;
+ ascw->flags = flags;
+ ascw->w.cb = w_after_state_ch;
+ ascw->done = done;
+ drbd_queue_work(&mdev->data.work, &ascw->w);
+ } else {
+ dev_warn(DEV, "Could not kmalloc an ascw\n");
+ }
+
+ return rv;
+}
+
+static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct after_state_chg_work *ascw =
+ container_of(w, struct after_state_chg_work, w);
+ after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
+ if (ascw->flags & CS_WAIT_COMPLETE) {
+ D_ASSERT(ascw->done != NULL);
+ complete(ascw->done);
+ }
+ kfree(ascw);
+
+ return 1;
+}
+
+static void abw_start_sync(struct drbd_conf *mdev, int rv)
+{
+ if (rv) {
+ dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
+ _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
+ return;
+ }
+
+ switch (mdev->state.conn) {
+ case C_STARTING_SYNC_T:
+ _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+ break;
+ case C_STARTING_SYNC_S:
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ break;
+ }
+}
+
+/**
+ * after_state_ch() - Perform after state change actions that may sleep
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @flags: Flags
+ */
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, enum chg_state_flags flags)
+{
+ enum drbd_fencing_p fp;
+
+ if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+ clear_bit(CRASHED_PRIMARY, &mdev->flags);
+ if (mdev->p_uuid)
+ mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
+ }
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ /* Inform userspace about the change... */
+ drbd_bcast_state(mdev, ns);
+
+ if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
+ (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+ drbd_khelper(mdev, "pri-on-incon-degr");
+
+ /* Here we have the actions that are performed after a
+ state change. This function might sleep */
+
+ if (fp == FP_STONITH && ns.susp) {
+ /* case1: The outdate peer handler is successful:
+ * case2: The connection was established again: */
+ if ((os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) ||
+ (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)) {
+ tl_clear(mdev);
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+ }
+ }
+ /* Do not change the order of the if above and the two below... */
+ if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+ if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
+ drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+
+ /* Lost contact to peer's copy of the data */
+ if ((os.pdsk >= D_INCONSISTENT &&
+ os.pdsk != D_UNKNOWN &&
+ os.pdsk != D_OUTDATED)
+ && (ns.pdsk < D_INCONSISTENT ||
+ ns.pdsk == D_UNKNOWN ||
+ ns.pdsk == D_OUTDATED)) {
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = NULL;
+ if (get_ldev(mdev)) {
+ if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
+ mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+ drbd_uuid_new_current(mdev);
+ drbd_send_uuids(mdev);
+ }
+ put_ldev(mdev);
+ }
+ }
+
+ if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
+ if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
+ drbd_uuid_new_current(mdev);
+
+ /* D_DISKLESS Peer becomes secondary */
+ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
+ drbd_al_to_on_disk_bm(mdev);
+ put_ldev(mdev);
+ }
+
+ /* Last part of the attaching process ... */
+ if (ns.conn >= C_CONNECTED &&
+ os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
+ kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
+ mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
+ drbd_send_sizes(mdev, 0); /* to start sync... */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+
+ /* We want to pause/continue resync, tell peer. */
+ if (ns.conn >= C_CONNECTED &&
+ ((os.aftr_isp != ns.aftr_isp) ||
+ (os.user_isp != ns.user_isp)))
+ drbd_send_state(mdev);
+
+ /* In case one of the isp bits got set, suspend other devices. */
+ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
+ (ns.aftr_isp || ns.peer_isp || ns.user_isp))
+ suspend_other_sg(mdev);
+
+ /* Make sure the peer gets informed about eventual state
+ changes (ISP bits) while we were in WFReportParams. */
+ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
+ drbd_send_state(mdev);
+
+ /* We are in the progress to start a full sync... */
+ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+ (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+
+ /* We are invalidating our self... */
+ if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
+ os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+
+ if (os.disk > D_FAILED && ns.disk == D_FAILED) {
+ enum drbd_io_error_p eh;
+
+ eh = EP_PASS_ON;
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ eh = mdev->ldev->dc.on_io_error;
+ put_ldev(mdev);
+ }
+
+ drbd_rs_cancel_all(mdev);
+ /* since get_ldev() only works as long as disk>=D_INCONSISTENT,
+ and it is D_DISKLESS here, local_cnt can only go down, it can
+ not increase... It will reach zero */
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (eh == EP_CALL_HELPER)
+ drbd_khelper(mdev, "local-io-error");
+ }
+
+ if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
+
+ if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ {
+ if (drbd_send_state(mdev))
+ dev_warn(DEV, "Notified peer that my disk is broken.\n");
+ else
+ dev_err(DEV, "Sending state in drbd_io_error() failed\n");
+ }
+
+ lc_destroy(mdev->resync);
+ mdev->resync = NULL;
+ lc_destroy(mdev->act_log);
+ mdev->act_log = NULL;
+ __no_warn(local,
+ drbd_free_bc(mdev->ldev);
+ mdev->ldev = NULL;);
+
+ if (mdev->md_io_tmpp)
+ __free_page(mdev->md_io_tmpp);
+ }
+
+ /* Disks got bigger while they were detached */
+ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
+ test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
+ if (ns.conn == C_CONNECTED)
+ resync_after_online_grow(mdev);
+ }
+
+ /* A resync finished or aborted, wake paused devices... */
+ if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
+ (os.peer_isp && !ns.peer_isp) ||
+ (os.user_isp && !ns.user_isp))
+ resume_next_sg(mdev);
+
+ /* Upon network connection, we need to start the receiver */
+ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
+ drbd_thread_start(&mdev->receiver);
+
+ /* Terminate worker thread if we are unconfigured - it will be
+ restarted as needed... */
+ if (ns.disk == D_DISKLESS &&
+ ns.conn == C_STANDALONE &&
+ ns.role == R_SECONDARY) {
+ if (os.aftr_isp != ns.aftr_isp)
+ resume_next_sg(mdev);
+ /* set in __drbd_set_state, unless CONFIG_PENDING was set */
+ if (test_bit(DEVICE_DYING, &mdev->flags))
+ drbd_thread_stop_nowait(&mdev->worker);
+ }
+
+ drbd_md_sync(mdev);
+}
+
+
+static int drbd_thread_setup(void *arg)
+{
+ struct drbd_thread *thi = (struct drbd_thread *) arg;
+ struct drbd_conf *mdev = thi->mdev;
+ unsigned long flags;
+ int retval;
+
+restart:
+ retval = thi->function(thi);
+
+ spin_lock_irqsave(&thi->t_lock, flags);
+
+ /* if the receiver has been "Exiting", the last thing it did
+ * was set the conn state to "StandAlone",
+ * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
+ * and receiver thread will be "started".
+ * drbd_thread_start needs to set "Restarting" in that case.
+ * t_state check and assignment needs to be within the same spinlock,
+ * so either thread_start sees Exiting, and can remap to Restarting,
+ * or thread_start see None, and can proceed as normal.
+ */
+
+ if (thi->t_state == Restarting) {
+ dev_info(DEV, "Restarting %s\n", current->comm);
+ thi->t_state = Running;
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ goto restart;
+ }
+
+ thi->task = NULL;
+ thi->t_state = None;
+ smp_mb();
+ complete(&thi->stop);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+
+ dev_info(DEV, "Terminating %s\n", current->comm);
+
+ /* Release mod reference taken when thread was started */
+ module_put(THIS_MODULE);
+ return retval;
+}
+
+static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
+ int (*func) (struct drbd_thread *))
+{
+ spin_lock_init(&thi->t_lock);
+ thi->task = NULL;
+ thi->t_state = None;
+ thi->function = func;
+ thi->mdev = mdev;
+}
+
+int drbd_thread_start(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct task_struct *nt;
+ unsigned long flags;
+
+ const char *me =
+ thi == &mdev->receiver ? "receiver" :
+ thi == &mdev->asender ? "asender" :
+ thi == &mdev->worker ? "worker" : "NONSENSE";
+
+ /* is used from state engine doing drbd_thread_stop_nowait,
+ * while holding the req lock irqsave */
+ spin_lock_irqsave(&thi->t_lock, flags);
+
+ switch (thi->t_state) {
+ case None:
+ dev_info(DEV, "Starting %s thread (from %s [%d])\n",
+ me, current->comm, current->pid);
+
+ /* Get ref on module for thread - this is released when thread exits */
+ if (!try_module_get(THIS_MODULE)) {
+ dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ return FALSE;
+ }
+
+ init_completion(&thi->stop);
+ D_ASSERT(thi->task == NULL);
+ thi->reset_cpu_mask = 1;
+ thi->t_state = Running;
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ flush_signals(current); /* otherw. may get -ERESTARTNOINTR */
+
+ nt = kthread_create(drbd_thread_setup, (void *) thi,
+ "drbd%d_%s", mdev_to_minor(mdev), me);
+
+ if (IS_ERR(nt)) {
+ dev_err(DEV, "Couldn't start thread\n");
+
+ module_put(THIS_MODULE);
+ return FALSE;
+ }
+ spin_lock_irqsave(&thi->t_lock, flags);
+ thi->task = nt;
+ thi->t_state = Running;
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ wake_up_process(nt);
+ break;
+ case Exiting:
+ thi->t_state = Restarting;
+ dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
+ me, current->comm, current->pid);
+ /* fall through */
+ case Running:
+ case Restarting:
+ default:
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ break;
+ }
+
+ return TRUE;
+}
+
+
+void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
+{
+ unsigned long flags;
+
+ enum drbd_thread_state ns = restart ? Restarting : Exiting;
+
+ /* may be called from state engine, holding the req lock irqsave */
+ spin_lock_irqsave(&thi->t_lock, flags);
+
+ if (thi->t_state == None) {
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ if (restart)
+ drbd_thread_start(thi);
+ return;
+ }
+
+ if (thi->t_state != ns) {
+ if (thi->task == NULL) {
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ return;
+ }
+
+ thi->t_state = ns;
+ smp_mb();
+ init_completion(&thi->stop);
+ if (thi->task != current)
+ force_sig(DRBD_SIGKILL, thi->task);
+
+ }
+
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+
+ if (wait)
+ wait_for_completion(&thi->stop);
+}
+
+#ifdef CONFIG_SMP
+/**
+ * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
+ * @mdev: DRBD device.
+ *
+ * Forces all threads of a device onto the same CPU. This is beneficial for
+ * DRBD's performance. May be overwritten by user's configuration.
+ */
+void drbd_calc_cpu_mask(struct drbd_conf *mdev)
+{
+ int ord, cpu;
+
+ /* user override. */
+ if (cpumask_weight(mdev->cpu_mask))
+ return;
+
+ ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
+ for_each_online_cpu(cpu) {
+ if (ord-- == 0) {
+ cpumask_set_cpu(cpu, mdev->cpu_mask);
+ return;
+ }
+ }
+ /* should not be reached */
+ cpumask_setall(mdev->cpu_mask);
+}
+
+/**
+ * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
+ * @mdev: DRBD device.
+ *
+ * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
+ * prematurely.
+ */
+void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
+{
+ struct task_struct *p = current;
+ struct drbd_thread *thi =
+ p == mdev->asender.task ? &mdev->asender :
+ p == mdev->receiver.task ? &mdev->receiver :
+ p == mdev->worker.task ? &mdev->worker :
+ NULL;
+ ERR_IF(thi == NULL)
+ return;
+ if (!thi->reset_cpu_mask)
+ return;
+ thi->reset_cpu_mask = 0;
+ set_cpus_allowed_ptr(p, mdev->cpu_mask);
+}
+#endif
+
+/* the appropriate socket mutex must be held already */
+int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
+ enum drbd_packets cmd, struct p_header *h,
+ size_t size, unsigned msg_flags)
+{
+ int sent, ok;
+
+ ERR_IF(!h) return FALSE;
+ ERR_IF(!size) return FALSE;
+
+ h->magic = BE_DRBD_MAGIC;
+ h->command = cpu_to_be16(cmd);
+ h->length = cpu_to_be16(size-sizeof(struct p_header));
+
+ sent = drbd_send(mdev, sock, h, size, msg_flags);
+
+ ok = (sent == size);
+ if (!ok)
+ dev_err(DEV, "short sent %s size=%d sent=%d\n",
+ cmdname(cmd), (int)size, sent);
+ return ok;
+}
+
+/* don't pass the socket. we may only look at it
+ * when we hold the appropriate socket mutex.
+ */
+int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
+ enum drbd_packets cmd, struct p_header *h, size_t size)
+{
+ int ok = 0;
+ struct socket *sock;
+
+ if (use_data_socket) {
+ mutex_lock(&mdev->data.mutex);
+ sock = mdev->data.socket;
+ } else {
+ mutex_lock(&mdev->meta.mutex);
+ sock = mdev->meta.socket;
+ }
+
+ /* drbd_disconnect() could have called drbd_free_sock()
+ * while we were waiting in down()... */
+ if (likely(sock != NULL))
+ ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
+
+ if (use_data_socket)
+ mutex_unlock(&mdev->data.mutex);
+ else
+ mutex_unlock(&mdev->meta.mutex);
+ return ok;
+}
+
+int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
+ size_t size)
+{
+ struct p_header h;
+ int ok;
+
+ h.magic = BE_DRBD_MAGIC;
+ h.command = cpu_to_be16(cmd);
+ h.length = cpu_to_be16(size);
+
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+
+ ok = (sizeof(h) ==
+ drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
+ ok = ok && (size ==
+ drbd_send(mdev, mdev->data.socket, data, size, 0));
+
+ drbd_put_data_sock(mdev);
+
+ return ok;
+}
+
+int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
+{
+ struct p_rs_param_89 *p;
+ struct socket *sock;
+ int size, rv;
+ const int apv = mdev->agreed_pro_version;
+
+ size = apv <= 87 ? sizeof(struct p_rs_param)
+ : apv == 88 ? sizeof(struct p_rs_param)
+ + strlen(mdev->sync_conf.verify_alg) + 1
+ : /* 89 */ sizeof(struct p_rs_param_89);
+
+ /* used from admin command context and receiver/worker context.
+ * to avoid kmalloc, grab the socket right here,
+ * then use the pre-allocated sbuf there */
+ mutex_lock(&mdev->data.mutex);
+ sock = mdev->data.socket;
+
+ if (likely(sock != NULL)) {
+ enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
+
+ p = &mdev->data.sbuf.rs_param_89;
+
+ /* initialize verify_alg and csums_alg */
+ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+
+ p->rate = cpu_to_be32(sc->rate);
+
+ if (apv >= 88)
+ strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
+ if (apv >= 89)
+ strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
+
+ rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
+ } else
+ rv = 0; /* not ok */
+
+ mutex_unlock(&mdev->data.mutex);
+
+ return rv;
+}
+
+int drbd_send_protocol(struct drbd_conf *mdev)
+{
+ struct p_protocol *p;
+ int size, rv;
+
+ size = sizeof(struct p_protocol);
+
+ if (mdev->agreed_pro_version >= 87)
+ size += strlen(mdev->net_conf->integrity_alg) + 1;
+
+ /* we must not recurse into our own queue,
+ * as that is blocked during handshake */
+ p = kmalloc(size, GFP_NOIO);
+ if (p == NULL)
+ return 0;
+
+ p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
+ p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
+ p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
+ p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
+ p->want_lose = cpu_to_be32(mdev->net_conf->want_lose);
+ p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
+
+ if (mdev->agreed_pro_version >= 87)
+ strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
+
+ rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
+ (struct p_header *)p, size);
+ kfree(p);
+ return rv;
+}
+
+int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
+{
+ struct p_uuids p;
+ int i;
+
+ if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+ return 1;
+
+ for (i = UI_CURRENT; i < UI_SIZE; i++)
+ p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
+
+ mdev->comm_bm_set = drbd_bm_total_weight(mdev);
+ p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
+ uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
+ uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
+ uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
+ p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
+
+ put_ldev(mdev);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
+ (struct p_header *)&p, sizeof(p));
+}
+
+int drbd_send_uuids(struct drbd_conf *mdev)
+{
+ return _drbd_send_uuids(mdev, 0);
+}
+
+int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
+{
+ return _drbd_send_uuids(mdev, 8);
+}
+
+
+int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+{
+ struct p_rs_uuid p;
+
+ p.uuid = cpu_to_be64(val);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
+ (struct p_header *)&p, sizeof(p));
+}
+
+int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
+{
+ struct p_sizes p;
+ sector_t d_size, u_size;
+ int q_order_type;
+ int ok;
+
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ D_ASSERT(mdev->ldev->backing_bdev);
+ d_size = drbd_get_max_capacity(mdev->ldev);
+ u_size = mdev->ldev->dc.disk_size;
+ q_order_type = drbd_queue_order_type(mdev);
+ p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
+ put_ldev(mdev);
+ } else {
+ d_size = 0;
+ u_size = 0;
+ q_order_type = QUEUE_ORDERED_NONE;
+ }
+
+ p.d_size = cpu_to_be64(d_size);
+ p.u_size = cpu_to_be64(u_size);
+ p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
+ p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
+ p.queue_order_type = cpu_to_be32(q_order_type);
+
+ ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
+ (struct p_header *)&p, sizeof(p));
+ return ok;
+}
+
+/**
+ * drbd_send_state() - Sends the drbd state to the peer
+ * @mdev: DRBD device.
+ */
+int drbd_send_state(struct drbd_conf *mdev)
+{
+ struct socket *sock;
+ struct p_state p;
+ int ok = 0;
+
+ /* Grab state lock so we wont send state if we're in the middle
+ * of a cluster wide state change on another thread */
+ drbd_state_lock(mdev);
+
+ mutex_lock(&mdev->data.mutex);
+
+ p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
+ sock = mdev->data.socket;
+
+ if (likely(sock != NULL)) {
+ ok = _drbd_send_cmd(mdev, sock, P_STATE,
+ (struct p_header *)&p, sizeof(p), 0);
+ }
+
+ mutex_unlock(&mdev->data.mutex);
+
+ drbd_state_unlock(mdev);
+ return ok;
+}
+
+int drbd_send_state_req(struct drbd_conf *mdev,
+ union drbd_state mask, union drbd_state val)
+{
+ struct p_req_state p;
+
+ p.mask = cpu_to_be32(mask.i);
+ p.val = cpu_to_be32(val.i);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
+ (struct p_header *)&p, sizeof(p));
+}
+
+int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
+{
+ struct p_req_state_reply p;
+
+ p.retcode = cpu_to_be32(retcode);
+
+ return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
+ (struct p_header *)&p, sizeof(p));
+}
+
+int fill_bitmap_rle_bits(struct drbd_conf *mdev,
+ struct p_compressed_bm *p,
+ struct bm_xfer_ctx *c)
+{
+ struct bitstream bs;
+ unsigned long plain_bits;
+ unsigned long tmp;
+ unsigned long rl;
+ unsigned len;
+ unsigned toggle;
+ int bits;
+
+ /* may we use this feature? */
+ if ((mdev->sync_conf.use_rle == 0) ||
+ (mdev->agreed_pro_version < 90))
+ return 0;
+
+ if (c->bit_offset >= c->bm_bits)
+ return 0; /* nothing to do. */
+
+ /* use at most thus many bytes */
+ bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
+ memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
+ /* plain bits covered in this code string */
+ plain_bits = 0;
+
+ /* p->encoding & 0x80 stores whether the first run length is set.
+ * bit offset is implicit.
+ * start with toggle == 2 to be able to tell the first iteration */
+ toggle = 2;
+
+ /* see how much plain bits we can stuff into one packet
+ * using RLE and VLI. */
+ do {
+ tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
+ : _drbd_bm_find_next(mdev, c->bit_offset);
+ if (tmp == -1UL)
+ tmp = c->bm_bits;
+ rl = tmp - c->bit_offset;
+
+ if (toggle == 2) { /* first iteration */
+ if (rl == 0) {
+ /* the first checked bit was set,
+ * store start value, */
+ DCBP_set_start(p, 1);
+ /* but skip encoding of zero run length */
+ toggle = !toggle;
+ continue;
+ }
+ DCBP_set_start(p, 0);
+ }
+
+ /* paranoia: catch zero runlength.
+ * can only happen if bitmap is modified while we scan it. */
+ if (rl == 0) {
+ dev_err(DEV, "unexpected zero runlength while encoding bitmap "
+ "t:%u bo:%lu\n", toggle, c->bit_offset);
+ return -1;
+ }
+
+ bits = vli_encode_bits(&bs, rl);
+ if (bits == -ENOBUFS) /* buffer full */
+ break;
+ if (bits <= 0) {
+ dev_err(DEV, "error while encoding bitmap: %d\n", bits);
+ return 0;
+ }
+
+ toggle = !toggle;
+ plain_bits += rl;
+ c->bit_offset = tmp;
+ } while (c->bit_offset < c->bm_bits);
+
+ len = bs.cur.b - p->code + !!bs.cur.bit;
+
+ if (plain_bits < (len << 3)) {
+ /* incompressible with this method.
+ * we need to rewind both word and bit position. */
+ c->bit_offset -= plain_bits;
+ bm_xfer_ctx_bit_to_word_offset(c);
+ c->bit_offset = c->word_offset * BITS_PER_LONG;
+ return 0;
+ }
+
+ /* RLE + VLI was able to compress it just fine.
+ * update c->word_offset. */
+ bm_xfer_ctx_bit_to_word_offset(c);
+
+ /* store pad_bits */
+ DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
+
+ return len;
+}
+
+enum { OK, FAILED, DONE }
+send_bitmap_rle_or_plain(struct drbd_conf *mdev,
+ struct p_header *h, struct bm_xfer_ctx *c)
+{
+ struct p_compressed_bm *p = (void*)h;
+ unsigned long num_words;
+ int len;
+ int ok;
+
+ len = fill_bitmap_rle_bits(mdev, p, c);
+
+ if (len < 0)
+ return FAILED;
+
+ if (len) {
+ DCBP_set_code(p, RLE_VLI_Bits);
+ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
+ sizeof(*p) + len, 0);
+
+ c->packets[0]++;
+ c->bytes[0] += sizeof(*p) + len;
+
+ if (c->bit_offset >= c->bm_bits)
+ len = 0; /* DONE */
+ } else {
+ /* was not compressible.
+ * send a buffer full of plain text bits instead. */
+ num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
+ len = num_words * sizeof(long);
+ if (len)
+ drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
+ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
+ h, sizeof(struct p_header) + len, 0);
+ c->word_offset += num_words;
+ c->bit_offset = c->word_offset * BITS_PER_LONG;
+
+ c->packets[1]++;
+ c->bytes[1] += sizeof(struct p_header) + len;
+
+ if (c->bit_offset > c->bm_bits)
+ c->bit_offset = c->bm_bits;
+ }
+ ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
+
+ if (ok == DONE)
+ INFO_bm_xfer_stats(mdev, "send", c);
+ return ok;
+}
+
+/* See the comment at receive_bitmap() */
+int _drbd_send_bitmap(struct drbd_conf *mdev)
+{
+ struct bm_xfer_ctx c;
+ struct p_header *p;
+ int ret;
+
+ ERR_IF(!mdev->bitmap) return FALSE;
+
+ /* maybe we should use some per thread scratch page,
+ * and allocate that during initial device creation? */
+ p = (struct p_header *) __get_free_page(GFP_NOIO);
+ if (!p) {
+ dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+ return FALSE;
+ }
+
+ if (get_ldev(mdev)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+ dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
+ drbd_bm_set_all(mdev);
+ if (drbd_bm_write(mdev)) {
+ /* write_bm did fail! Leave full sync flag set in Meta P_DATA
+ * but otherwise process as per normal - need to tell other
+ * side that a full resync is required! */
+ dev_err(DEV, "Failed to write bitmap to disk!\n");
+ } else {
+ drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ }
+ }
+ put_ldev(mdev);
+ }
+
+ c = (struct bm_xfer_ctx) {
+ .bm_bits = drbd_bm_bits(mdev),
+ .bm_words = drbd_bm_words(mdev),
+ };
+
+ do {
+ ret = send_bitmap_rle_or_plain(mdev, p, &c);
+ } while (ret == OK);
+
+ free_page((unsigned long) p);
+ return (ret == DONE);
+}
+
+int drbd_send_bitmap(struct drbd_conf *mdev)
+{
+ int err;
+
+ if (!drbd_get_data_sock(mdev))
+ return -1;
+ err = !_drbd_send_bitmap(mdev);
+ drbd_put_data_sock(mdev);
+ return err;
+}
+
+int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
+{
+ int ok;
+ struct p_barrier_ack p;
+
+ p.barrier = barrier_nr;
+ p.set_size = cpu_to_be32(set_size);
+
+ if (mdev->state.conn < C_CONNECTED)
+ return FALSE;
+ ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
+ (struct p_header *)&p, sizeof(p));
+ return ok;
+}
+
+/**
+ * _drbd_send_ack() - Sends an ack packet
+ * @mdev: DRBD device.
+ * @cmd: Packet command code.
+ * @sector: sector, needs to be in big endian byte order
+ * @blksize: size in byte, needs to be in big endian byte order
+ * @block_id: Id, big endian byte order
+ */
+static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
+ u64 sector,
+ u32 blksize,
+ u64 block_id)
+{
+ int ok;
+ struct p_block_ack p;
+
+ p.sector = sector;
+ p.block_id = block_id;
+ p.blksize = blksize;
+ p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
+
+ if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
+ return FALSE;
+ ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
+ (struct p_header *)&p, sizeof(p));
+ return ok;
+}
+
+int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_data *dp)
+{
+ const int header_size = sizeof(struct p_data)
+ - sizeof(struct p_header);
+ int data_size = ((struct p_header *)dp)->length - header_size;
+
+ return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
+ dp->block_id);
+}
+
+int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_block_req *rp)
+{
+ return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
+}
+
+/**
+ * drbd_send_ack() - Sends an ack packet
+ * @mdev: DRBD device.
+ * @cmd: Packet command code.
+ * @e: Epoch entry.
+ */
+int drbd_send_ack(struct drbd_conf *mdev,
+ enum drbd_packets cmd, struct drbd_epoch_entry *e)
+{
+ return _drbd_send_ack(mdev, cmd,
+ cpu_to_be64(e->sector),
+ cpu_to_be32(e->size),
+ e->block_id);
+}
+
+/* This function misuses the block_id field to signal if the blocks
+ * are is sync or not. */
+int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+ sector_t sector, int blksize, u64 block_id)
+{
+ return _drbd_send_ack(mdev, cmd,
+ cpu_to_be64(sector),
+ cpu_to_be32(blksize),
+ cpu_to_be64(block_id));
+}
+
+int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
+ sector_t sector, int size, u64 block_id)
+{
+ int ok;
+ struct p_block_req p;
+
+ p.sector = cpu_to_be64(sector);
+ p.block_id = block_id;
+ p.blksize = cpu_to_be32(size);
+
+ ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
+ (struct p_header *)&p, sizeof(p));
+ return ok;
+}
+
+int drbd_send_drequest_csum(struct drbd_conf *mdev,
+ sector_t sector, int size,
+ void *digest, int digest_size,
+ enum drbd_packets cmd)
+{
+ int ok;
+ struct p_block_req p;
+
+ p.sector = cpu_to_be64(sector);
+ p.block_id = BE_DRBD_MAGIC + 0xbeef;
+ p.blksize = cpu_to_be32(size);
+
+ p.head.magic = BE_DRBD_MAGIC;
+ p.head.command = cpu_to_be16(cmd);
+ p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size);
+
+ mutex_lock(&mdev->data.mutex);
+
+ ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
+ ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
+
+ mutex_unlock(&mdev->data.mutex);
+
+ return ok;
+}
+
+int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ int ok;
+ struct p_block_req p;
+
+ p.sector = cpu_to_be64(sector);
+ p.block_id = BE_DRBD_MAGIC + 0xbabe;
+ p.blksize = cpu_to_be32(size);
+
+ ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
+ (struct p_header *)&p, sizeof(p));
+ return ok;
+}
+
+/* called on sndtimeo
+ * returns FALSE if we should retry,
+ * TRUE if we think connection is dead
+ */
+static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
+{
+ int drop_it;
+ /* long elapsed = (long)(jiffies - mdev->last_received); */
+
+ drop_it = mdev->meta.socket == sock
+ || !mdev->asender.task
+ || get_t_state(&mdev->asender) != Running
+ || mdev->state.conn < C_CONNECTED;
+
+ if (drop_it)
+ return TRUE;
+
+ drop_it = !--mdev->ko_count;
+ if (!drop_it) {
+ dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
+ current->comm, current->pid, mdev->ko_count);
+ request_ping(mdev);
+ }
+
+ return drop_it; /* && (mdev->state == R_PRIMARY) */;
+}
+
+/* The idea of sendpage seems to be to put some kind of reference
+ * to the page into the skb, and to hand it over to the NIC. In
+ * this process get_page() gets called.
+ *
+ * As soon as the page was really sent over the network put_page()
+ * gets called by some part of the network layer. [ NIC driver? ]
+ *
+ * [ get_page() / put_page() increment/decrement the count. If count
+ * reaches 0 the page will be freed. ]
+ *
+ * This works nicely with pages from FSs.
+ * But this means that in protocol A we might signal IO completion too early!
+ *
+ * In order not to corrupt data during a resync we must make sure
+ * that we do not reuse our own buffer pages (EEs) to early, therefore
+ * we have the net_ee list.
+ *
+ * XFS seems to have problems, still, it submits pages with page_count == 0!
+ * As a workaround, we disable sendpage on pages
+ * with page_count == 0 or PageSlab.
+ */
+static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
+ int offset, size_t size)
+{
+ int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+ kunmap(page);
+ if (sent == size)
+ mdev->send_cnt += size>>9;
+ return sent == size;
+}
+
+static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
+ int offset, size_t size)
+{
+ mm_segment_t oldfs = get_fs();
+ int sent, ok;
+ int len = size;
+
+ /* e.g. XFS meta- & log-data is in slab pages, which have a
+ * page_count of 0 and/or have PageSlab() set.
+ * we cannot use send_page for those, as that does get_page();
+ * put_page(); and would cause either a VM_BUG directly, or
+ * __page_cache_release a page that would actually still be referenced
+ * by someone, leading to some obscure delayed Oops somewhere else. */
+ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+ return _drbd_no_send_page(mdev, page, offset, size);
+
+ drbd_update_congested(mdev);
+ set_fs(KERNEL_DS);
+ do {
+ sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
+ offset, len,
+ MSG_NOSIGNAL);
+ if (sent == -EAGAIN) {
+ if (we_should_drop_the_connection(mdev,
+ mdev->data.socket))
+ break;
+ else
+ continue;
+ }
+ if (sent <= 0) {
+ dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
+ __func__, (int)size, len, sent);
+ break;
+ }
+ len -= sent;
+ offset += sent;
+ } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
+ set_fs(oldfs);
+ clear_bit(NET_CONGESTED, &mdev->flags);
+
+ ok = (len == 0);
+ if (likely(ok))
+ mdev->send_cnt += size>>9;
+ return ok;
+}
+
+static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (!_drbd_no_send_page(mdev, bvec->bv_page,
+ bvec->bv_offset, bvec->bv_len))
+ return 0;
+ }
+ return 1;
+}
+
+static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (!_drbd_send_page(mdev, bvec->bv_page,
+ bvec->bv_offset, bvec->bv_len))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Used to send write requests
+ * R_PRIMARY -> Peer (P_DATA)
+ */
+int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ int ok = 1;
+ struct p_data p;
+ unsigned int dp_flags = 0;
+ void *dgb;
+ int dgs;
+
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+
+ p.head.magic = BE_DRBD_MAGIC;
+ p.head.command = cpu_to_be16(P_DATA);
+ p.head.length =
+ cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size);
+
+ p.sector = cpu_to_be64(req->sector);
+ p.block_id = (unsigned long)req;
+ p.seq_num = cpu_to_be32(req->seq_num =
+ atomic_add_return(1, &mdev->packet_seq));
+ dp_flags = 0;
+
+ /* NOTE: no need to check if barriers supported here as we would
+ * not pass the test in make_request_common in that case
+ */
+ if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) {
+ dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n");
+ /* dp_flags |= DP_HARDBARRIER; */
+ }
+ if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO))
+ dp_flags |= DP_RW_SYNC;
+ /* for now handle SYNCIO and UNPLUG
+ * as if they still were one and the same flag */
+ if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG))
+ dp_flags |= DP_RW_SYNC;
+ if (mdev->state.conn >= C_SYNC_SOURCE &&
+ mdev->state.conn <= C_PAUSED_SYNC_T)
+ dp_flags |= DP_MAY_SET_IN_SYNC;
+
+ p.dp_flags = cpu_to_be32(dp_flags);
+ set_bit(UNPLUG_REMOTE, &mdev->flags);
+ ok = (sizeof(p) ==
+ drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
+ if (ok && dgs) {
+ dgb = mdev->int_dig_out;
+ drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
+ ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+ }
+ if (ok) {
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+ ok = _drbd_send_bio(mdev, req->master_bio);
+ else
+ ok = _drbd_send_zc_bio(mdev, req->master_bio);
+ }
+
+ drbd_put_data_sock(mdev);
+ return ok;
+}
+
+/* answer packet, used to send data back for read requests:
+ * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
+ * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
+ */
+int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct drbd_epoch_entry *e)
+{
+ int ok;
+ struct p_data p;
+ void *dgb;
+ int dgs;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+
+ p.head.magic = BE_DRBD_MAGIC;
+ p.head.command = cpu_to_be16(cmd);
+ p.head.length =
+ cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size);
+
+ p.sector = cpu_to_be64(e->sector);
+ p.block_id = e->block_id;
+ /* p.seq_num = 0; No sequence numbers here.. */
+
+ /* Only called by our kernel thread.
+ * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
+ * in response to admin command or module unload.
+ */
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+
+ ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
+ sizeof(p), MSG_MORE);
+ if (ok && dgs) {
+ dgb = mdev->int_dig_out;
+ drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb);
+ ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+ }
+ if (ok)
+ ok = _drbd_send_zc_bio(mdev, e->private_bio);
+
+ drbd_put_data_sock(mdev);
+ return ok;
+}
+
+/*
+ drbd_send distinguishes two cases:
+
+ Packets sent via the data socket "sock"
+ and packets sent via the meta data socket "msock"
+
+ sock msock
+ -----------------+-------------------------+------------------------------
+ timeout conf.timeout / 2 conf.timeout / 2
+ timeout action send a ping via msock Abort communication
+ and close all sockets
+*/
+
+/*
+ * you must have down()ed the appropriate [m]sock_mutex elsewhere!
+ */
+int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+ void *buf, size_t size, unsigned msg_flags)
+{
+ struct kvec iov;
+ struct msghdr msg;
+ int rv, sent = 0;
+
+ if (!sock)
+ return -1000;
+
+ /* THINK if (signal_pending) return ... ? */
+
+ iov.iov_base = buf;
+ iov.iov_len = size;
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+ if (sock == mdev->data.socket) {
+ mdev->ko_count = mdev->net_conf->ko_count;
+ drbd_update_congested(mdev);
+ }
+ do {
+ /* STRANGE
+ * tcp_sendmsg does _not_ use its size parameter at all ?
+ *
+ * -EAGAIN on timeout, -EINTR on signal.
+ */
+/* THINK
+ * do we need to block DRBD_SIG if sock == &meta.socket ??
+ * otherwise wake_asender() might interrupt some send_*Ack !
+ */
+ rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
+ if (rv == -EAGAIN) {
+ if (we_should_drop_the_connection(mdev, sock))
+ break;
+ else
+ continue;
+ }
+ D_ASSERT(rv != 0);
+ if (rv == -EINTR) {
+ flush_signals(current);
+ rv = 0;
+ }
+ if (rv < 0)
+ break;
+ sent += rv;
+ iov.iov_base += rv;
+ iov.iov_len -= rv;
+ } while (sent < size);
+
+ if (sock == mdev->data.socket)
+ clear_bit(NET_CONGESTED, &mdev->flags);
+
+ if (rv <= 0) {
+ if (rv != -EAGAIN) {
+ dev_err(DEV, "%s_sendmsg returned %d\n",
+ sock == mdev->meta.socket ? "msock" : "sock",
+ rv);
+ drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+ } else
+ drbd_force_state(mdev, NS(conn, C_TIMEOUT));
+ }
+
+ return sent;
+}
+
+static int drbd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct drbd_conf *mdev = bdev->bd_disk->private_data;
+ unsigned long flags;
+ int rv = 0;
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ /* to have a stable mdev->state.role
+ * and no race with updating open_cnt */
+
+ if (mdev->state.role != R_PRIMARY) {
+ if (mode & FMODE_WRITE)
+ rv = -EROFS;
+ else if (!allow_oos)
+ rv = -EMEDIUMTYPE;
+ }
+
+ if (!rv)
+ mdev->open_cnt++;
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ return rv;
+}
+
+static int drbd_release(struct gendisk *gd, fmode_t mode)
+{
+ struct drbd_conf *mdev = gd->private_data;
+ mdev->open_cnt--;
+ return 0;
+}
+
+static void drbd_unplug_fn(struct request_queue *q)
+{
+ struct drbd_conf *mdev = q->queuedata;
+
+ /* unplug FIRST */
+ spin_lock_irq(q->queue_lock);
+ blk_remove_plug(q);
+ spin_unlock_irq(q->queue_lock);
+
+ /* only if connected */
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
+ D_ASSERT(mdev->state.role == R_PRIMARY);
+ if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
+ /* add to the data.work queue,
+ * unless already queued.
+ * XXX this might be a good addition to drbd_queue_work
+ * anyways, to detect "double queuing" ... */
+ if (list_empty(&mdev->unplug_work.list))
+ drbd_queue_work(&mdev->data.work,
+ &mdev->unplug_work);
+ }
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (mdev->state.disk >= D_INCONSISTENT)
+ drbd_kick_lo(mdev);
+}
+
+static void drbd_set_defaults(struct drbd_conf *mdev)
+{
+ mdev->sync_conf.after = DRBD_AFTER_DEF;
+ mdev->sync_conf.rate = DRBD_RATE_DEF;
+ mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF;
+ mdev->state = (union drbd_state) {
+ { .role = R_SECONDARY,
+ .peer = R_UNKNOWN,
+ .conn = C_STANDALONE,
+ .disk = D_DISKLESS,
+ .pdsk = D_UNKNOWN,
+ .susp = 0
+ } };
+}
+
+void drbd_init_set_defaults(struct drbd_conf *mdev)
+{
+ /* the memset(,0,) did most of this.
+ * note: only assignments, no allocation in here */
+
+ drbd_set_defaults(mdev);
+
+ /* for now, we do NOT yet support it,
+ * even though we start some framework
+ * to eventually support barriers */
+ set_bit(NO_BARRIER_SUPP, &mdev->flags);
+
+ atomic_set(&mdev->ap_bio_cnt, 0);
+ atomic_set(&mdev->ap_pending_cnt, 0);
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ atomic_set(&mdev->unacked_cnt, 0);
+ atomic_set(&mdev->local_cnt, 0);
+ atomic_set(&mdev->net_cnt, 0);
+ atomic_set(&mdev->packet_seq, 0);
+ atomic_set(&mdev->pp_in_use, 0);
+
+ mutex_init(&mdev->md_io_mutex);
+ mutex_init(&mdev->data.mutex);
+ mutex_init(&mdev->meta.mutex);
+ sema_init(&mdev->data.work.s, 0);
+ sema_init(&mdev->meta.work.s, 0);
+ mutex_init(&mdev->state_mutex);
+
+ spin_lock_init(&mdev->data.work.q_lock);
+ spin_lock_init(&mdev->meta.work.q_lock);
+
+ spin_lock_init(&mdev->al_lock);
+ spin_lock_init(&mdev->req_lock);
+ spin_lock_init(&mdev->peer_seq_lock);
+ spin_lock_init(&mdev->epoch_lock);
+
+ INIT_LIST_HEAD(&mdev->active_ee);
+ INIT_LIST_HEAD(&mdev->sync_ee);
+ INIT_LIST_HEAD(&mdev->done_ee);
+ INIT_LIST_HEAD(&mdev->read_ee);
+ INIT_LIST_HEAD(&mdev->net_ee);
+ INIT_LIST_HEAD(&mdev->resync_reads);
+ INIT_LIST_HEAD(&mdev->data.work.q);
+ INIT_LIST_HEAD(&mdev->meta.work.q);
+ INIT_LIST_HEAD(&mdev->resync_work.list);
+ INIT_LIST_HEAD(&mdev->unplug_work.list);
+ INIT_LIST_HEAD(&mdev->md_sync_work.list);
+ INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
+ mdev->resync_work.cb = w_resync_inactive;
+ mdev->unplug_work.cb = w_send_write_hint;
+ mdev->md_sync_work.cb = w_md_sync;
+ mdev->bm_io_work.w.cb = w_bitmap_io;
+ init_timer(&mdev->resync_timer);
+ init_timer(&mdev->md_sync_timer);
+ mdev->resync_timer.function = resync_timer_fn;
+ mdev->resync_timer.data = (unsigned long) mdev;
+ mdev->md_sync_timer.function = md_sync_timer_fn;
+ mdev->md_sync_timer.data = (unsigned long) mdev;
+
+ init_waitqueue_head(&mdev->misc_wait);
+ init_waitqueue_head(&mdev->state_wait);
+ init_waitqueue_head(&mdev->ee_wait);
+ init_waitqueue_head(&mdev->al_wait);
+ init_waitqueue_head(&mdev->seq_wait);
+
+ drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
+ drbd_thread_init(mdev, &mdev->worker, drbd_worker);
+ drbd_thread_init(mdev, &mdev->asender, drbd_asender);
+
+ mdev->agreed_pro_version = PRO_VERSION_MAX;
+ mdev->write_ordering = WO_bio_barrier;
+ mdev->resync_wenr = LC_FREE;
+}
+
+void drbd_mdev_cleanup(struct drbd_conf *mdev)
+{
+ if (mdev->receiver.t_state != None)
+ dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
+ mdev->receiver.t_state);
+
+ /* no need to lock it, I'm the only thread alive */
+ if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
+ dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
+ mdev->al_writ_cnt =
+ mdev->bm_writ_cnt =
+ mdev->read_cnt =
+ mdev->recv_cnt =
+ mdev->send_cnt =
+ mdev->writ_cnt =
+ mdev->p_size =
+ mdev->rs_start =
+ mdev->rs_total =
+ mdev->rs_failed =
+ mdev->rs_mark_left =
+ mdev->rs_mark_time = 0;
+ D_ASSERT(mdev->net_conf == NULL);
+
+ drbd_set_my_capacity(mdev, 0);
+ if (mdev->bitmap) {
+ /* maybe never allocated. */
+ drbd_bm_resize(mdev, 0);
+ drbd_bm_cleanup(mdev);
+ }
+
+ drbd_free_resources(mdev);
+
+ /*
+ * currently we drbd_init_ee only on module load, so
+ * we may do drbd_release_ee only on module unload!
+ */
+ D_ASSERT(list_empty(&mdev->active_ee));
+ D_ASSERT(list_empty(&mdev->sync_ee));
+ D_ASSERT(list_empty(&mdev->done_ee));
+ D_ASSERT(list_empty(&mdev->read_ee));
+ D_ASSERT(list_empty(&mdev->net_ee));
+ D_ASSERT(list_empty(&mdev->resync_reads));
+ D_ASSERT(list_empty(&mdev->data.work.q));
+ D_ASSERT(list_empty(&mdev->meta.work.q));
+ D_ASSERT(list_empty(&mdev->resync_work.list));
+ D_ASSERT(list_empty(&mdev->unplug_work.list));
+
+}
+
+
+static void drbd_destroy_mempools(void)
+{
+ struct page *page;
+
+ while (drbd_pp_pool) {
+ page = drbd_pp_pool;
+ drbd_pp_pool = (struct page *)page_private(page);
+ __free_page(page);
+ drbd_pp_vacant--;
+ }
+
+ /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
+
+ if (drbd_ee_mempool)
+ mempool_destroy(drbd_ee_mempool);
+ if (drbd_request_mempool)
+ mempool_destroy(drbd_request_mempool);
+ if (drbd_ee_cache)
+ kmem_cache_destroy(drbd_ee_cache);
+ if (drbd_request_cache)
+ kmem_cache_destroy(drbd_request_cache);
+ if (drbd_bm_ext_cache)
+ kmem_cache_destroy(drbd_bm_ext_cache);
+ if (drbd_al_ext_cache)
+ kmem_cache_destroy(drbd_al_ext_cache);
+
+ drbd_ee_mempool = NULL;
+ drbd_request_mempool = NULL;
+ drbd_ee_cache = NULL;
+ drbd_request_cache = NULL;
+ drbd_bm_ext_cache = NULL;
+ drbd_al_ext_cache = NULL;
+
+ return;
+}
+
+static int drbd_create_mempools(void)
+{
+ struct page *page;
+ const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+ int i;
+
+ /* prepare our caches and mempools */
+ drbd_request_mempool = NULL;
+ drbd_ee_cache = NULL;
+ drbd_request_cache = NULL;
+ drbd_bm_ext_cache = NULL;
+ drbd_al_ext_cache = NULL;
+ drbd_pp_pool = NULL;
+
+ /* caches */
+ drbd_request_cache = kmem_cache_create(
+ "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
+ if (drbd_request_cache == NULL)
+ goto Enomem;
+
+ drbd_ee_cache = kmem_cache_create(
+ "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
+ if (drbd_ee_cache == NULL)
+ goto Enomem;
+
+ drbd_bm_ext_cache = kmem_cache_create(
+ "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
+ if (drbd_bm_ext_cache == NULL)
+ goto Enomem;
+
+ drbd_al_ext_cache = kmem_cache_create(
+ "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
+ if (drbd_al_ext_cache == NULL)
+ goto Enomem;
+
+ /* mempools */
+ drbd_request_mempool = mempool_create(number,
+ mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
+ if (drbd_request_mempool == NULL)
+ goto Enomem;
+
+ drbd_ee_mempool = mempool_create(number,
+ mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
+ if (drbd_request_mempool == NULL)
+ goto Enomem;
+
+ /* drbd's page pool */
+ spin_lock_init(&drbd_pp_lock);
+
+ for (i = 0; i < number; i++) {
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page)
+ goto Enomem;
+ set_page_private(page, (unsigned long)drbd_pp_pool);
+ drbd_pp_pool = page;
+ }
+ drbd_pp_vacant = number;
+
+ return 0;
+
+Enomem:
+ drbd_destroy_mempools(); /* in case we allocated some */
+ return -ENOMEM;
+}
+
+static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
+ void *unused)
+{
+ /* just so we have it. you never know what interesting things we
+ * might want to do here some day...
+ */
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block drbd_notifier = {
+ .notifier_call = drbd_notify_sys,
+};
+
+static void drbd_release_ee_lists(struct drbd_conf *mdev)
+{
+ int rr;
+
+ rr = drbd_release_ee(mdev, &mdev->active_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in active list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->sync_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in sync list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->read_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in read list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->done_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in done list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->net_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in net list found!\n", rr);
+}
+
+/* caution. no locking.
+ * currently only used from module cleanup code. */
+static void drbd_delete_device(unsigned int minor)
+{
+ struct drbd_conf *mdev = minor_to_mdev(minor);
+
+ if (!mdev)
+ return;
+
+ /* paranoia asserts */
+ if (mdev->open_cnt != 0)
+ dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
+ __FILE__ , __LINE__);
+
+ ERR_IF (!list_empty(&mdev->data.work.q)) {
+ struct list_head *lp;
+ list_for_each(lp, &mdev->data.work.q) {
+ dev_err(DEV, "lp = %p\n", lp);
+ }
+ };
+ /* end paranoia asserts */
+
+ del_gendisk(mdev->vdisk);
+
+ /* cleanup stuff that may have been allocated during
+ * device (re-)configuration or state changes */
+
+ if (mdev->this_bdev)
+ bdput(mdev->this_bdev);
+
+ drbd_free_resources(mdev);
+
+ drbd_release_ee_lists(mdev);
+
+ /* should be free'd on disconnect? */
+ kfree(mdev->ee_hash);
+ /*
+ mdev->ee_hash_s = 0;
+ mdev->ee_hash = NULL;
+ */
+
+ lc_destroy(mdev->act_log);
+ lc_destroy(mdev->resync);
+
+ kfree(mdev->p_uuid);
+ /* mdev->p_uuid = NULL; */
+
+ kfree(mdev->int_dig_out);
+ kfree(mdev->int_dig_in);
+ kfree(mdev->int_dig_vv);
+
+ /* cleanup the rest that has been
+ * allocated from drbd_new_device
+ * and actually free the mdev itself */
+ drbd_free_mdev(mdev);
+}
+
+static void drbd_cleanup(void)
+{
+ unsigned int i;
+
+ unregister_reboot_notifier(&drbd_notifier);
+
+ drbd_nl_cleanup();
+
+ if (minor_table) {
+ if (drbd_proc)
+ remove_proc_entry("drbd", NULL);
+ i = minor_count;
+ while (i--)
+ drbd_delete_device(i);
+ drbd_destroy_mempools();
+ }
+
+ kfree(minor_table);
+
+ unregister_blkdev(DRBD_MAJOR, "drbd");
+
+ printk(KERN_INFO "drbd: module cleanup done.\n");
+}
+
+/**
+ * drbd_congested() - Callback for pdflush
+ * @congested_data: User data
+ * @bdi_bits: Bits pdflush is currently interested in
+ *
+ * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
+ */
+static int drbd_congested(void *congested_data, int bdi_bits)
+{
+ struct drbd_conf *mdev = congested_data;
+ struct request_queue *q;
+ char reason = '-';
+ int r = 0;
+
+ if (!__inc_ap_bio_cond(mdev)) {
+ /* DRBD has frozen IO */
+ r = bdi_bits;
+ reason = 'd';
+ goto out;
+ }
+
+ if (get_ldev(mdev)) {
+ q = bdev_get_queue(mdev->ldev->backing_bdev);
+ r = bdi_congested(&q->backing_dev_info, bdi_bits);
+ put_ldev(mdev);
+ if (r)
+ reason = 'b';
+ }
+
+ if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
+ r |= (1 << BDI_async_congested);
+ reason = reason == 'b' ? 'a' : 'n';
+ }
+
+out:
+ mdev->congestion_reason = reason;
+ return r;
+}
+
+struct drbd_conf *drbd_new_device(unsigned int minor)
+{
+ struct drbd_conf *mdev;
+ struct gendisk *disk;
+ struct request_queue *q;
+
+ /* GFP_KERNEL, we are outside of all write-out paths */
+ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
+ if (!mdev)
+ return NULL;
+ if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
+ goto out_no_cpumask;
+
+ mdev->minor = minor;
+
+ drbd_init_set_defaults(mdev);
+
+ q = blk_alloc_queue(GFP_KERNEL);
+ if (!q)
+ goto out_no_q;
+ mdev->rq_queue = q;
+ q->queuedata = mdev;
+ blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
+
+ disk = alloc_disk(1);
+ if (!disk)
+ goto out_no_disk;
+ mdev->vdisk = disk;
+
+ set_disk_ro(disk, TRUE);
+
+ disk->queue = q;
+ disk->major = DRBD_MAJOR;
+ disk->first_minor = minor;
+ disk->fops = &drbd_ops;
+ sprintf(disk->disk_name, "drbd%d", minor);
+ disk->private_data = mdev;
+
+ mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
+ /* we have no partitions. we contain only ourselves. */
+ mdev->this_bdev->bd_contains = mdev->this_bdev;
+
+ q->backing_dev_info.congested_fn = drbd_congested;
+ q->backing_dev_info.congested_data = mdev;
+
+ blk_queue_make_request(q, drbd_make_request_26);
+ blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+ blk_queue_merge_bvec(q, drbd_merge_bvec);
+ q->queue_lock = &mdev->req_lock; /* needed since we use */
+ /* plugging on a queue, that actually has no requests! */
+ q->unplug_fn = drbd_unplug_fn;
+
+ mdev->md_io_page = alloc_page(GFP_KERNEL);
+ if (!mdev->md_io_page)
+ goto out_no_io_page;
+
+ if (drbd_bm_init(mdev))
+ goto out_no_bitmap;
+ /* no need to lock access, we are still initializing this minor device. */
+ if (!tl_init(mdev))
+ goto out_no_tl;
+
+ mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
+ if (!mdev->app_reads_hash)
+ goto out_no_app_reads;
+
+ mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
+ if (!mdev->current_epoch)
+ goto out_no_epoch;
+
+ INIT_LIST_HEAD(&mdev->current_epoch->list);
+ mdev->epochs = 1;
+
+ return mdev;
+
+/* out_whatever_else:
+ kfree(mdev->current_epoch); */
+out_no_epoch:
+ kfree(mdev->app_reads_hash);
+out_no_app_reads:
+ tl_cleanup(mdev);
+out_no_tl:
+ drbd_bm_cleanup(mdev);
+out_no_bitmap:
+ __free_page(mdev->md_io_page);
+out_no_io_page:
+ put_disk(disk);
+out_no_disk:
+ blk_cleanup_queue(q);
+out_no_q:
+ free_cpumask_var(mdev->cpu_mask);
+out_no_cpumask:
+ kfree(mdev);
+ return NULL;
+}
+
+/* counterpart of drbd_new_device.
+ * last part of drbd_delete_device. */
+void drbd_free_mdev(struct drbd_conf *mdev)
+{
+ kfree(mdev->current_epoch);
+ kfree(mdev->app_reads_hash);
+ tl_cleanup(mdev);
+ if (mdev->bitmap) /* should no longer be there. */
+ drbd_bm_cleanup(mdev);
+ __free_page(mdev->md_io_page);
+ put_disk(mdev->vdisk);
+ blk_cleanup_queue(mdev->rq_queue);
+ free_cpumask_var(mdev->cpu_mask);
+ kfree(mdev);
+}
+
+
+int __init drbd_init(void)
+{
+ int err;
+
+ if (sizeof(struct p_handshake) != 80) {
+ printk(KERN_ERR
+ "drbd: never change the size or layout "
+ "of the HandShake packet.\n");
+ return -EINVAL;
+ }
+
+ if (1 > minor_count || minor_count > 255) {
+ printk(KERN_ERR
+ "drbd: invalid minor_count (%d)\n", minor_count);
+#ifdef MODULE
+ return -EINVAL;
+#else
+ minor_count = 8;
+#endif
+ }
+
+ err = drbd_nl_init();
+ if (err)
+ return err;
+
+ err = register_blkdev(DRBD_MAJOR, "drbd");
+ if (err) {
+ printk(KERN_ERR
+ "drbd: unable to register block device major %d\n",
+ DRBD_MAJOR);
+ return err;
+ }
+
+ register_reboot_notifier(&drbd_notifier);
+
+ /*
+ * allocate all necessary structs
+ */
+ err = -ENOMEM;
+
+ init_waitqueue_head(&drbd_pp_wait);
+
+ drbd_proc = NULL; /* play safe for drbd_cleanup */
+ minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
+ GFP_KERNEL);
+ if (!minor_table)
+ goto Enomem;
+
+ err = drbd_create_mempools();
+ if (err)
+ goto Enomem;
+
+ drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops);
+ if (!drbd_proc) {
+ printk(KERN_ERR "drbd: unable to register proc file\n");
+ goto Enomem;
+ }
+
+ rwlock_init(&global_state_lock);
+
+ printk(KERN_INFO "drbd: initialized. "
+ "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
+ API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
+ printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
+ printk(KERN_INFO "drbd: registered as block device major %d\n",
+ DRBD_MAJOR);
+ printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
+
+ return 0; /* Success! */
+
+Enomem:
+ drbd_cleanup();
+ if (err == -ENOMEM)
+ /* currently always the case */
+ printk(KERN_ERR "drbd: ran out of memory\n");
+ else
+ printk(KERN_ERR "drbd: initialization failure\n");
+ return err;
+}
+
+void drbd_free_bc(struct drbd_backing_dev *ldev)
+{
+ if (ldev == NULL)
+ return;
+
+ bd_release(ldev->backing_bdev);
+ bd_release(ldev->md_bdev);
+
+ fput(ldev->lo_file);
+ fput(ldev->md_file);
+
+ kfree(ldev);
+}
+
+void drbd_free_sock(struct drbd_conf *mdev)
+{
+ if (mdev->data.socket) {
+ kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
+ sock_release(mdev->data.socket);
+ mdev->data.socket = NULL;
+ }
+ if (mdev->meta.socket) {
+ kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
+ sock_release(mdev->meta.socket);
+ mdev->meta.socket = NULL;
+ }
+}
+
+
+void drbd_free_resources(struct drbd_conf *mdev)
+{
+ crypto_free_hash(mdev->csums_tfm);
+ mdev->csums_tfm = NULL;
+ crypto_free_hash(mdev->verify_tfm);
+ mdev->verify_tfm = NULL;
+ crypto_free_hash(mdev->cram_hmac_tfm);
+ mdev->cram_hmac_tfm = NULL;
+ crypto_free_hash(mdev->integrity_w_tfm);
+ mdev->integrity_w_tfm = NULL;
+ crypto_free_hash(mdev->integrity_r_tfm);
+ mdev->integrity_r_tfm = NULL;
+
+ drbd_free_sock(mdev);
+
+ __no_warn(local,
+ drbd_free_bc(mdev->ldev);
+ mdev->ldev = NULL;);
+}
+
+/* meta data management */
+
+struct meta_data_on_disk {
+ u64 la_size; /* last agreed size. */
+ u64 uuid[UI_SIZE]; /* UUIDs. */
+ u64 device_uuid;
+ u64 reserved_u64_1;
+ u32 flags; /* MDF */
+ u32 magic;
+ u32 md_size_sect;
+ u32 al_offset; /* offset to this block */
+ u32 al_nr_extents; /* important for restoring the AL */
+ /* `-- act_log->nr_elements <-- sync_conf.al_extents */
+ u32 bm_offset; /* offset to the bitmap, from here */
+ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
+ u32 reserved_u32[4];
+
+} __packed;
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev: DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+ struct meta_data_on_disk *buffer;
+ sector_t sector;
+ int i;
+
+ if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+ return;
+ del_timer(&mdev->md_sync_timer);
+
+ /* We use here D_FAILED and not D_ATTACHING because we try to write
+ * metadata even if we detach due to a disk failure! */
+ if (!get_ldev_if_state(mdev, D_FAILED))
+ return;
+
+ mutex_lock(&mdev->md_io_mutex);
+ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+ memset(buffer, 0, 512);
+
+ buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
+ for (i = UI_CURRENT; i < UI_SIZE; i++)
+ buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
+ buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
+ buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+
+ buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
+ buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
+ buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
+ buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
+ buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
+
+ buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
+
+ D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
+ sector = mdev->ldev->md.md_offset;
+
+ if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+ clear_bit(MD_DIRTY, &mdev->flags);
+ } else {
+ /* this was a try anyways ... */
+ dev_err(DEV, "meta data update failed!\n");
+
+ drbd_chk_io_error(mdev, 1, TRUE);
+ }
+
+ /* Update mdev->ldev->md.la_size_sect,
+ * since we updated it on metadata. */
+ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
+
+ mutex_unlock(&mdev->md_io_mutex);
+ put_ldev(mdev);
+}
+
+/**
+ * drbd_md_read() - Reads in the meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Device from which the meta data should be read in.
+ *
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ */
+int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+ struct meta_data_on_disk *buffer;
+ int i, rv = NO_ERROR;
+
+ if (!get_ldev_if_state(mdev, D_ATTACHING))
+ return ERR_IO_MD_DISK;
+
+ mutex_lock(&mdev->md_io_mutex);
+ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+
+ if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
+ /* NOTE: cant do normal error processing here as this is
+ called BEFORE disk is attached */
+ dev_err(DEV, "Error while reading metadata.\n");
+ rv = ERR_IO_MD_DISK;
+ goto err;
+ }
+
+ if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
+ dev_err(DEV, "Error while reading metadata, magic not found.\n");
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+ if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
+ dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
+ be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+ if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
+ dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
+ be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+ if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
+ dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
+ be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+
+ if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
+ dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
+ be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+
+ bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+ for (i = UI_CURRENT; i < UI_SIZE; i++)
+ bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+ bdev->md.flags = be32_to_cpu(buffer->flags);
+ mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
+ bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+
+ if (mdev->sync_conf.al_extents < 7)
+ mdev->sync_conf.al_extents = 127;
+
+ err:
+ mutex_unlock(&mdev->md_io_mutex);
+ put_ldev(mdev);
+
+ return rv;
+}
+
+/**
+ * drbd_md_mark_dirty() - Mark meta data super block as dirty
+ * @mdev: DRBD device.
+ *
+ * Call this function if you change anything that should be written to
+ * the meta-data super block. This function sets MD_DIRTY, and starts a
+ * timer that ensures that within five seconds you have to call drbd_md_sync().
+ */
+void drbd_md_mark_dirty(struct drbd_conf *mdev)
+{
+ set_bit(MD_DIRTY, &mdev->flags);
+ mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
+}
+
+
+static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
+{
+ int i;
+
+ for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
+ mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
+}
+
+void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+ if (idx == UI_CURRENT) {
+ if (mdev->state.role == R_PRIMARY)
+ val |= 1;
+ else
+ val &= ~((u64)1);
+
+ drbd_set_ed_uuid(mdev, val);
+ }
+
+ mdev->ldev->md.uuid[idx] = val;
+ drbd_md_mark_dirty(mdev);
+}
+
+
+void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+ if (mdev->ldev->md.uuid[idx]) {
+ drbd_uuid_move_history(mdev);
+ mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
+ }
+ _drbd_uuid_set(mdev, idx, val);
+}
+
+/**
+ * drbd_uuid_new_current() - Creates a new current UUID
+ * @mdev: DRBD device.
+ *
+ * Creates a new current UUID, and rotates the old current UUID into
+ * the bitmap slot. Causes an incremental resync upon next connect.
+ */
+void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
+{
+ u64 val;
+
+ dev_info(DEV, "Creating new current UUID\n");
+ D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
+ mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
+
+ get_random_bytes(&val, sizeof(u64));
+ _drbd_uuid_set(mdev, UI_CURRENT, val);
+}
+
+void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
+{
+ if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
+ return;
+
+ if (val == 0) {
+ drbd_uuid_move_history(mdev);
+ mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
+ mdev->ldev->md.uuid[UI_BITMAP] = 0;
+ } else {
+ if (mdev->ldev->md.uuid[UI_BITMAP])
+ dev_warn(DEV, "bm UUID already set");
+
+ mdev->ldev->md.uuid[UI_BITMAP] = val;
+ mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
+
+ }
+ drbd_md_mark_dirty(mdev);
+}
+
+/**
+ * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
+ */
+int drbd_bmio_set_n_write(struct drbd_conf *mdev)
+{
+ int rv = -EIO;
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ drbd_bm_set_all(mdev);
+
+ rv = drbd_bm_write(mdev);
+
+ if (!rv) {
+ drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ }
+
+ put_ldev(mdev);
+ }
+
+ return rv;
+}
+
+/**
+ * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
+ */
+int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
+{
+ int rv = -EIO;
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ drbd_bm_clear_all(mdev);
+ rv = drbd_bm_write(mdev);
+ put_ldev(mdev);
+ }
+
+ return rv;
+}
+
+static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct bm_io_work *work = container_of(w, struct bm_io_work, w);
+ int rv;
+
+ D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
+
+ drbd_bm_lock(mdev, work->why);
+ rv = work->io_fn(mdev);
+ drbd_bm_unlock(mdev);
+
+ clear_bit(BITMAP_IO, &mdev->flags);
+ wake_up(&mdev->misc_wait);
+
+ if (work->done)
+ work->done(mdev, rv);
+
+ clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
+ work->why = NULL;
+
+ return 1;
+}
+
+/**
+ * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @done: callback to be called after the bitmap IO was performed
+ * @why: Descriptive text of the reason for doing the IO
+ *
+ * While IO on the bitmap happens we freeze application IO thus we ensure
+ * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
+ * called from worker context. It MUST NOT be used while a previous such
+ * work is still pending!
+ */
+void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ void (*done)(struct drbd_conf *, int),
+ char *why)
+{
+ D_ASSERT(current == mdev->worker.task);
+
+ D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
+ D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
+ D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
+ if (mdev->bm_io_work.why)
+ dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
+ why, mdev->bm_io_work.why);
+
+ mdev->bm_io_work.io_fn = io_fn;
+ mdev->bm_io_work.done = done;
+ mdev->bm_io_work.why = why;
+
+ set_bit(BITMAP_IO, &mdev->flags);
+ if (atomic_read(&mdev->ap_bio_cnt) == 0) {
+ if (list_empty(&mdev->bm_io_work.w.list)) {
+ set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+ drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+ } else
+ dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
+ }
+}
+
+/**
+ * drbd_bitmap_io() - Does an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @why: Descriptive text of the reason for doing the IO
+ *
+ * freezes application IO while that the actual IO operations runs. This
+ * functions MAY NOT be called from worker context.
+ */
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+{
+ int rv;
+
+ D_ASSERT(current != mdev->worker.task);
+
+ drbd_suspend_io(mdev);
+
+ drbd_bm_lock(mdev, why);
+ rv = io_fn(mdev);
+ drbd_bm_unlock(mdev);
+
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
+void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+ if ((mdev->ldev->md.flags & flag) != flag) {
+ drbd_md_mark_dirty(mdev);
+ mdev->ldev->md.flags |= flag;
+ }
+}
+
+void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+ if ((mdev->ldev->md.flags & flag) != 0) {
+ drbd_md_mark_dirty(mdev);
+ mdev->ldev->md.flags &= ~flag;
+ }
+}
+int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
+{
+ return (bdev->md.flags & flag) != 0;
+}
+
+static void md_sync_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
+}
+
+static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
+ drbd_md_sync(mdev);
+
+ return 1;
+}
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+/* Fault insertion support including random number generator shamelessly
+ * stolen from kernel/rcutorture.c */
+struct fault_random_state {
+ unsigned long state;
+ unsigned long count;
+};
+
+#define FAULT_RANDOM_MULT 39916801 /* prime */
+#define FAULT_RANDOM_ADD 479001701 /* prime */
+#define FAULT_RANDOM_REFRESH 10000
+
+/*
+ * Crude but fast random-number generator. Uses a linear congruential
+ * generator, with occasional help from get_random_bytes().
+ */
+static unsigned long
+_drbd_fault_random(struct fault_random_state *rsp)
+{
+ long refresh;
+
+ if (--rsp->count < 0) {
+ get_random_bytes(&refresh, sizeof(refresh));
+ rsp->state += refresh;
+ rsp->count = FAULT_RANDOM_REFRESH;
+ }
+ rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
+ return swahw32(rsp->state);
+}
+
+static char *
+_drbd_fault_str(unsigned int type) {
+ static char *_faults[] = {
+ [DRBD_FAULT_MD_WR] = "Meta-data write",
+ [DRBD_FAULT_MD_RD] = "Meta-data read",
+ [DRBD_FAULT_RS_WR] = "Resync write",
+ [DRBD_FAULT_RS_RD] = "Resync read",
+ [DRBD_FAULT_DT_WR] = "Data write",
+ [DRBD_FAULT_DT_RD] = "Data read",
+ [DRBD_FAULT_DT_RA] = "Data read ahead",
+ [DRBD_FAULT_BM_ALLOC] = "BM allocation",
+ [DRBD_FAULT_AL_EE] = "EE allocation"
+ };
+
+ return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
+}
+
+unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
+{
+ static struct fault_random_state rrs = {0, 0};
+
+ unsigned int ret = (
+ (fault_devs == 0 ||
+ ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
+ (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
+
+ if (ret) {
+ fault_count++;
+
+ if (printk_ratelimit())
+ dev_warn(DEV, "***Simulating %s failure\n",
+ _drbd_fault_str(type));
+ }
+
+ return ret;
+}
+#endif
+
+const char *drbd_buildtag(void)
+{
+ /* DRBD built from external sources has here a reference to the
+ git hash of the source code. */
+
+ static char buildtag[38] = "\0uilt-in";
+
+ if (buildtag[0] == 0) {
+#ifdef CONFIG_MODULES
+ if (THIS_MODULE != NULL)
+ sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
+ else
+#endif
+ buildtag[0] = 'b';
+ }
+
+ return buildtag;
+}
+
+module_init(drbd_init)
+module_exit(drbd_cleanup)
+
+EXPORT_SYMBOL(drbd_conn_str);
+EXPORT_SYMBOL(drbd_role_str);
+EXPORT_SYMBOL(drbd_disk_str);
+EXPORT_SYMBOL(drbd_set_st_err_str);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
new file mode 100644
index 000000000000..436a090b532b
--- /dev/null
+++ b/drivers/block/drbd/drbd_nl.c
@@ -0,0 +1,2364 @@
+/*
+ drbd_nl.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/drbd.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/connector.h>
+#include <linux/blkpg.h>
+#include <linux/cpumask.h>
+#include "drbd_int.h"
+#include "drbd_wrappers.h"
+#include <asm/unaligned.h>
+#include <linux/drbd_tag_magic.h>
+#include <linux/drbd_limits.h>
+
+static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
+static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
+static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
+
+/* see get_sb_bdev and bd_claim */
+static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
+
+/* Generate the tag_list to struct functions */
+#define NL_PACKET(name, number, fields) \
+static int name ## _from_tags(struct drbd_conf *mdev, \
+ unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
+static int name ## _from_tags(struct drbd_conf *mdev, \
+ unsigned short *tags, struct name *arg) \
+{ \
+ int tag; \
+ int dlen; \
+ \
+ while ((tag = get_unaligned(tags++)) != TT_END) { \
+ dlen = get_unaligned(tags++); \
+ switch (tag_number(tag)) { \
+ fields \
+ default: \
+ if (tag & T_MANDATORY) { \
+ dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
+ return 0; \
+ } \
+ } \
+ tags = (unsigned short *)((char *)tags + dlen); \
+ } \
+ return 1; \
+}
+#define NL_INTEGER(pn, pr, member) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
+ arg->member = get_unaligned((int *)(tags)); \
+ break;
+#define NL_INT64(pn, pr, member) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
+ arg->member = get_unaligned((u64 *)(tags)); \
+ break;
+#define NL_BIT(pn, pr, member) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
+ arg->member = *(char *)(tags) ? 1 : 0; \
+ break;
+#define NL_STRING(pn, pr, member, len) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
+ if (dlen > len) { \
+ dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
+ #member, dlen, (unsigned int)len); \
+ return 0; \
+ } \
+ arg->member ## _len = dlen; \
+ memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
+ break;
+#include "linux/drbd_nl.h"
+
+/* Generate the struct to tag_list functions */
+#define NL_PACKET(name, number, fields) \
+static unsigned short* \
+name ## _to_tags(struct drbd_conf *mdev, \
+ struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
+static unsigned short* \
+name ## _to_tags(struct drbd_conf *mdev, \
+ struct name *arg, unsigned short *tags) \
+{ \
+ fields \
+ return tags; \
+}
+
+#define NL_INTEGER(pn, pr, member) \
+ put_unaligned(pn | pr | TT_INTEGER, tags++); \
+ put_unaligned(sizeof(int), tags++); \
+ put_unaligned(arg->member, (int *)tags); \
+ tags = (unsigned short *)((char *)tags+sizeof(int));
+#define NL_INT64(pn, pr, member) \
+ put_unaligned(pn | pr | TT_INT64, tags++); \
+ put_unaligned(sizeof(u64), tags++); \
+ put_unaligned(arg->member, (u64 *)tags); \
+ tags = (unsigned short *)((char *)tags+sizeof(u64));
+#define NL_BIT(pn, pr, member) \
+ put_unaligned(pn | pr | TT_BIT, tags++); \
+ put_unaligned(sizeof(char), tags++); \
+ *(char *)tags = arg->member; \
+ tags = (unsigned short *)((char *)tags+sizeof(char));
+#define NL_STRING(pn, pr, member, len) \
+ put_unaligned(pn | pr | TT_STRING, tags++); \
+ put_unaligned(arg->member ## _len, tags++); \
+ memcpy(tags, arg->member, arg->member ## _len); \
+ tags = (unsigned short *)((char *)tags + arg->member ## _len);
+#include "linux/drbd_nl.h"
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
+void drbd_nl_send_reply(struct cn_msg *, int);
+
+int drbd_khelper(struct drbd_conf *mdev, char *cmd)
+{
+ char *envp[] = { "HOME=/",
+ "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ NULL, /* Will be set to address family */
+ NULL, /* Will be set to address */
+ NULL };
+
+ char mb[12], af[20], ad[60], *afs;
+ char *argv[] = {usermode_helper, cmd, mb, NULL };
+ int ret;
+
+ snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
+
+ if (get_net_conf(mdev)) {
+ switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
+ case AF_INET6:
+ afs = "ipv6";
+ snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
+ &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
+ break;
+ case AF_INET:
+ afs = "ipv4";
+ snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
+ &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
+ break;
+ default:
+ afs = "ssocks";
+ snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
+ &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
+ }
+ snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
+ envp[3]=af;
+ envp[4]=ad;
+ put_net_conf(mdev);
+ }
+
+ dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
+
+ drbd_bcast_ev_helper(mdev, cmd);
+ ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+ if (ret)
+ dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
+ usermode_helper, cmd, mb,
+ (ret >> 8) & 0xff, ret);
+ else
+ dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
+ usermode_helper, cmd, mb,
+ (ret >> 8) & 0xff, ret);
+
+ if (ret < 0) /* Ignore any ERRNOs we got. */
+ ret = 0;
+
+ return ret;
+}
+
+enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+{
+ char *ex_to_string;
+ int r;
+ enum drbd_disk_state nps;
+ enum drbd_fencing_p fp;
+
+ D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+
+ if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ } else {
+ dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
+ return mdev->state.pdsk;
+ }
+
+ if (fp == FP_STONITH)
+ _drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);
+
+ r = drbd_khelper(mdev, "fence-peer");
+
+ switch ((r>>8) & 0xff) {
+ case 3: /* peer is inconsistent */
+ ex_to_string = "peer is inconsistent or worse";
+ nps = D_INCONSISTENT;
+ break;
+ case 4: /* peer got outdated, or was already outdated */
+ ex_to_string = "peer was fenced";
+ nps = D_OUTDATED;
+ break;
+ case 5: /* peer was down */
+ if (mdev->state.disk == D_UP_TO_DATE) {
+ /* we will(have) create(d) a new UUID anyways... */
+ ex_to_string = "peer is unreachable, assumed to be dead";
+ nps = D_OUTDATED;
+ } else {
+ ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
+ nps = mdev->state.pdsk;
+ }
+ break;
+ case 6: /* Peer is primary, voluntarily outdate myself.
+ * This is useful when an unconnected R_SECONDARY is asked to
+ * become R_PRIMARY, but finds the other peer being active. */
+ ex_to_string = "peer is active";
+ dev_warn(DEV, "Peer is primary, outdating myself.\n");
+ nps = D_UNKNOWN;
+ _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+ break;
+ case 7:
+ if (fp != FP_STONITH)
+ dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+ ex_to_string = "peer was stonithed";
+ nps = D_OUTDATED;
+ break;
+ default:
+ /* The script is broken ... */
+ nps = D_UNKNOWN;
+ dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+ return nps;
+ }
+
+ dev_info(DEV, "fence-peer helper returned %d (%s)\n",
+ (r>>8) & 0xff, ex_to_string);
+ return nps;
+}
+
+
+int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
+{
+ const int max_tries = 4;
+ int r = 0;
+ int try = 0;
+ int forced = 0;
+ union drbd_state mask, val;
+ enum drbd_disk_state nps;
+
+ if (new_role == R_PRIMARY)
+ request_ping(mdev); /* Detect a dead peer ASAP */
+
+ mutex_lock(&mdev->state_mutex);
+
+ mask.i = 0; mask.role = R_MASK;
+ val.i = 0; val.role = new_role;
+
+ while (try++ < max_tries) {
+ r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
+
+ /* in case we first succeeded to outdate,
+ * but now suddenly could establish a connection */
+ if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
+ val.pdsk = 0;
+ mask.pdsk = 0;
+ continue;
+ }
+
+ if (r == SS_NO_UP_TO_DATE_DISK && force &&
+ (mdev->state.disk == D_INCONSISTENT ||
+ mdev->state.disk == D_OUTDATED)) {
+ mask.disk = D_MASK;
+ val.disk = D_UP_TO_DATE;
+ forced = 1;
+ continue;
+ }
+
+ if (r == SS_NO_UP_TO_DATE_DISK &&
+ mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
+ D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+ nps = drbd_try_outdate_peer(mdev);
+
+ if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+ val.disk = D_UP_TO_DATE;
+ mask.disk = D_MASK;
+ }
+
+ val.pdsk = nps;
+ mask.pdsk = D_MASK;
+
+ continue;
+ }
+
+ if (r == SS_NOTHING_TO_DO)
+ goto fail;
+ if (r == SS_PRIMARY_NOP && mask.pdsk == 0) {
+ nps = drbd_try_outdate_peer(mdev);
+
+ if (force && nps > D_OUTDATED) {
+ dev_warn(DEV, "Forced into split brain situation!\n");
+ nps = D_OUTDATED;
+ }
+
+ mask.pdsk = D_MASK;
+ val.pdsk = nps;
+
+ continue;
+ }
+ if (r == SS_TWO_PRIMARIES) {
+ /* Maybe the peer is detected as dead very soon...
+ retry at most once more in this case. */
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10);
+ if (try < max_tries)
+ try = max_tries - 1;
+ continue;
+ }
+ if (r < SS_SUCCESS) {
+ r = _drbd_request_state(mdev, mask, val,
+ CS_VERBOSE + CS_WAIT_COMPLETE);
+ if (r < SS_SUCCESS)
+ goto fail;
+ }
+ break;
+ }
+
+ if (r < SS_SUCCESS)
+ goto fail;
+
+ if (forced)
+ dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
+
+ /* Wait until nothing is on the fly :) */
+ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
+
+ if (new_role == R_SECONDARY) {
+ set_disk_ro(mdev->vdisk, TRUE);
+ if (get_ldev(mdev)) {
+ mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
+ put_ldev(mdev);
+ }
+ } else {
+ if (get_net_conf(mdev)) {
+ mdev->net_conf->want_lose = 0;
+ put_net_conf(mdev);
+ }
+ set_disk_ro(mdev->vdisk, FALSE);
+ if (get_ldev(mdev)) {
+ if (((mdev->state.conn < C_CONNECTED ||
+ mdev->state.pdsk <= D_FAILED)
+ && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
+ drbd_uuid_new_current(mdev);
+
+ mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
+ put_ldev(mdev);
+ }
+ }
+
+ if ((new_role == R_SECONDARY) && get_ldev(mdev)) {
+ drbd_al_to_on_disk_bm(mdev);
+ put_ldev(mdev);
+ }
+
+ if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
+ /* if this was forced, we should consider sync */
+ if (forced)
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+
+ drbd_md_sync(mdev);
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ fail:
+ mutex_unlock(&mdev->state_mutex);
+ return r;
+}
+
+
+static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ struct primary primary_args;
+
+ memset(&primary_args, 0, sizeof(struct primary));
+ if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
+ reply->ret_code = ERR_MANDATORY_TAG;
+ return 0;
+ }
+
+ reply->ret_code =
+ drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer);
+
+ return 0;
+}
+
+static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
+
+ return 0;
+}
+
+/* initializes the md.*_offset members, so we are able to find
+ * the on disk meta data */
+static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev)
+{
+ sector_t md_size_sect = 0;
+ switch (bdev->dc.meta_dev_idx) {
+ default:
+ /* v07 style fixed size indexed meta data */
+ bdev->md.md_size_sect = MD_RESERVED_SECT;
+ bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
+ bdev->md.al_offset = MD_AL_OFFSET;
+ bdev->md.bm_offset = MD_BM_OFFSET;
+ break;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ /* just occupy the full device; unit: sectors */
+ bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
+ bdev->md.md_offset = 0;
+ bdev->md.al_offset = MD_AL_OFFSET;
+ bdev->md.bm_offset = MD_BM_OFFSET;
+ break;
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
+ /* al size is still fixed */
+ bdev->md.al_offset = -MD_AL_MAX_SIZE;
+ /* we need (slightly less than) ~ this much bitmap sectors: */
+ md_size_sect = drbd_get_capacity(bdev->backing_bdev);
+ md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
+ md_size_sect = BM_SECT_TO_EXT(md_size_sect);
+ md_size_sect = ALIGN(md_size_sect, 8);
+
+ /* plus the "drbd meta data super block",
+ * and the activity log; */
+ md_size_sect += MD_BM_OFFSET;
+
+ bdev->md.md_size_sect = md_size_sect;
+ /* bitmap offset is adjusted by 'super' block size */
+ bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
+ break;
+ }
+}
+
+char *ppsize(char *buf, unsigned long long size)
+{
+ /* Needs 9 bytes at max. */
+ static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
+ int base = 0;
+ while (size >= 10000) {
+ /* shift + round */
+ size = (size >> 10) + !!(size & (1<<9));
+ base++;
+ }
+ sprintf(buf, "%lu %cB", (long)size, units[base]);
+
+ return buf;
+}
+
+/* there is still a theoretical deadlock when called from receiver
+ * on an D_INCONSISTENT R_PRIMARY:
+ * remote READ does inc_ap_bio, receiver would need to receive answer
+ * packet from remote to dec_ap_bio again.
+ * receiver receive_sizes(), comes here,
+ * waits for ap_bio_cnt == 0. -> deadlock.
+ * but this cannot happen, actually, because:
+ * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
+ * (not connected, or bad/no disk on peer):
+ * see drbd_fail_request_early, ap_bio_cnt is zero.
+ * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
+ * peer may not initiate a resize.
+ */
+void drbd_suspend_io(struct drbd_conf *mdev)
+{
+ set_bit(SUSPEND_IO, &mdev->flags);
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+}
+
+void drbd_resume_io(struct drbd_conf *mdev)
+{
+ clear_bit(SUSPEND_IO, &mdev->flags);
+ wake_up(&mdev->misc_wait);
+}
+
+/**
+ * drbd_determine_dev_size() - Sets the right device size obeying all constraints
+ * @mdev: DRBD device.
+ *
+ * Returns 0 on success, negative return values indicate errors.
+ * You should call drbd_md_sync() after calling this function.
+ */
+enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local)
+{
+ sector_t prev_first_sect, prev_size; /* previous meta location */
+ sector_t la_size;
+ sector_t size;
+ char ppb[10];
+
+ int md_moved, la_size_changed;
+ enum determine_dev_size rv = unchanged;
+
+ /* race:
+ * application request passes inc_ap_bio,
+ * but then cannot get an AL-reference.
+ * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+ *
+ * to avoid that:
+ * Suspend IO right here.
+ * still lock the act_log to not trigger ASSERTs there.
+ */
+ drbd_suspend_io(mdev);
+
+ /* no wait necessary anymore, actually we could assert that */
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+ prev_first_sect = drbd_md_first_sector(mdev->ldev);
+ prev_size = mdev->ldev->md.md_size_sect;
+ la_size = mdev->ldev->md.la_size_sect;
+
+ /* TODO: should only be some assert here, not (re)init... */
+ drbd_md_set_sector_offsets(mdev, mdev->ldev);
+
+ size = drbd_new_dev_size(mdev, mdev->ldev);
+
+ if (drbd_get_capacity(mdev->this_bdev) != size ||
+ drbd_bm_capacity(mdev) != size) {
+ int err;
+ err = drbd_bm_resize(mdev, size);
+ if (unlikely(err)) {
+ /* currently there is only one error: ENOMEM! */
+ size = drbd_bm_capacity(mdev)>>1;
+ if (size == 0) {
+ dev_err(DEV, "OUT OF MEMORY! "
+ "Could not allocate bitmap!\n");
+ } else {
+ dev_err(DEV, "BM resizing failed. "
+ "Leaving size unchanged at size = %lu KB\n",
+ (unsigned long)size);
+ }
+ rv = dev_size_error;
+ }
+ /* racy, see comments above. */
+ drbd_set_my_capacity(mdev, size);
+ mdev->ldev->md.la_size_sect = size;
+ dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
+ (unsigned long long)size>>1);
+ }
+ if (rv == dev_size_error)
+ goto out;
+
+ la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
+
+ md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
+ || prev_size != mdev->ldev->md.md_size_sect;
+
+ if (la_size_changed || md_moved) {
+ drbd_al_shrink(mdev); /* All extents inactive. */
+ dev_info(DEV, "Writing the whole bitmap, %s\n",
+ la_size_changed && md_moved ? "size changed and md moved" :
+ la_size_changed ? "size changed" : "md moved");
+ rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */
+ drbd_md_mark_dirty(mdev);
+ }
+
+ if (size > la_size)
+ rv = grew;
+ if (size < la_size)
+ rv = shrunk;
+out:
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
+sector_t
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+ sector_t p_size = mdev->p_size; /* partner's disk size. */
+ sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
+ sector_t m_size; /* my size */
+ sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
+ sector_t size = 0;
+
+ m_size = drbd_get_max_capacity(bdev);
+
+ if (p_size && m_size) {
+ size = min_t(sector_t, p_size, m_size);
+ } else {
+ if (la_size) {
+ size = la_size;
+ if (m_size && m_size < size)
+ size = m_size;
+ if (p_size && p_size < size)
+ size = p_size;
+ } else {
+ if (m_size)
+ size = m_size;
+ if (p_size)
+ size = p_size;
+ }
+ }
+
+ if (size == 0)
+ dev_err(DEV, "Both nodes diskless!\n");
+
+ if (u_size) {
+ if (u_size > size)
+ dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
+ (unsigned long)u_size>>1, (unsigned long)size>>1);
+ else
+ size = u_size;
+ }
+
+ return size;
+}
+
+/**
+ * drbd_check_al_size() - Ensures that the AL is of the right size
+ * @mdev: DRBD device.
+ *
+ * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
+ * failed, and 0 on success. You should call drbd_md_sync() after you called
+ * this function.
+ */
+static int drbd_check_al_size(struct drbd_conf *mdev)
+{
+ struct lru_cache *n, *t;
+ struct lc_element *e;
+ unsigned int in_use;
+ int i;
+
+ ERR_IF(mdev->sync_conf.al_extents < 7)
+ mdev->sync_conf.al_extents = 127;
+
+ if (mdev->act_log &&
+ mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+ return 0;
+
+ in_use = 0;
+ t = mdev->act_log;
+ n = lc_create("act_log", drbd_al_ext_cache,
+ mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+
+ if (n == NULL) {
+ dev_err(DEV, "Cannot allocate act_log lru!\n");
+ return -ENOMEM;
+ }
+ spin_lock_irq(&mdev->al_lock);
+ if (t) {
+ for (i = 0; i < t->nr_elements; i++) {
+ e = lc_element_by_index(t, i);
+ if (e->refcnt)
+ dev_err(DEV, "refcnt(%d)==%d\n",
+ e->lc_number, e->refcnt);
+ in_use += e->refcnt;
+ }
+ }
+ if (!in_use)
+ mdev->act_log = n;
+ spin_unlock_irq(&mdev->al_lock);
+ if (in_use) {
+ dev_err(DEV, "Activity log still in use!\n");
+ lc_destroy(n);
+ return -EBUSY;
+ } else {
+ if (t)
+ lc_destroy(t);
+ }
+ drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */
+ return 0;
+}
+
+void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
+{
+ struct request_queue * const q = mdev->rq_queue;
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+ int max_segments = mdev->ldev->dc.max_bio_bvecs;
+
+ if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv)
+ max_seg_s = PAGE_SIZE;
+
+ max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
+
+ blk_queue_max_sectors(q, max_seg_s >> 9);
+ blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS);
+ blk_queue_max_hw_segments(q, max_segments ? max_segments : MAX_HW_SEGMENTS);
+ blk_queue_max_segment_size(q, max_seg_s);
+ blk_queue_logical_block_size(q, 512);
+ blk_queue_segment_boundary(q, PAGE_SIZE-1);
+ blk_stack_limits(&q->limits, &b->limits, 0);
+
+ if (b->merge_bvec_fn)
+ dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
+ b->merge_bvec_fn);
+ dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
+
+ if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+ dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+ q->backing_dev_info.ra_pages,
+ b->backing_dev_info.ra_pages);
+ q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+ }
+}
+
+/* serialize deconfig (worker exiting, doing cleanup)
+ * and reconfig (drbdsetup disk, drbdsetup net)
+ *
+ * wait for a potentially exiting worker, then restart it,
+ * or start a new one.
+ */
+static void drbd_reconfig_start(struct drbd_conf *mdev)
+{
+ wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
+ wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
+ drbd_thread_start(&mdev->worker);
+}
+
+/* if still unconfigured, stops worker again.
+ * if configured now, clears CONFIG_PENDING.
+ * wakes potential waiters */
+static void drbd_reconfig_done(struct drbd_conf *mdev)
+{
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.disk == D_DISKLESS &&
+ mdev->state.conn == C_STANDALONE &&
+ mdev->state.role == R_SECONDARY) {
+ set_bit(DEVICE_DYING, &mdev->flags);
+ drbd_thread_stop_nowait(&mdev->worker);
+ } else
+ clear_bit(CONFIG_PENDING, &mdev->flags);
+ spin_unlock_irq(&mdev->req_lock);
+ wake_up(&mdev->state_wait);
+}
+
+/* does always return 0;
+ * interesting return code is in reply->ret_code */
+static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ enum drbd_ret_codes retcode;
+ enum determine_dev_size dd;
+ sector_t max_possible_sectors;
+ sector_t min_md_device_sectors;
+ struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
+ struct inode *inode, *inode2;
+ struct lru_cache *resync_lru = NULL;
+ union drbd_state ns, os;
+ int rv;
+ int cp_discovered = 0;
+ int logical_block_size;
+
+ drbd_reconfig_start(mdev);
+
+ /* if you want to reconfigure, please tear down first */
+ if (mdev->state.disk > D_DISKLESS) {
+ retcode = ERR_DISK_CONFIGURED;
+ goto fail;
+ }
+
+ /* allocation not in the IO path, cqueue thread context */
+ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
+ if (!nbc) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF;
+ nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF;
+ nbc->dc.fencing = DRBD_FENCING_DEF;
+ nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
+
+ if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+ retcode = ERR_MD_IDX_INVALID;
+ goto fail;
+ }
+
+ nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
+ if (IS_ERR(nbc->lo_file)) {
+ dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
+ PTR_ERR(nbc->lo_file));
+ nbc->lo_file = NULL;
+ retcode = ERR_OPEN_DISK;
+ goto fail;
+ }
+
+ inode = nbc->lo_file->f_dentry->d_inode;
+
+ if (!S_ISBLK(inode->i_mode)) {
+ retcode = ERR_DISK_NOT_BDEV;
+ goto fail;
+ }
+
+ nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0);
+ if (IS_ERR(nbc->md_file)) {
+ dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
+ PTR_ERR(nbc->md_file));
+ nbc->md_file = NULL;
+ retcode = ERR_OPEN_MD_DISK;
+ goto fail;
+ }
+
+ inode2 = nbc->md_file->f_dentry->d_inode;
+
+ if (!S_ISBLK(inode2->i_mode)) {
+ retcode = ERR_MD_NOT_BDEV;
+ goto fail;
+ }
+
+ nbc->backing_bdev = inode->i_bdev;
+ if (bd_claim(nbc->backing_bdev, mdev)) {
+ printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
+ nbc->backing_bdev, mdev,
+ nbc->backing_bdev->bd_holder,
+ nbc->backing_bdev->bd_contains->bd_holder,
+ nbc->backing_bdev->bd_holders);
+ retcode = ERR_BDCLAIM_DISK;
+ goto fail;
+ }
+
+ resync_lru = lc_create("resync", drbd_bm_ext_cache,
+ 61, sizeof(struct bm_extent),
+ offsetof(struct bm_extent, lce));
+ if (!resync_lru) {
+ retcode = ERR_NOMEM;
+ goto release_bdev_fail;
+ }
+
+ /* meta_dev_idx >= 0: external fixed size,
+ * possibly multiple drbd sharing one meta device.
+ * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
+ * not yet used by some other drbd minor!
+ * (if you use drbd.conf + drbdadm,
+ * that should check it for you already; but if you don't, or someone
+ * fooled it, we need to double check here) */
+ nbc->md_bdev = inode2->i_bdev;
+ if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
+ : (void *) drbd_m_holder)) {
+ retcode = ERR_BDCLAIM_MD_DISK;
+ goto release_bdev_fail;
+ }
+
+ if ((nbc->backing_bdev == nbc->md_bdev) !=
+ (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+ nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+ retcode = ERR_MD_IDX_INVALID;
+ goto release_bdev2_fail;
+ }
+
+ /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
+ drbd_md_set_sector_offsets(mdev, nbc);
+
+ if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
+ dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
+ (unsigned long long) drbd_get_max_capacity(nbc),
+ (unsigned long long) nbc->dc.disk_size);
+ retcode = ERR_DISK_TO_SMALL;
+ goto release_bdev2_fail;
+ }
+
+ if (nbc->dc.meta_dev_idx < 0) {
+ max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
+ /* at least one MB, otherwise it does not make sense */
+ min_md_device_sectors = (2<<10);
+ } else {
+ max_possible_sectors = DRBD_MAX_SECTORS;
+ min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
+ }
+
+ if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
+ retcode = ERR_MD_DISK_TO_SMALL;
+ dev_warn(DEV, "refusing attach: md-device too small, "
+ "at least %llu sectors needed for this meta-disk type\n",
+ (unsigned long long) min_md_device_sectors);
+ goto release_bdev2_fail;
+ }
+
+ /* Make sure the new disk is big enough
+ * (we may currently be R_PRIMARY with no local disk...) */
+ if (drbd_get_max_capacity(nbc) <
+ drbd_get_capacity(mdev->this_bdev)) {
+ retcode = ERR_DISK_TO_SMALL;
+ goto release_bdev2_fail;
+ }
+
+ nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
+
+ if (nbc->known_size > max_possible_sectors) {
+ dev_warn(DEV, "==> truncating very big lower level device "
+ "to currently maximum possible %llu sectors <==\n",
+ (unsigned long long) max_possible_sectors);
+ if (nbc->dc.meta_dev_idx >= 0)
+ dev_warn(DEV, "==>> using internal or flexible "
+ "meta data may help <<==\n");
+ }
+
+ drbd_suspend_io(mdev);
+ /* also wait for the last barrier ack. */
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt));
+ /* and for any other previously queued work */
+ drbd_flush_workqueue(mdev);
+
+ retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+ drbd_resume_io(mdev);
+ if (retcode < SS_SUCCESS)
+ goto release_bdev2_fail;
+
+ if (!get_ldev_if_state(mdev, D_ATTACHING))
+ goto force_diskless;
+
+ drbd_md_set_sector_offsets(mdev, nbc);
+
+ if (!mdev->bitmap) {
+ if (drbd_bm_init(mdev)) {
+ retcode = ERR_NOMEM;
+ goto force_diskless_dec;
+ }
+ }
+
+ retcode = drbd_md_read(mdev, nbc);
+ if (retcode != NO_ERROR)
+ goto force_diskless_dec;
+
+ if (mdev->state.conn < C_CONNECTED &&
+ mdev->state.role == R_PRIMARY &&
+ (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
+ dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
+ (unsigned long long)mdev->ed_uuid);
+ retcode = ERR_DATA_NOT_CURRENT;
+ goto force_diskless_dec;
+ }
+
+ /* Since we are diskless, fix the activity log first... */
+ if (drbd_check_al_size(mdev)) {
+ retcode = ERR_NOMEM;
+ goto force_diskless_dec;
+ }
+
+ /* Prevent shrinking of consistent devices ! */
+ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
+ drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) {
+ dev_warn(DEV, "refusing to truncate a consistent device\n");
+ retcode = ERR_DISK_TO_SMALL;
+ goto force_diskless_dec;
+ }
+
+ if (!drbd_al_read_log(mdev, nbc)) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+
+ /* allocate a second IO page if logical_block_size != 512 */
+ logical_block_size = bdev_logical_block_size(nbc->md_bdev);
+ if (logical_block_size == 0)
+ logical_block_size = MD_SECTOR_SIZE;
+
+ if (logical_block_size != MD_SECTOR_SIZE) {
+ if (!mdev->md_io_tmpp) {
+ struct page *page = alloc_page(GFP_NOIO);
+ if (!page)
+ goto force_diskless_dec;
+
+ dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
+ logical_block_size, MD_SECTOR_SIZE);
+ dev_warn(DEV, "Workaround engaged (has performance impact).\n");
+
+ mdev->md_io_tmpp = page;
+ }
+ }
+
+ /* Reset the "barriers don't work" bits here, then force meta data to
+ * be written, to ensure we determine if barriers are supported. */
+ if (nbc->dc.no_md_flush)
+ set_bit(MD_NO_BARRIER, &mdev->flags);
+ else
+ clear_bit(MD_NO_BARRIER, &mdev->flags);
+
+ /* Point of no return reached.
+ * Devices and memory are no longer released by error cleanup below.
+ * now mdev takes over responsibility, and the state engine should
+ * clean it up somewhere. */
+ D_ASSERT(mdev->ldev == NULL);
+ mdev->ldev = nbc;
+ mdev->resync = resync_lru;
+ nbc = NULL;
+ resync_lru = NULL;
+
+ mdev->write_ordering = WO_bio_barrier;
+ drbd_bump_write_ordering(mdev, WO_bio_barrier);
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
+ set_bit(CRASHED_PRIMARY, &mdev->flags);
+ else
+ clear_bit(CRASHED_PRIMARY, &mdev->flags);
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) {
+ set_bit(CRASHED_PRIMARY, &mdev->flags);
+ cp_discovered = 1;
+ }
+
+ mdev->send_cnt = 0;
+ mdev->recv_cnt = 0;
+ mdev->read_cnt = 0;
+ mdev->writ_cnt = 0;
+
+ drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE);
+
+ /* If I am currently not R_PRIMARY,
+ * but meta data primary indicator is set,
+ * I just now recover from a hard crash,
+ * and have been R_PRIMARY before that crash.
+ *
+ * Now, if I had no connection before that crash
+ * (have been degraded R_PRIMARY), chances are that
+ * I won't find my peer now either.
+ *
+ * In that case, and _only_ in that case,
+ * we use the degr-wfc-timeout instead of the default,
+ * so we can automatically recover from a crash of a
+ * degraded but active "cluster" after a certain timeout.
+ */
+ clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+ if (mdev->state.role != R_PRIMARY &&
+ drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
+ !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
+ set_bit(USE_DEGR_WFC_T, &mdev->flags);
+
+ dd = drbd_determin_dev_size(mdev);
+ if (dd == dev_size_error) {
+ retcode = ERR_NOMEM_BITMAP;
+ goto force_diskless_dec;
+ } else if (dd == grew)
+ set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+ dev_info(DEV, "Assuming that all blocks are out of sync "
+ "(aka FullSync)\n");
+ if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+ } else {
+ if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+ }
+
+ if (cp_discovered) {
+ drbd_al_apply_to_bm(mdev);
+ drbd_al_to_on_disk_bm(mdev);
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+ ns.i = os.i;
+ /* If MDF_CONSISTENT is not set go into inconsistent state,
+ otherwise investigate MDF_WasUpToDate...
+ If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
+ otherwise into D_CONSISTENT state.
+ */
+ if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
+ ns.disk = D_CONSISTENT;
+ else
+ ns.disk = D_OUTDATED;
+ } else {
+ ns.disk = D_INCONSISTENT;
+ }
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
+ ns.pdsk = D_OUTDATED;
+
+ if ( ns.disk == D_CONSISTENT &&
+ (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
+ ns.disk = D_UP_TO_DATE;
+
+ /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
+ MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
+ this point, because drbd_request_state() modifies these
+ flags. */
+
+ /* In case we are C_CONNECTED postpone any decision on the new disk
+ state after the negotiation phase. */
+ if (mdev->state.conn == C_CONNECTED) {
+ mdev->new_state_tmp.i = ns.i;
+ ns.i = os.i;
+ ns.disk = D_NEGOTIATING;
+ }
+
+ rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ ns = mdev->state;
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (rv < SS_SUCCESS)
+ goto force_diskless_dec;
+
+ if (mdev->state.role == R_PRIMARY)
+ mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
+ else
+ mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
+
+ drbd_md_mark_dirty(mdev);
+ drbd_md_sync(mdev);
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ put_ldev(mdev);
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+
+ force_diskless_dec:
+ put_ldev(mdev);
+ force_diskless:
+ drbd_force_state(mdev, NS(disk, D_DISKLESS));
+ drbd_md_sync(mdev);
+ release_bdev2_fail:
+ if (nbc)
+ bd_release(nbc->md_bdev);
+ release_bdev_fail:
+ if (nbc)
+ bd_release(nbc->backing_bdev);
+ fail:
+ if (nbc) {
+ if (nbc->lo_file)
+ fput(nbc->lo_file);
+ if (nbc->md_file)
+ fput(nbc->md_file);
+ kfree(nbc);
+ }
+ lc_destroy(resync_lru);
+
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+}
+
+static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+ return 0;
+}
+
+static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int i, ns;
+ enum drbd_ret_codes retcode;
+ struct net_conf *new_conf = NULL;
+ struct crypto_hash *tfm = NULL;
+ struct crypto_hash *integrity_w_tfm = NULL;
+ struct crypto_hash *integrity_r_tfm = NULL;
+ struct hlist_head *new_tl_hash = NULL;
+ struct hlist_head *new_ee_hash = NULL;
+ struct drbd_conf *odev;
+ char hmac_name[CRYPTO_MAX_ALG_NAME];
+ void *int_dig_out = NULL;
+ void *int_dig_in = NULL;
+ void *int_dig_vv = NULL;
+ struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+
+ drbd_reconfig_start(mdev);
+
+ if (mdev->state.conn > C_STANDALONE) {
+ retcode = ERR_NET_CONFIGURED;
+ goto fail;
+ }
+
+ /* allocation not in the IO path, cqueue thread context */
+ new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
+ if (!new_conf) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ memset(new_conf, 0, sizeof(struct net_conf));
+ new_conf->timeout = DRBD_TIMEOUT_DEF;
+ new_conf->try_connect_int = DRBD_CONNECT_INT_DEF;
+ new_conf->ping_int = DRBD_PING_INT_DEF;
+ new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
+ new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF;
+ new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
+ new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
+ new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF;
+ new_conf->ko_count = DRBD_KO_COUNT_DEF;
+ new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
+ new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
+ new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
+ new_conf->want_lose = 0;
+ new_conf->two_primaries = 0;
+ new_conf->wire_protocol = DRBD_PROT_C;
+ new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
+ new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
+
+ if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (new_conf->two_primaries
+ && (new_conf->wire_protocol != DRBD_PROT_C)) {
+ retcode = ERR_NOT_PROTO_C;
+ goto fail;
+ };
+
+ if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
+ retcode = ERR_DISCARD;
+ goto fail;
+ }
+
+ retcode = NO_ERROR;
+
+ new_my_addr = (struct sockaddr *)&new_conf->my_addr;
+ new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
+ for (i = 0; i < minor_count; i++) {
+ odev = minor_to_mdev(i);
+ if (!odev || odev == mdev)
+ continue;
+ if (get_net_conf(odev)) {
+ taken_addr = (struct sockaddr *)&odev->net_conf->my_addr;
+ if (new_conf->my_addr_len == odev->net_conf->my_addr_len &&
+ !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
+ retcode = ERR_LOCAL_ADDR;
+
+ taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
+ if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
+ !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
+ retcode = ERR_PEER_ADDR;
+
+ put_net_conf(odev);
+ if (retcode != NO_ERROR)
+ goto fail;
+ }
+ }
+
+ if (new_conf->cram_hmac_alg[0] != 0) {
+ snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
+ new_conf->cram_hmac_alg);
+ tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm)) {
+ tfm = NULL;
+ retcode = ERR_AUTH_ALG;
+ goto fail;
+ }
+
+ if (crypto_tfm_alg_type(crypto_hash_tfm(tfm))
+ != CRYPTO_ALG_TYPE_HASH) {
+ retcode = ERR_AUTH_ALG_ND;
+ goto fail;
+ }
+ }
+
+ if (new_conf->integrity_alg[0]) {
+ integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(integrity_w_tfm)) {
+ integrity_w_tfm = NULL;
+ retcode=ERR_INTEGRITY_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
+ retcode=ERR_INTEGRITY_ALG_ND;
+ goto fail;
+ }
+
+ integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(integrity_r_tfm)) {
+ integrity_r_tfm = NULL;
+ retcode=ERR_INTEGRITY_ALG;
+ goto fail;
+ }
+ }
+
+ ns = new_conf->max_epoch_size/8;
+ if (mdev->tl_hash_s != ns) {
+ new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+ if (!new_tl_hash) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ ns = new_conf->max_buffers/8;
+ if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
+ new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+ if (!new_ee_hash) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
+
+ if (integrity_w_tfm) {
+ i = crypto_hash_digestsize(integrity_w_tfm);
+ int_dig_out = kmalloc(i, GFP_KERNEL);
+ if (!int_dig_out) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ int_dig_in = kmalloc(i, GFP_KERNEL);
+ if (!int_dig_in) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ int_dig_vv = kmalloc(i, GFP_KERNEL);
+ if (!int_dig_vv) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ if (!mdev->bitmap) {
+ if(drbd_bm_init(mdev)) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->net_conf != NULL) {
+ retcode = ERR_NET_CONFIGURED;
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail;
+ }
+ mdev->net_conf = new_conf;
+
+ mdev->send_cnt = 0;
+ mdev->recv_cnt = 0;
+
+ if (new_tl_hash) {
+ kfree(mdev->tl_hash);
+ mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
+ mdev->tl_hash = new_tl_hash;
+ }
+
+ if (new_ee_hash) {
+ kfree(mdev->ee_hash);
+ mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
+ mdev->ee_hash = new_ee_hash;
+ }
+
+ crypto_free_hash(mdev->cram_hmac_tfm);
+ mdev->cram_hmac_tfm = tfm;
+
+ crypto_free_hash(mdev->integrity_w_tfm);
+ mdev->integrity_w_tfm = integrity_w_tfm;
+
+ crypto_free_hash(mdev->integrity_r_tfm);
+ mdev->integrity_r_tfm = integrity_r_tfm;
+
+ kfree(mdev->int_dig_out);
+ kfree(mdev->int_dig_in);
+ kfree(mdev->int_dig_vv);
+ mdev->int_dig_out=int_dig_out;
+ mdev->int_dig_in=int_dig_in;
+ mdev->int_dig_vv=int_dig_vv;
+ spin_unlock_irq(&mdev->req_lock);
+
+ retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+
+fail:
+ kfree(int_dig_out);
+ kfree(int_dig_in);
+ kfree(int_dig_vv);
+ crypto_free_hash(tfm);
+ crypto_free_hash(integrity_w_tfm);
+ crypto_free_hash(integrity_r_tfm);
+ kfree(new_tl_hash);
+ kfree(new_ee_hash);
+ kfree(new_conf);
+
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+}
+
+static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode;
+
+ retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
+
+ if (retcode == SS_NOTHING_TO_DO)
+ goto done;
+ else if (retcode == SS_ALREADY_STANDALONE)
+ goto done;
+ else if (retcode == SS_PRIMARY_NOP) {
+ /* Our statche checking code wants to see the peer outdated. */
+ retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
+ pdsk, D_OUTDATED));
+ } else if (retcode == SS_CW_FAILED_BY_PEER) {
+ /* The peer probably wants to see us outdated. */
+ retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
+ disk, D_OUTDATED),
+ CS_ORDERED);
+ if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ retcode = SS_SUCCESS;
+ }
+ }
+
+ if (retcode < SS_SUCCESS)
+ goto fail;
+
+ if (wait_event_interruptible(mdev->state_wait,
+ mdev->state.conn != C_DISCONNECTING)) {
+ /* Do not test for mdev->state.conn == C_STANDALONE, since
+ someone else might connect us in the mean time! */
+ retcode = ERR_INTR;
+ goto fail;
+ }
+
+ done:
+ retcode = NO_ERROR;
+ fail:
+ drbd_md_sync(mdev);
+ reply->ret_code = retcode;
+ return 0;
+}
+
+void resync_after_online_grow(struct drbd_conf *mdev)
+{
+ int iass; /* I am sync source */
+
+ dev_info(DEV, "Resync of new storage after online grow\n");
+ if (mdev->state.role != mdev->state.peer)
+ iass = (mdev->state.role == R_PRIMARY);
+ else
+ iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+ if (iass)
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ else
+ _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
+}
+
+static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ struct resize rs;
+ int retcode = NO_ERROR;
+ int ldsc = 0; /* local disk size changed */
+ enum determine_dev_size dd;
+
+ memset(&rs, 0, sizeof(struct resize));
+ if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (mdev->state.conn > C_CONNECTED) {
+ retcode = ERR_RESIZE_RESYNC;
+ goto fail;
+ }
+
+ if (mdev->state.role == R_SECONDARY &&
+ mdev->state.peer == R_SECONDARY) {
+ retcode = ERR_NO_PRIMARY;
+ goto fail;
+ }
+
+ if (!get_ldev(mdev)) {
+ retcode = ERR_NO_DISK;
+ goto fail;
+ }
+
+ if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+ mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
+ ldsc = 1;
+ }
+
+ mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
+ dd = drbd_determin_dev_size(mdev);
+ drbd_md_sync(mdev);
+ put_ldev(mdev);
+ if (dd == dev_size_error) {
+ retcode = ERR_NOMEM_BITMAP;
+ goto fail;
+ }
+
+ if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) {
+ if (dd == grew)
+ set_bit(RESIZE_PENDING, &mdev->flags);
+
+ drbd_send_uuids(mdev);
+ drbd_send_sizes(mdev, 1);
+ }
+
+ fail:
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+ int err;
+ int ovr; /* online verify running */
+ int rsr; /* re-sync running */
+ struct crypto_hash *verify_tfm = NULL;
+ struct crypto_hash *csums_tfm = NULL;
+ struct syncer_conf sc;
+ cpumask_var_t new_cpu_mask;
+
+ if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
+ memset(&sc, 0, sizeof(struct syncer_conf));
+ sc.rate = DRBD_RATE_DEF;
+ sc.after = DRBD_AFTER_DEF;
+ sc.al_extents = DRBD_AL_EXTENTS_DEF;
+ } else
+ memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
+
+ if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ /* re-sync running */
+ rsr = ( mdev->state.conn == C_SYNC_SOURCE ||
+ mdev->state.conn == C_SYNC_TARGET ||
+ mdev->state.conn == C_PAUSED_SYNC_S ||
+ mdev->state.conn == C_PAUSED_SYNC_T );
+
+ if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
+ retcode = ERR_CSUMS_RESYNC_RUNNING;
+ goto fail;
+ }
+
+ if (!rsr && sc.csums_alg[0]) {
+ csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(csums_tfm)) {
+ csums_tfm = NULL;
+ retcode = ERR_CSUMS_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
+ retcode = ERR_CSUMS_ALG_ND;
+ goto fail;
+ }
+ }
+
+ /* online verify running */
+ ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
+
+ if (ovr) {
+ if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
+ retcode = ERR_VERIFY_RUNNING;
+ goto fail;
+ }
+ }
+
+ if (!ovr && sc.verify_alg[0]) {
+ verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(verify_tfm)) {
+ verify_tfm = NULL;
+ retcode = ERR_VERIFY_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
+ retcode = ERR_VERIFY_ALG_ND;
+ goto fail;
+ }
+ }
+
+ /* silently ignore cpu mask on UP kernel */
+ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
+ err = __bitmap_parse(sc.cpu_mask, 32, 0,
+ cpumask_bits(new_cpu_mask), nr_cpu_ids);
+ if (err) {
+ dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
+ retcode = ERR_CPU_MASK_PARSE;
+ goto fail;
+ }
+ }
+
+ ERR_IF (sc.rate < 1) sc.rate = 1;
+ ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
+#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
+ if (sc.al_extents > AL_MAX) {
+ dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
+ sc.al_extents = AL_MAX;
+ }
+#undef AL_MAX
+
+ /* most sanity checks done, try to assign the new sync-after
+ * dependency. need to hold the global lock in there,
+ * to avoid a race in the dependency loop check. */
+ retcode = drbd_alter_sa(mdev, sc.after);
+ if (retcode != NO_ERROR)
+ goto fail;
+
+ /* ok, assign the rest of it as well.
+ * lock against receive_SyncParam() */
+ spin_lock(&mdev->peer_seq_lock);
+ mdev->sync_conf = sc;
+
+ if (!rsr) {
+ crypto_free_hash(mdev->csums_tfm);
+ mdev->csums_tfm = csums_tfm;
+ csums_tfm = NULL;
+ }
+
+ if (!ovr) {
+ crypto_free_hash(mdev->verify_tfm);
+ mdev->verify_tfm = verify_tfm;
+ verify_tfm = NULL;
+ }
+ spin_unlock(&mdev->peer_seq_lock);
+
+ if (get_ldev(mdev)) {
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+ drbd_al_shrink(mdev);
+ err = drbd_check_al_size(mdev);
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+
+ put_ldev(mdev);
+ drbd_md_sync(mdev);
+
+ if (err) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ if (mdev->state.conn >= C_CONNECTED)
+ drbd_send_sync_param(mdev, &sc);
+
+ if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
+ cpumask_copy(mdev->cpu_mask, new_cpu_mask);
+ drbd_calc_cpu_mask(mdev);
+ mdev->receiver.reset_cpu_mask = 1;
+ mdev->asender.reset_cpu_mask = 1;
+ mdev->worker.reset_cpu_mask = 1;
+ }
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+fail:
+ free_cpumask_var(new_cpu_mask);
+ crypto_free_hash(csums_tfm);
+ crypto_free_hash(verify_tfm);
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode;
+
+ retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
+
+ if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
+ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+
+ while (retcode == SS_NEED_CONNECTION) {
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn < C_CONNECTED)
+ retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (retcode != SS_NEED_CONNECTION)
+ break;
+
+ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+ }
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+
+ reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+
+ return 0;
+}
+
+static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+
+ if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+ retcode = ERR_PAUSE_IS_SET;
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+
+ if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO)
+ retcode = ERR_PAUSE_IS_CLEAR;
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
+
+ return 0;
+}
+
+static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
+ return 0;
+}
+
+static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
+ return 0;
+}
+
+static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl;
+
+ tl = reply->tag_list;
+
+ if (get_ldev(mdev)) {
+ tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
+ put_ldev(mdev);
+ }
+
+ if (get_net_conf(mdev)) {
+ tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
+ put_net_conf(mdev);
+ }
+ tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
+
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl = reply->tag_list;
+ union drbd_state s = mdev->state;
+ unsigned long rs_left;
+ unsigned int res;
+
+ tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);
+
+ /* no local ref, no bitmap, no syncer progress. */
+ if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
+ if (get_ldev(mdev)) {
+ drbd_get_syncer_progress(mdev, &rs_left, &res);
+ tl = tl_add_int(tl, T_sync_progress, &res);
+ put_ldev(mdev);
+ }
+ }
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl;
+
+ tl = reply->tag_list;
+
+ if (get_ldev(mdev)) {
+ tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
+ tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
+ put_ldev(mdev);
+ }
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+/**
+ * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
+ * @mdev: DRBD device.
+ * @nlp: Netlink/connector packet from drbdsetup
+ * @reply: Reply packet for drbdsetup
+ */
+static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl;
+ char rv;
+
+ tl = reply->tag_list;
+
+ rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
+ test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
+
+ tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ /* default to resume from last known position, if possible */
+ struct start_ov args =
+ { .start_sector = mdev->ov_start_sector };
+
+ if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
+ reply->ret_code = ERR_MANDATORY_TAG;
+ return 0;
+ }
+ /* w_make_ov_request expects position to be aligned */
+ mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
+ reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+ return 0;
+}
+
+
+static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+ int skip_initial_sync = 0;
+ int err;
+
+ struct new_c_uuid args;
+
+ memset(&args, 0, sizeof(struct new_c_uuid));
+ if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
+ reply->ret_code = ERR_MANDATORY_TAG;
+ return 0;
+ }
+
+ mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */
+
+ if (!get_ldev(mdev)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ /* this is "skip initial sync", assume to be clean */
+ if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
+ mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
+ dev_info(DEV, "Preparing to skip initial sync\n");
+ skip_initial_sync = 1;
+ } else if (mdev->state.conn != C_STANDALONE) {
+ retcode = ERR_CONNECTED;
+ goto out_dec;
+ }
+
+ drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
+ drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
+
+ if (args.clear_bm) {
+ err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid");
+ if (err) {
+ dev_err(DEV, "Writing bitmap failed with %d\n",err);
+ retcode = ERR_IO_MD_DISK;
+ }
+ if (skip_initial_sync) {
+ drbd_send_uuids_skip_initial_sync(mdev);
+ _drbd_uuid_set(mdev, UI_BITMAP, 0);
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+ CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+ }
+ }
+
+ drbd_md_sync(mdev);
+out_dec:
+ put_ldev(mdev);
+out:
+ mutex_unlock(&mdev->state_mutex);
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
+{
+ struct drbd_conf *mdev;
+
+ if (nlp->drbd_minor >= minor_count)
+ return NULL;
+
+ mdev = minor_to_mdev(nlp->drbd_minor);
+
+ if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
+ struct gendisk *disk = NULL;
+ mdev = drbd_new_device(nlp->drbd_minor);
+
+ spin_lock_irq(&drbd_pp_lock);
+ if (minor_table[nlp->drbd_minor] == NULL) {
+ minor_table[nlp->drbd_minor] = mdev;
+ disk = mdev->vdisk;
+ mdev = NULL;
+ } /* else: we lost the race */
+ spin_unlock_irq(&drbd_pp_lock);
+
+ if (disk) /* we won the race above */
+ /* in case we ever add a drbd_delete_device(),
+ * don't forget the del_gendisk! */
+ add_disk(disk);
+ else /* we lost the race above */
+ drbd_free_mdev(mdev);
+
+ mdev = minor_to_mdev(nlp->drbd_minor);
+ }
+
+ return mdev;
+}
+
+struct cn_handler_struct {
+ int (*function)(struct drbd_conf *,
+ struct drbd_nl_cfg_req *,
+ struct drbd_nl_cfg_reply *);
+ int reply_body_size;
+};
+
+static struct cn_handler_struct cnd_table[] = {
+ [ P_primary ] = { &drbd_nl_primary, 0 },
+ [ P_secondary ] = { &drbd_nl_secondary, 0 },
+ [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 },
+ [ P_detach ] = { &drbd_nl_detach, 0 },
+ [ P_net_conf ] = { &drbd_nl_net_conf, 0 },
+ [ P_disconnect ] = { &drbd_nl_disconnect, 0 },
+ [ P_resize ] = { &drbd_nl_resize, 0 },
+ [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 },
+ [ P_invalidate ] = { &drbd_nl_invalidate, 0 },
+ [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 },
+ [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 },
+ [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 },
+ [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 },
+ [ P_resume_io ] = { &drbd_nl_resume_io, 0 },
+ [ P_outdate ] = { &drbd_nl_outdate, 0 },
+ [ P_get_config ] = { &drbd_nl_get_config,
+ sizeof(struct syncer_conf_tag_len_struct) +
+ sizeof(struct disk_conf_tag_len_struct) +
+ sizeof(struct net_conf_tag_len_struct) },
+ [ P_get_state ] = { &drbd_nl_get_state,
+ sizeof(struct get_state_tag_len_struct) +
+ sizeof(struct sync_progress_tag_len_struct) },
+ [ P_get_uuids ] = { &drbd_nl_get_uuids,
+ sizeof(struct get_uuids_tag_len_struct) },
+ [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag,
+ sizeof(struct get_timeout_flag_tag_len_struct)},
+ [ P_start_ov ] = { &drbd_nl_start_ov, 0 },
+ [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 },
+};
+
+static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
+{
+ struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
+ struct cn_handler_struct *cm;
+ struct cn_msg *cn_reply;
+ struct drbd_nl_cfg_reply *reply;
+ struct drbd_conf *mdev;
+ int retcode, rr;
+ int reply_size = sizeof(struct cn_msg)
+ + sizeof(struct drbd_nl_cfg_reply)
+ + sizeof(short int);
+
+ if (!try_module_get(THIS_MODULE)) {
+ printk(KERN_ERR "drbd: try_module_get() failed!\n");
+ return;
+ }
+
+ if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
+ retcode = ERR_PERM;
+ goto fail;
+ }
+
+ mdev = ensure_mdev(nlp);
+ if (!mdev) {
+ retcode = ERR_MINOR_INVALID;
+ goto fail;
+ }
+
+ if (nlp->packet_type >= P_nl_after_last_packet) {
+ retcode = ERR_PACKET_NR;
+ goto fail;
+ }
+
+ cm = cnd_table + nlp->packet_type;
+
+ /* This may happen if packet number is 0: */
+ if (cm->function == NULL) {
+ retcode = ERR_PACKET_NR;
+ goto fail;
+ }
+
+ reply_size += cm->reply_body_size;
+
+ /* allocation not in the IO path, cqueue thread context */
+ cn_reply = kmalloc(reply_size, GFP_KERNEL);
+ if (!cn_reply) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
+
+ reply->packet_type =
+ cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
+ reply->minor = nlp->drbd_minor;
+ reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
+ /* reply->tag_list; might be modified by cm->function. */
+
+ rr = cm->function(mdev, nlp, reply);
+
+ cn_reply->id = req->id;
+ cn_reply->seq = req->seq;
+ cn_reply->ack = req->ack + 1;
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
+ cn_reply->flags = 0;
+
+ rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+ if (rr && rr != -ESRCH)
+ printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+
+ kfree(cn_reply);
+ module_put(THIS_MODULE);
+ return;
+ fail:
+ drbd_nl_send_reply(req, retcode);
+ module_put(THIS_MODULE);
+}
+
+static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
+
+static unsigned short *
+__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
+ unsigned short len, int nul_terminated)
+{
+ unsigned short l = tag_descriptions[tag_number(tag)].max_len;
+ len = (len < l) ? len : l;
+ put_unaligned(tag, tl++);
+ put_unaligned(len, tl++);
+ memcpy(tl, data, len);
+ tl = (unsigned short*)((char*)tl + len);
+ if (nul_terminated)
+ *((char*)tl - 1) = 0;
+ return tl;
+}
+
+static unsigned short *
+tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
+{
+ return __tl_add_blob(tl, tag, data, len, 0);
+}
+
+static unsigned short *
+tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
+{
+ return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
+}
+
+static unsigned short *
+tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
+{
+ put_unaligned(tag, tl++);
+ switch(tag_type(tag)) {
+ case TT_INTEGER:
+ put_unaligned(sizeof(int), tl++);
+ put_unaligned(*(int *)val, (int *)tl);
+ tl = (unsigned short*)((char*)tl+sizeof(int));
+ break;
+ case TT_INT64:
+ put_unaligned(sizeof(u64), tl++);
+ put_unaligned(*(u64 *)val, (u64 *)tl);
+ tl = (unsigned short*)((char*)tl+sizeof(u64));
+ break;
+ default:
+ /* someone did something stupid. */
+ ;
+ }
+ return tl;
+}
+
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
+{
+ char buffer[sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct get_state_tag_len_struct)+
+ sizeof(short int)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ unsigned short *tl = reply->tag_list;
+
+ /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
+
+ tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);
+
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char *)tl - (char *)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_get_state;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
+{
+ char buffer[sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct call_helper_tag_len_struct)+
+ sizeof(short int)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ unsigned short *tl = reply->tag_list;
+
+ /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
+
+ tl = tl_add_str(tl, T_helper, helper_name);
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char *)tl - (char *)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_call_helper;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+void drbd_bcast_ee(struct drbd_conf *mdev,
+ const char *reason, const int dgs,
+ const char* seen_hash, const char* calc_hash,
+ const struct drbd_epoch_entry* e)
+{
+ struct cn_msg *cn_reply;
+ struct drbd_nl_cfg_reply *reply;
+ struct bio_vec *bvec;
+ unsigned short *tl;
+ int i;
+
+ if (!e)
+ return;
+ if (!reason || !reason[0])
+ return;
+
+ /* apparently we have to memcpy twice, first to prepare the data for the
+ * struct cn_msg, then within cn_netlink_send from the cn_msg to the
+ * netlink skb. */
+ /* receiver thread context, which is not in the writeout path (of this node),
+ * but may be in the writeout path of the _other_ node.
+ * GFP_NOIO to avoid potential "distributed deadlock". */
+ cn_reply = kmalloc(
+ sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct dump_ee_tag_len_struct)+
+ sizeof(short int),
+ GFP_NOIO);
+
+ if (!cn_reply) {
+ dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
+ (unsigned long long)e->sector, e->size);
+ return;
+ }
+
+ reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
+ tl = reply->tag_list;
+
+ tl = tl_add_str(tl, T_dump_ee_reason, reason);
+ tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
+ tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
+ tl = tl_add_int(tl, T_ee_sector, &e->sector);
+ tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
+
+ put_unaligned(T_ee_data, tl++);
+ put_unaligned(e->size, tl++);
+
+ __bio_for_each_segment(bvec, e->private_bio, i, 0) {
+ void *d = kmap(bvec->bv_page);
+ memcpy(tl, d + bvec->bv_offset, bvec->bv_len);
+ kunmap(bvec->bv_page);
+ tl=(unsigned short*)((char*)tl + bvec->bv_len);
+ }
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1,&drbd_nl_seq);
+ cn_reply->ack = 0; // not used here.
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char*)tl - (char*)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_dump_ee;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ kfree(cn_reply);
+}
+
+void drbd_bcast_sync_progress(struct drbd_conf *mdev)
+{
+ char buffer[sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct sync_progress_tag_len_struct)+
+ sizeof(short int)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ unsigned short *tl = reply->tag_list;
+ unsigned long rs_left;
+ unsigned int res;
+
+ /* no local ref, no bitmap, no syncer progress, no broadcast. */
+ if (!get_ldev(mdev))
+ return;
+ drbd_get_syncer_progress(mdev, &rs_left, &res);
+ put_ldev(mdev);
+
+ tl = tl_add_int(tl, T_sync_progress, &res);
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char *)tl - (char *)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_sync_progress;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+int __init drbd_nl_init(void)
+{
+ static struct cb_id cn_id_drbd;
+ int err, try=10;
+
+ cn_id_drbd.val = CN_VAL_DRBD;
+ do {
+ cn_id_drbd.idx = cn_idx;
+ err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
+ if (!err)
+ break;
+ cn_idx = (cn_idx + CN_IDX_STEP);
+ } while (try--);
+
+ if (err) {
+ printk(KERN_ERR "drbd: cn_drbd failed to register\n");
+ return err;
+ }
+
+ return 0;
+}
+
+void drbd_nl_cleanup(void)
+{
+ static struct cb_id cn_id_drbd;
+
+ cn_id_drbd.idx = cn_idx;
+ cn_id_drbd.val = CN_VAL_DRBD;
+
+ cn_del_callback(&cn_id_drbd);
+}
+
+void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
+{
+ char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ int rr;
+
+ cn_reply->id = req->id;
+
+ cn_reply->seq = req->seq;
+ cn_reply->ack = req->ack + 1;
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
+ cn_reply->flags = 0;
+
+ reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
+ reply->ret_code = ret_code;
+
+ rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ if (rr && rr != -ESRCH)
+ printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+}
+
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
new file mode 100644
index 000000000000..bdd0b4943b10
--- /dev/null
+++ b/drivers/block/drbd/drbd_proc.c
@@ -0,0 +1,265 @@
+/*
+ drbd_proc.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+
+static int drbd_proc_open(struct inode *inode, struct file *file);
+
+
+struct proc_dir_entry *drbd_proc;
+struct file_operations drbd_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = drbd_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+
+/*lge
+ * progress bars shamelessly adapted from driver/md/md.c
+ * output looks like
+ * [=====>..............] 33.5% (23456/123456)
+ * finish: 2:20:20 speed: 6,345 (6,456) K/sec
+ */
+static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
+{
+ unsigned long db, dt, dbdt, rt, rs_left;
+ unsigned int res;
+ int i, x, y;
+
+ drbd_get_syncer_progress(mdev, &rs_left, &res);
+
+ x = res/50;
+ y = 20-x;
+ seq_printf(seq, "\t[");
+ for (i = 1; i < x; i++)
+ seq_printf(seq, "=");
+ seq_printf(seq, ">");
+ for (i = 0; i < y; i++)
+ seq_printf(seq, ".");
+ seq_printf(seq, "] ");
+
+ seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
+ /* if more than 1 GB display in MB */
+ if (mdev->rs_total > 0x100000L)
+ seq_printf(seq, "(%lu/%lu)M\n\t",
+ (unsigned long) Bit2KB(rs_left >> 10),
+ (unsigned long) Bit2KB(mdev->rs_total >> 10));
+ else
+ seq_printf(seq, "(%lu/%lu)K\n\t",
+ (unsigned long) Bit2KB(rs_left),
+ (unsigned long) Bit2KB(mdev->rs_total));
+
+ /* see drivers/md/md.c
+ * We do not want to overflow, so the order of operands and
+ * the * 100 / 100 trick are important. We do a +1 to be
+ * safe against division by zero. We only estimate anyway.
+ *
+ * dt: time from mark until now
+ * db: blocks written from mark until now
+ * rt: remaining time
+ */
+ dt = (jiffies - mdev->rs_mark_time) / HZ;
+
+ if (dt > 20) {
+ /* if we made no update to rs_mark_time for too long,
+ * we are stalled. show that. */
+ seq_printf(seq, "stalled\n");
+ return;
+ }
+
+ if (!dt)
+ dt++;
+ db = mdev->rs_mark_left - rs_left;
+ rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
+
+ seq_printf(seq, "finish: %lu:%02lu:%02lu",
+ rt / 3600, (rt % 3600) / 60, rt % 60);
+
+ /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */
+ dbdt = Bit2KB(db/dt);
+ if (dbdt > 1000)
+ seq_printf(seq, " speed: %ld,%03ld",
+ dbdt/1000, dbdt % 1000);
+ else
+ seq_printf(seq, " speed: %ld", dbdt);
+
+ /* mean speed since syncer started
+ * we do account for PausedSync periods */
+ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+ if (dt <= 0)
+ dt = 1;
+ db = mdev->rs_total - rs_left;
+ dbdt = Bit2KB(db/dt);
+ if (dbdt > 1000)
+ seq_printf(seq, " (%ld,%03ld)",
+ dbdt/1000, dbdt % 1000);
+ else
+ seq_printf(seq, " (%ld)", dbdt);
+
+ seq_printf(seq, " K/sec\n");
+}
+
+static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
+{
+ struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
+
+ seq_printf(seq, "%5d %s %s\n", bme->rs_left,
+ bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
+ bme->flags & BME_LOCKED ? "LOCKED" : "------"
+ );
+}
+
+static int drbd_seq_show(struct seq_file *seq, void *v)
+{
+ int i, hole = 0;
+ const char *sn;
+ struct drbd_conf *mdev;
+
+ static char write_ordering_chars[] = {
+ [WO_none] = 'n',
+ [WO_drain_io] = 'd',
+ [WO_bdev_flush] = 'f',
+ [WO_bio_barrier] = 'b',
+ };
+
+ seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
+ API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());
+
+ /*
+ cs .. connection state
+ ro .. node role (local/remote)
+ ds .. disk state (local/remote)
+ protocol
+ various flags
+ ns .. network send
+ nr .. network receive
+ dw .. disk write
+ dr .. disk read
+ al .. activity log write count
+ bm .. bitmap update write count
+ pe .. pending (waiting for ack or data reply)
+ ua .. unack'd (still need to send ack or data reply)
+ ap .. application requests accepted, but not yet completed
+ ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending
+ wo .. write ordering mode currently in use
+ oos .. known out-of-sync kB
+ */
+
+ for (i = 0; i < minor_count; i++) {
+ mdev = minor_to_mdev(i);
+ if (!mdev) {
+ hole = 1;
+ continue;
+ }
+ if (hole) {
+ hole = 0;
+ seq_printf(seq, "\n");
+ }
+
+ sn = drbd_conn_str(mdev->state.conn);
+
+ if (mdev->state.conn == C_STANDALONE &&
+ mdev->state.disk == D_DISKLESS &&
+ mdev->state.role == R_SECONDARY) {
+ seq_printf(seq, "%2d: cs:Unconfigured\n", i);
+ } else {
+ seq_printf(seq,
+ "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n"
+ " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
+ "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
+ i, sn,
+ drbd_role_str(mdev->state.role),
+ drbd_role_str(mdev->state.peer),
+ drbd_disk_str(mdev->state.disk),
+ drbd_disk_str(mdev->state.pdsk),
+ (mdev->net_conf == NULL ? ' ' :
+ (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
+ mdev->state.susp ? 's' : 'r',
+ mdev->state.aftr_isp ? 'a' : '-',
+ mdev->state.peer_isp ? 'p' : '-',
+ mdev->state.user_isp ? 'u' : '-',
+ mdev->congestion_reason ?: '-',
+ mdev->send_cnt/2,
+ mdev->recv_cnt/2,
+ mdev->writ_cnt/2,
+ mdev->read_cnt/2,
+ mdev->al_writ_cnt,
+ mdev->bm_writ_cnt,
+ atomic_read(&mdev->local_cnt),
+ atomic_read(&mdev->ap_pending_cnt) +
+ atomic_read(&mdev->rs_pending_cnt),
+ atomic_read(&mdev->unacked_cnt),
+ atomic_read(&mdev->ap_bio_cnt),
+ mdev->epochs,
+ write_ordering_chars[mdev->write_ordering]
+ );
+ seq_printf(seq, " oos:%lu\n",
+ Bit2KB(drbd_bm_total_weight(mdev)));
+ }
+ if (mdev->state.conn == C_SYNC_SOURCE ||
+ mdev->state.conn == C_SYNC_TARGET)
+ drbd_syncer_progress(mdev, seq);
+
+ if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+ seq_printf(seq, "\t%3d%% %lu/%lu\n",
+ (int)((mdev->rs_total-mdev->ov_left) /
+ (mdev->rs_total/100+1)),
+ mdev->rs_total - mdev->ov_left,
+ mdev->rs_total);
+
+ if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
+ lc_seq_printf_stats(seq, mdev->resync);
+ lc_seq_printf_stats(seq, mdev->act_log);
+ put_ldev(mdev);
+ }
+
+ if (proc_details >= 2) {
+ if (mdev->resync) {
+ lc_seq_dump_details(seq, mdev->resync, "rs_left",
+ resync_dump_detail);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int drbd_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, drbd_seq_show, PDE(inode)->data);
+}
+
+/* PROC FS stuff end */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
new file mode 100644
index 000000000000..c548f24f54a1
--- /dev/null
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -0,0 +1,4426 @@
+/*
+ drbd_receiver.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <net/sock.h>
+
+#include <linux/version.h>
+#include <linux/drbd.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/pkt_sched.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/vmalloc.h>
+#include <linux/random.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+#include "drbd_vli.h"
+
+struct flush_work {
+ struct drbd_work w;
+ struct drbd_epoch *epoch;
+};
+
+enum finish_epoch {
+ FE_STILL_LIVE,
+ FE_DESTROYED,
+ FE_RECYCLED,
+};
+
+static int drbd_do_handshake(struct drbd_conf *mdev);
+static int drbd_do_auth(struct drbd_conf *mdev);
+
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
+static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
+
+static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
+{
+ struct drbd_epoch *prev;
+ spin_lock(&mdev->epoch_lock);
+ prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
+ if (prev == epoch || prev == mdev->current_epoch)
+ prev = NULL;
+ spin_unlock(&mdev->epoch_lock);
+ return prev;
+}
+
+#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
+
+static struct page *drbd_pp_first_page_or_try_alloc(struct drbd_conf *mdev)
+{
+ struct page *page = NULL;
+
+ /* Yes, testing drbd_pp_vacant outside the lock is racy.
+ * So what. It saves a spin_lock. */
+ if (drbd_pp_vacant > 0) {
+ spin_lock(&drbd_pp_lock);
+ page = drbd_pp_pool;
+ if (page) {
+ drbd_pp_pool = (struct page *)page_private(page);
+ set_page_private(page, 0); /* just to be polite */
+ drbd_pp_vacant--;
+ }
+ spin_unlock(&drbd_pp_lock);
+ }
+ /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
+ * "criss-cross" setup, that might cause write-out on some other DRBD,
+ * which in turn might block on the other node at this very place. */
+ if (!page)
+ page = alloc_page(GFP_TRY);
+ if (page)
+ atomic_inc(&mdev->pp_in_use);
+ return page;
+}
+
+/* kick lower level device, if we have more than (arbitrary number)
+ * reference counts on it, which typically are locally submitted io
+ * requests. don't use unacked_cnt, so we speed up proto A and B, too. */
+static void maybe_kick_lo(struct drbd_conf *mdev)
+{
+ if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark)
+ drbd_kick_lo(mdev);
+}
+
+static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
+{
+ struct drbd_epoch_entry *e;
+ struct list_head *le, *tle;
+
+ /* The EEs are always appended to the end of the list. Since
+ they are sent in order over the wire, they have to finish
+ in order. As soon as we see the first not finished we can
+ stop to examine the list... */
+
+ list_for_each_safe(le, tle, &mdev->net_ee) {
+ e = list_entry(le, struct drbd_epoch_entry, w.list);
+ if (drbd_bio_has_active_page(e->private_bio))
+ break;
+ list_move(le, to_be_freed);
+ }
+}
+
+static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
+{
+ LIST_HEAD(reclaimed);
+ struct drbd_epoch_entry *e, *t;
+
+ maybe_kick_lo(mdev);
+ spin_lock_irq(&mdev->req_lock);
+ reclaim_net_ee(mdev, &reclaimed);
+ spin_unlock_irq(&mdev->req_lock);
+
+ list_for_each_entry_safe(e, t, &reclaimed, w.list)
+ drbd_free_ee(mdev, e);
+}
+
+/**
+ * drbd_pp_alloc() - Returns a page, fails only if a signal comes in
+ * @mdev: DRBD device.
+ * @retry: whether or not to retry allocation forever (or until signalled)
+ *
+ * Tries to allocate a page, first from our own page pool, then from the
+ * kernel, unless this allocation would exceed the max_buffers setting.
+ * If @retry is non-zero, retry until DRBD frees a page somewhere else.
+ */
+static struct page *drbd_pp_alloc(struct drbd_conf *mdev, int retry)
+{
+ struct page *page = NULL;
+ DEFINE_WAIT(wait);
+
+ if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
+ page = drbd_pp_first_page_or_try_alloc(mdev);
+ if (page)
+ return page;
+ }
+
+ for (;;) {
+ prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
+
+ drbd_kick_lo_and_reclaim_net(mdev);
+
+ if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
+ page = drbd_pp_first_page_or_try_alloc(mdev);
+ if (page)
+ break;
+ }
+
+ if (!retry)
+ break;
+
+ if (signal_pending(current)) {
+ dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
+ break;
+ }
+
+ schedule();
+ }
+ finish_wait(&drbd_pp_wait, &wait);
+
+ return page;
+}
+
+/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
+ * Is also used from inside an other spin_lock_irq(&mdev->req_lock) */
+static void drbd_pp_free(struct drbd_conf *mdev, struct page *page)
+{
+ int free_it;
+
+ spin_lock(&drbd_pp_lock);
+ if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) {
+ free_it = 1;
+ } else {
+ set_page_private(page, (unsigned long)drbd_pp_pool);
+ drbd_pp_pool = page;
+ drbd_pp_vacant++;
+ free_it = 0;
+ }
+ spin_unlock(&drbd_pp_lock);
+
+ atomic_dec(&mdev->pp_in_use);
+
+ if (free_it)
+ __free_page(page);
+
+ wake_up(&drbd_pp_wait);
+}
+
+static void drbd_pp_free_bio_pages(struct drbd_conf *mdev, struct bio *bio)
+{
+ struct page *p_to_be_freed = NULL;
+ struct page *page;
+ struct bio_vec *bvec;
+ int i;
+
+ spin_lock(&drbd_pp_lock);
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) {
+ set_page_private(bvec->bv_page, (unsigned long)p_to_be_freed);
+ p_to_be_freed = bvec->bv_page;
+ } else {
+ set_page_private(bvec->bv_page, (unsigned long)drbd_pp_pool);
+ drbd_pp_pool = bvec->bv_page;
+ drbd_pp_vacant++;
+ }
+ }
+ spin_unlock(&drbd_pp_lock);
+ atomic_sub(bio->bi_vcnt, &mdev->pp_in_use);
+
+ while (p_to_be_freed) {
+ page = p_to_be_freed;
+ p_to_be_freed = (struct page *)page_private(page);
+ set_page_private(page, 0); /* just to be polite */
+ put_page(page);
+ }
+
+ wake_up(&drbd_pp_wait);
+}
+
+/*
+You need to hold the req_lock:
+ _drbd_wait_ee_list_empty()
+
+You must not have the req_lock:
+ drbd_free_ee()
+ drbd_alloc_ee()
+ drbd_init_ee()
+ drbd_release_ee()
+ drbd_ee_fix_bhs()
+ drbd_process_done_ee()
+ drbd_clear_done_ee()
+ drbd_wait_ee_list_empty()
+*/
+
+struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
+ u64 id,
+ sector_t sector,
+ unsigned int data_size,
+ gfp_t gfp_mask) __must_hold(local)
+{
+ struct request_queue *q;
+ struct drbd_epoch_entry *e;
+ struct page *page;
+ struct bio *bio;
+ unsigned int ds;
+
+ if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE))
+ return NULL;
+
+ e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+ if (!e) {
+ if (!(gfp_mask & __GFP_NOWARN))
+ dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
+ return NULL;
+ }
+
+ bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE));
+ if (!bio) {
+ if (!(gfp_mask & __GFP_NOWARN))
+ dev_err(DEV, "alloc_ee: Allocation of a bio failed\n");
+ goto fail1;
+ }
+
+ bio->bi_bdev = mdev->ldev->backing_bdev;
+ bio->bi_sector = sector;
+
+ ds = data_size;
+ while (ds) {
+ page = drbd_pp_alloc(mdev, (gfp_mask & __GFP_WAIT));
+ if (!page) {
+ if (!(gfp_mask & __GFP_NOWARN))
+ dev_err(DEV, "alloc_ee: Allocation of a page failed\n");
+ goto fail2;
+ }
+ if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) {
+ drbd_pp_free(mdev, page);
+ dev_err(DEV, "alloc_ee: bio_add_page(s=%llu,"
+ "data_size=%u,ds=%u) failed\n",
+ (unsigned long long)sector, data_size, ds);
+
+ q = bdev_get_queue(bio->bi_bdev);
+ if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+ .bi_size = bio->bi_size,
+ .bi_rw = bio->bi_rw,
+ };
+ int l = q->merge_bvec_fn(q, &bvm,
+ &bio->bi_io_vec[bio->bi_vcnt]);
+ dev_err(DEV, "merge_bvec_fn() = %d\n", l);
+ }
+
+ /* dump more of the bio. */
+ dev_err(DEV, "bio->bi_max_vecs = %d\n", bio->bi_max_vecs);
+ dev_err(DEV, "bio->bi_vcnt = %d\n", bio->bi_vcnt);
+ dev_err(DEV, "bio->bi_size = %d\n", bio->bi_size);
+ dev_err(DEV, "bio->bi_phys_segments = %d\n", bio->bi_phys_segments);
+
+ goto fail2;
+ break;
+ }
+ ds -= min_t(int, ds, PAGE_SIZE);
+ }
+
+ D_ASSERT(data_size == bio->bi_size);
+
+ bio->bi_private = e;
+ e->mdev = mdev;
+ e->sector = sector;
+ e->size = bio->bi_size;
+
+ e->private_bio = bio;
+ e->block_id = id;
+ INIT_HLIST_NODE(&e->colision);
+ e->epoch = NULL;
+ e->flags = 0;
+
+ return e;
+
+ fail2:
+ drbd_pp_free_bio_pages(mdev, bio);
+ bio_put(bio);
+ fail1:
+ mempool_free(e, drbd_ee_mempool);
+
+ return NULL;
+}
+
+void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+ struct bio *bio = e->private_bio;
+ drbd_pp_free_bio_pages(mdev, bio);
+ bio_put(bio);
+ D_ASSERT(hlist_unhashed(&e->colision));
+ mempool_free(e, drbd_ee_mempool);
+}
+
+int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
+{
+ LIST_HEAD(work_list);
+ struct drbd_epoch_entry *e, *t;
+ int count = 0;
+
+ spin_lock_irq(&mdev->req_lock);
+ list_splice_init(list, &work_list);
+ spin_unlock_irq(&mdev->req_lock);
+
+ list_for_each_entry_safe(e, t, &work_list, w.list) {
+ drbd_free_ee(mdev, e);
+ count++;
+ }
+ return count;
+}
+
+
+/*
+ * This function is called from _asender only_
+ * but see also comments in _req_mod(,barrier_acked)
+ * and receive_Barrier.
+ *
+ * Move entries from net_ee to done_ee, if ready.
+ * Grab done_ee, call all callbacks, free the entries.
+ * The callbacks typically send out ACKs.
+ */
+static int drbd_process_done_ee(struct drbd_conf *mdev)
+{
+ LIST_HEAD(work_list);
+ LIST_HEAD(reclaimed);
+ struct drbd_epoch_entry *e, *t;
+ int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
+
+ spin_lock_irq(&mdev->req_lock);
+ reclaim_net_ee(mdev, &reclaimed);
+ list_splice_init(&mdev->done_ee, &work_list);
+ spin_unlock_irq(&mdev->req_lock);
+
+ list_for_each_entry_safe(e, t, &reclaimed, w.list)
+ drbd_free_ee(mdev, e);
+
+ /* possible callbacks here:
+ * e_end_block, and e_end_resync_block, e_send_discard_ack.
+ * all ignore the last argument.
+ */
+ list_for_each_entry_safe(e, t, &work_list, w.list) {
+ /* list_del not necessary, next/prev members not touched */
+ ok = e->w.cb(mdev, &e->w, !ok) && ok;
+ drbd_free_ee(mdev, e);
+ }
+ wake_up(&mdev->ee_wait);
+
+ return ok;
+}
+
+void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+{
+ DEFINE_WAIT(wait);
+
+ /* avoids spin_lock/unlock
+ * and calling prepare_to_wait in the fast path */
+ while (!list_empty(head)) {
+ prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&mdev->req_lock);
+ drbd_kick_lo(mdev);
+ schedule();
+ finish_wait(&mdev->ee_wait, &wait);
+ spin_lock_irq(&mdev->req_lock);
+ }
+}
+
+void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+{
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_wait_ee_list_empty(mdev, head);
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+/* see also kernel_accept; which is only present since 2.6.18.
+ * also we want to log which part of it failed, exactly */
+static int drbd_accept(struct drbd_conf *mdev, const char **what,
+ struct socket *sock, struct socket **newsock)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ *what = "listen";
+ err = sock->ops->listen(sock, 5);
+ if (err < 0)
+ goto out;
+
+ *what = "sock_create_lite";
+ err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+ newsock);
+ if (err < 0)
+ goto out;
+
+ *what = "accept";
+ err = sock->ops->accept(sock, *newsock, 0);
+ if (err < 0) {
+ sock_release(*newsock);
+ *newsock = NULL;
+ goto out;
+ }
+ (*newsock)->ops = sock->ops;
+
+out:
+ return err;
+}
+
+static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock,
+ void *buf, size_t size, int flags)
+{
+ mm_segment_t oldfs;
+ struct kvec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ struct msghdr msg = {
+ .msg_iovlen = 1,
+ .msg_iov = (struct iovec *)&iov,
+ .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
+ };
+ int rv;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
+ set_fs(oldfs);
+
+ return rv;
+}
+
+static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
+{
+ mm_segment_t oldfs;
+ struct kvec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ struct msghdr msg = {
+ .msg_iovlen = 1,
+ .msg_iov = (struct iovec *)&iov,
+ .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
+ };
+ int rv;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+
+ for (;;) {
+ rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags);
+ if (rv == size)
+ break;
+
+ /* Note:
+ * ECONNRESET other side closed the connection
+ * ERESTARTSYS (on sock) we got a signal
+ */
+
+ if (rv < 0) {
+ if (rv == -ECONNRESET)
+ dev_info(DEV, "sock was reset by peer\n");
+ else if (rv != -ERESTARTSYS)
+ dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+ break;
+ } else if (rv == 0) {
+ dev_info(DEV, "sock was shut down by peer\n");
+ break;
+ } else {
+ /* signal came in, or peer/link went down,
+ * after we read a partial message
+ */
+ /* D_ASSERT(signal_pending(current)); */
+ break;
+ }
+ };
+
+ set_fs(oldfs);
+
+ if (rv != size)
+ drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+
+ return rv;
+}
+
+static struct socket *drbd_try_connect(struct drbd_conf *mdev)
+{
+ const char *what;
+ struct socket *sock;
+ struct sockaddr_in6 src_in6;
+ int err;
+ int disconnect_on_error = 1;
+
+ if (!get_net_conf(mdev))
+ return NULL;
+
+ what = "sock_create_kern";
+ err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ sock = NULL;
+ goto out;
+ }
+
+ sock->sk->sk_rcvtimeo =
+ sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
+
+ /* explicitly bind to the configured IP as source IP
+ * for the outgoing connections.
+ * This is needed for multihomed hosts and to be
+ * able to use lo: interfaces for drbd.
+ * Make sure to use 0 as port number, so linux selects
+ * a free one dynamically.
+ */
+ memcpy(&src_in6, mdev->net_conf->my_addr,
+ min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6)));
+ if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6)
+ src_in6.sin6_port = 0;
+ else
+ ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
+
+ what = "bind before connect";
+ err = sock->ops->bind(sock,
+ (struct sockaddr *) &src_in6,
+ mdev->net_conf->my_addr_len);
+ if (err < 0)
+ goto out;
+
+ /* connect may fail, peer not yet available.
+ * stay C_WF_CONNECTION, don't go Disconnecting! */
+ disconnect_on_error = 0;
+ what = "connect";
+ err = sock->ops->connect(sock,
+ (struct sockaddr *)mdev->net_conf->peer_addr,
+ mdev->net_conf->peer_addr_len, 0);
+
+out:
+ if (err < 0) {
+ if (sock) {
+ sock_release(sock);
+ sock = NULL;
+ }
+ switch (-err) {
+ /* timeout, busy, signal pending */
+ case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
+ case EINTR: case ERESTARTSYS:
+ /* peer not (yet) available, network problem */
+ case ECONNREFUSED: case ENETUNREACH:
+ case EHOSTDOWN: case EHOSTUNREACH:
+ disconnect_on_error = 0;
+ break;
+ default:
+ dev_err(DEV, "%s failed, err = %d\n", what, err);
+ }
+ if (disconnect_on_error)
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ put_net_conf(mdev);
+ return sock;
+}
+
+static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
+{
+ int timeo, err;
+ struct socket *s_estab = NULL, *s_listen;
+ const char *what;
+
+ if (!get_net_conf(mdev))
+ return NULL;
+
+ what = "sock_create_kern";
+ err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, &s_listen);
+ if (err) {
+ s_listen = NULL;
+ goto out;
+ }
+
+ timeo = mdev->net_conf->try_connect_int * HZ;
+ timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
+
+ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
+ s_listen->sk->sk_rcvtimeo = timeo;
+ s_listen->sk->sk_sndtimeo = timeo;
+
+ what = "bind before listen";
+ err = s_listen->ops->bind(s_listen,
+ (struct sockaddr *) mdev->net_conf->my_addr,
+ mdev->net_conf->my_addr_len);
+ if (err < 0)
+ goto out;
+
+ err = drbd_accept(mdev, &what, s_listen, &s_estab);
+
+out:
+ if (s_listen)
+ sock_release(s_listen);
+ if (err < 0) {
+ if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
+ dev_err(DEV, "%s failed, err = %d\n", what, err);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ }
+ put_net_conf(mdev);
+
+ return s_estab;
+}
+
+static int drbd_send_fp(struct drbd_conf *mdev,
+ struct socket *sock, enum drbd_packets cmd)
+{
+ struct p_header *h = (struct p_header *) &mdev->data.sbuf.header;
+
+ return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
+}
+
+static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
+{
+ struct p_header *h = (struct p_header *) &mdev->data.sbuf.header;
+ int rr;
+
+ rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);
+
+ if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC)
+ return be16_to_cpu(h->command);
+
+ return 0xffff;
+}
+
+/**
+ * drbd_socket_okay() - Free the socket if its connection is not okay
+ * @mdev: DRBD device.
+ * @sock: pointer to the pointer to the socket.
+ */
+static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
+{
+ int rr;
+ char tb[4];
+
+ if (!*sock)
+ return FALSE;
+
+ rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
+
+ if (rr > 0 || rr == -EAGAIN) {
+ return TRUE;
+ } else {
+ sock_release(*sock);
+ *sock = NULL;
+ return FALSE;
+ }
+}
+
+/*
+ * return values:
+ * 1 yes, we have a valid connection
+ * 0 oops, did not work out, please try again
+ * -1 peer talks different language,
+ * no point in trying again, please go standalone.
+ * -2 We do not have a network config...
+ */
+static int drbd_connect(struct drbd_conf *mdev)
+{
+ struct socket *s, *sock, *msock;
+ int try, h, ok;
+
+ D_ASSERT(!mdev->data.socket);
+
+ if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
+ dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
+
+ if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
+ return -2;
+
+ clear_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+ sock = NULL;
+ msock = NULL;
+
+ do {
+ for (try = 0;;) {
+ /* 3 tries, this should take less than a second! */
+ s = drbd_try_connect(mdev);
+ if (s || ++try >= 3)
+ break;
+ /* give the other side time to call bind() & listen() */
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / 10);
+ }
+
+ if (s) {
+ if (!sock) {
+ drbd_send_fp(mdev, s, P_HAND_SHAKE_S);
+ sock = s;
+ s = NULL;
+ } else if (!msock) {
+ drbd_send_fp(mdev, s, P_HAND_SHAKE_M);
+ msock = s;
+ s = NULL;
+ } else {
+ dev_err(DEV, "Logic error in drbd_connect()\n");
+ goto out_release_sockets;
+ }
+ }
+
+ if (sock && msock) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / 10);
+ ok = drbd_socket_okay(mdev, &sock);
+ ok = drbd_socket_okay(mdev, &msock) && ok;
+ if (ok)
+ break;
+ }
+
+retry:
+ s = drbd_wait_for_connect(mdev);
+ if (s) {
+ try = drbd_recv_fp(mdev, s);
+ drbd_socket_okay(mdev, &sock);
+ drbd_socket_okay(mdev, &msock);
+ switch (try) {
+ case P_HAND_SHAKE_S:
+ if (sock) {
+ dev_warn(DEV, "initial packet S crossed\n");
+ sock_release(sock);
+ }
+ sock = s;
+ break;
+ case P_HAND_SHAKE_M:
+ if (msock) {
+ dev_warn(DEV, "initial packet M crossed\n");
+ sock_release(msock);
+ }
+ msock = s;
+ set_bit(DISCARD_CONCURRENT, &mdev->flags);
+ break;
+ default:
+ dev_warn(DEV, "Error receiving initial packet\n");
+ sock_release(s);
+ if (random32() & 1)
+ goto retry;
+ }
+ }
+
+ if (mdev->state.conn <= C_DISCONNECTING)
+ goto out_release_sockets;
+ if (signal_pending(current)) {
+ flush_signals(current);
+ smp_rmb();
+ if (get_t_state(&mdev->receiver) == Exiting)
+ goto out_release_sockets;
+ }
+
+ if (sock && msock) {
+ ok = drbd_socket_okay(mdev, &sock);
+ ok = drbd_socket_okay(mdev, &msock) && ok;
+ if (ok)
+ break;
+ }
+ } while (1);
+
+ msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
+ sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
+
+ sock->sk->sk_allocation = GFP_NOIO;
+ msock->sk->sk_allocation = GFP_NOIO;
+
+ sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
+ msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
+
+ if (mdev->net_conf->sndbuf_size) {
+ sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
+ sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ }
+
+ if (mdev->net_conf->rcvbuf_size) {
+ sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
+ sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+
+ /* NOT YET ...
+ * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+ * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+ * first set it to the P_HAND_SHAKE timeout,
+ * which we set to 4x the configured ping_timeout. */
+ sock->sk->sk_sndtimeo =
+ sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10;
+
+ msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+ msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+
+ /* we don't want delays.
+ * we use TCP_CORK where apropriate, though */
+ drbd_tcp_nodelay(sock);
+ drbd_tcp_nodelay(msock);
+
+ mdev->data.socket = sock;
+ mdev->meta.socket = msock;
+ mdev->last_received = jiffies;
+
+ D_ASSERT(mdev->asender.task == NULL);
+
+ h = drbd_do_handshake(mdev);
+ if (h <= 0)
+ return h;
+
+ if (mdev->cram_hmac_tfm) {
+ /* drbd_request_state(mdev, NS(conn, WFAuth)); */
+ if (!drbd_do_auth(mdev)) {
+ dev_err(DEV, "Authentication of peer failed\n");
+ return -1;
+ }
+ }
+
+ if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
+ return 0;
+
+ sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+ sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+
+ atomic_set(&mdev->packet_seq, 0);
+ mdev->peer_seq = 0;
+
+ drbd_thread_start(&mdev->asender);
+
+ drbd_send_protocol(mdev);
+ drbd_send_sync_param(mdev, &mdev->sync_conf);
+ drbd_send_sizes(mdev, 0);
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+ clear_bit(RESIZE_PENDING, &mdev->flags);
+
+ return 1;
+
+out_release_sockets:
+ if (sock)
+ sock_release(sock);
+ if (msock)
+ sock_release(msock);
+ return -1;
+}
+
+static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h)
+{
+ int r;
+
+ r = drbd_recv(mdev, h, sizeof(*h));
+
+ if (unlikely(r != sizeof(*h))) {
+ dev_err(DEV, "short read expecting header on sock: r=%d\n", r);
+ return FALSE;
+ };
+ h->command = be16_to_cpu(h->command);
+ h->length = be16_to_cpu(h->length);
+ if (unlikely(h->magic != BE_DRBD_MAGIC)) {
+ dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n",
+ (long)be32_to_cpu(h->magic),
+ h->command, h->length);
+ return FALSE;
+ }
+ mdev->last_received = jiffies;
+
+ return TRUE;
+}
+
+static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
+{
+ int rv;
+
+ if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
+ rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL);
+ if (rv) {
+ dev_err(DEV, "local disk flush failed with status %d\n", rv);
+ /* would rather check on EOPNOTSUPP, but that is not reliable.
+ * don't try again for ANY return value != 0
+ * if (rv == -EOPNOTSUPP) */
+ drbd_bump_write_ordering(mdev, WO_drain_io);
+ }
+ put_ldev(mdev);
+ }
+
+ return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
+}
+
+static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct flush_work *fw = (struct flush_work *)w;
+ struct drbd_epoch *epoch = fw->epoch;
+
+ kfree(w);
+
+ if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
+ drbd_flush_after_epoch(mdev, epoch);
+
+ drbd_may_finish_epoch(mdev, epoch, EV_PUT |
+ (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
+
+ return 1;
+}
+
+/**
+ * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
+ * @mdev: DRBD device.
+ * @epoch: Epoch object.
+ * @ev: Epoch event.
+ */
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
+ struct drbd_epoch *epoch,
+ enum epoch_event ev)
+{
+ int finish, epoch_size;
+ struct drbd_epoch *next_epoch;
+ int schedule_flush = 0;
+ enum finish_epoch rv = FE_STILL_LIVE;
+
+ spin_lock(&mdev->epoch_lock);
+ do {
+ next_epoch = NULL;
+ finish = 0;
+
+ epoch_size = atomic_read(&epoch->epoch_size);
+
+ switch (ev & ~EV_CLEANUP) {
+ case EV_PUT:
+ atomic_dec(&epoch->active);
+ break;
+ case EV_GOT_BARRIER_NR:
+ set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
+
+ /* Special case: If we just switched from WO_bio_barrier to
+ WO_bdev_flush we should not finish the current epoch */
+ if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
+ mdev->write_ordering != WO_bio_barrier &&
+ epoch == mdev->current_epoch)
+ clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
+ break;
+ case EV_BARRIER_DONE:
+ set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
+ break;
+ case EV_BECAME_LAST:
+ /* nothing to do*/
+ break;
+ }
+
+ if (epoch_size != 0 &&
+ atomic_read(&epoch->active) == 0 &&
+ test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) &&
+ epoch->list.prev == &mdev->current_epoch->list &&
+ !test_bit(DE_IS_FINISHING, &epoch->flags)) {
+ /* Nearly all conditions are met to finish that epoch... */
+ if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
+ mdev->write_ordering == WO_none ||
+ (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
+ ev & EV_CLEANUP) {
+ finish = 1;
+ set_bit(DE_IS_FINISHING, &epoch->flags);
+ } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
+ mdev->write_ordering == WO_bio_barrier) {
+ atomic_inc(&epoch->active);
+ schedule_flush = 1;
+ }
+ }
+ if (finish) {
+ if (!(ev & EV_CLEANUP)) {
+ spin_unlock(&mdev->epoch_lock);
+ drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
+ spin_lock(&mdev->epoch_lock);
+ }
+ dec_unacked(mdev);
+
+ if (mdev->current_epoch != epoch) {
+ next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
+ list_del(&epoch->list);
+ ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
+ mdev->epochs--;
+ kfree(epoch);
+
+ if (rv == FE_STILL_LIVE)
+ rv = FE_DESTROYED;
+ } else {
+ epoch->flags = 0;
+ atomic_set(&epoch->epoch_size, 0);
+ /* atomic_set(&epoch->active, 0); is alrady zero */
+ if (rv == FE_STILL_LIVE)
+ rv = FE_RECYCLED;
+ }
+ }
+
+ if (!next_epoch)
+ break;
+
+ epoch = next_epoch;
+ } while (1);
+
+ spin_unlock(&mdev->epoch_lock);
+
+ if (schedule_flush) {
+ struct flush_work *fw;
+ fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
+ if (fw) {
+ fw->w.cb = w_flush;
+ fw->epoch = epoch;
+ drbd_queue_work(&mdev->data.work, &fw->w);
+ } else {
+ dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
+ set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
+ /* That is not a recursion, only one level */
+ drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
+ drbd_may_finish_epoch(mdev, epoch, EV_PUT);
+ }
+ }
+
+ return rv;
+}
+
+/**
+ * drbd_bump_write_ordering() - Fall back to an other write ordering method
+ * @mdev: DRBD device.
+ * @wo: Write ordering method to try.
+ */
+void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
+{
+ enum write_ordering_e pwo;
+ static char *write_ordering_str[] = {
+ [WO_none] = "none",
+ [WO_drain_io] = "drain",
+ [WO_bdev_flush] = "flush",
+ [WO_bio_barrier] = "barrier",
+ };
+
+ pwo = mdev->write_ordering;
+ wo = min(pwo, wo);
+ if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
+ wo = WO_bdev_flush;
+ if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
+ wo = WO_drain_io;
+ if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
+ wo = WO_none;
+ mdev->write_ordering = wo;
+ if (pwo != mdev->write_ordering || wo == WO_bio_barrier)
+ dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
+}
+
+/**
+ * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways (unused in this callback)
+ */
+int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ struct bio *bio = e->private_bio;
+
+ /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
+ (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
+ so that we can finish that epoch in drbd_may_finish_epoch().
+ That is necessary if we already have a long chain of Epochs, before
+ we realize that BIO_RW_BARRIER is actually not supported */
+
+ /* As long as the -ENOTSUPP on the barrier is reported immediately
+ that will never trigger. If it is reported late, we will just
+ print that warning and continue correctly for all future requests
+ with WO_bdev_flush */
+ if (previous_epoch(mdev, e->epoch))
+ dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
+
+ /* prepare bio for re-submit,
+ * re-init volatile members */
+ /* we still have a local reference,
+ * get_ldev was done in receive_Data. */
+ bio->bi_bdev = mdev->ldev->backing_bdev;
+ bio->bi_sector = e->sector;
+ bio->bi_size = e->size;
+ bio->bi_idx = 0;
+
+ bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+ bio->bi_flags |= 1 << BIO_UPTODATE;
+
+ /* don't know whether this is necessary: */
+ bio->bi_phys_segments = 0;
+ bio->bi_next = NULL;
+
+ /* these should be unchanged: */
+ /* bio->bi_end_io = drbd_endio_write_sec; */
+ /* bio->bi_vcnt = whatever; */
+
+ e->w.cb = e_end_block;
+
+ /* This is no longer a barrier request. */
+ bio->bi_rw &= ~(1UL << BIO_RW_BARRIER);
+
+ drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, bio);
+
+ return 1;
+}
+
+static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h)
+{
+ int rv, issue_flush;
+ struct p_barrier *p = (struct p_barrier *)h;
+ struct drbd_epoch *epoch;
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+
+ rv = drbd_recv(mdev, h->payload, h->length);
+ ERR_IF(rv != h->length) return FALSE;
+
+ inc_unacked(mdev);
+
+ if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
+ drbd_kick_lo(mdev);
+
+ mdev->current_epoch->barrier_nr = p->barrier;
+ rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
+
+ /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
+ * the activity log, which means it would not be resynced in case the
+ * R_PRIMARY crashes now.
+ * Therefore we must send the barrier_ack after the barrier request was
+ * completed. */
+ switch (mdev->write_ordering) {
+ case WO_bio_barrier:
+ case WO_none:
+ if (rv == FE_RECYCLED)
+ return TRUE;
+ break;
+
+ case WO_bdev_flush:
+ case WO_drain_io:
+ D_ASSERT(rv == FE_STILL_LIVE);
+ set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
+ drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+ rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
+ if (rv == FE_RECYCLED)
+ return TRUE;
+
+ /* The asender will send all the ACKs and barrier ACKs out, since
+ all EEs moved from the active_ee to the done_ee. We need to
+ provide a new epoch object for the EEs that come in soon */
+ break;
+ }
+
+ /* receiver context, in the writeout path of the other node.
+ * avoid potential distributed deadlock */
+ epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+ if (!epoch) {
+ dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
+ issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
+ drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+ if (issue_flush) {
+ rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
+ if (rv == FE_RECYCLED)
+ return TRUE;
+ }
+
+ drbd_wait_ee_list_empty(mdev, &mdev->done_ee);
+
+ return TRUE;
+ }
+
+ epoch->flags = 0;
+ atomic_set(&epoch->epoch_size, 0);
+ atomic_set(&epoch->active, 0);
+
+ spin_lock(&mdev->epoch_lock);
+ if (atomic_read(&mdev->current_epoch->epoch_size)) {
+ list_add(&epoch->list, &mdev->current_epoch->list);
+ mdev->current_epoch = epoch;
+ mdev->epochs++;
+ } else {
+ /* The current_epoch got recycled while we allocated this one... */
+ kfree(epoch);
+ }
+ spin_unlock(&mdev->epoch_lock);
+
+ return TRUE;
+}
+
+/* used from receive_RSDataReply (recv_resync_read)
+ * and from receive_Data */
+static struct drbd_epoch_entry *
+read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local)
+{
+ struct drbd_epoch_entry *e;
+ struct bio_vec *bvec;
+ struct page *page;
+ struct bio *bio;
+ int dgs, ds, i, rr;
+ void *dig_in = mdev->int_dig_in;
+ void *dig_vv = mdev->int_dig_vv;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+
+ if (dgs) {
+ rr = drbd_recv(mdev, dig_in, dgs);
+ if (rr != dgs) {
+ dev_warn(DEV, "short read receiving data digest: read %d expected %d\n",
+ rr, dgs);
+ return NULL;
+ }
+ }
+
+ data_size -= dgs;
+
+ ERR_IF(data_size & 0x1ff) return NULL;
+ ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL;
+
+ /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
+ * "criss-cross" setup, that might cause write-out on some other DRBD,
+ * which in turn might block on the other node at this very place. */
+ e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
+ if (!e)
+ return NULL;
+ bio = e->private_bio;
+ ds = data_size;
+ bio_for_each_segment(bvec, bio, i) {
+ page = bvec->bv_page;
+ rr = drbd_recv(mdev, kmap(page), min_t(int, ds, PAGE_SIZE));
+ kunmap(page);
+ if (rr != min_t(int, ds, PAGE_SIZE)) {
+ drbd_free_ee(mdev, e);
+ dev_warn(DEV, "short read receiving data: read %d expected %d\n",
+ rr, min_t(int, ds, PAGE_SIZE));
+ return NULL;
+ }
+ ds -= rr;
+ }
+
+ if (dgs) {
+ drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv);
+ if (memcmp(dig_in, dig_vv, dgs)) {
+ dev_err(DEV, "Digest integrity check FAILED.\n");
+ drbd_bcast_ee(mdev, "digest failed",
+ dgs, dig_in, dig_vv, e);
+ drbd_free_ee(mdev, e);
+ return NULL;
+ }
+ }
+ mdev->recv_cnt += data_size>>9;
+ return e;
+}
+
+/* drbd_drain_block() just takes a data block
+ * out of the socket input buffer, and discards it.
+ */
+static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
+{
+ struct page *page;
+ int rr, rv = 1;
+ void *data;
+
+ page = drbd_pp_alloc(mdev, 1);
+
+ data = kmap(page);
+ while (data_size) {
+ rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
+ if (rr != min_t(int, data_size, PAGE_SIZE)) {
+ rv = 0;
+ dev_warn(DEV, "short read receiving data: read %d expected %d\n",
+ rr, min_t(int, data_size, PAGE_SIZE));
+ break;
+ }
+ data_size -= rr;
+ }
+ kunmap(page);
+ drbd_pp_free(mdev, page);
+ return rv;
+}
+
+static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
+ sector_t sector, int data_size)
+{
+ struct bio_vec *bvec;
+ struct bio *bio;
+ int dgs, rr, i, expect;
+ void *dig_in = mdev->int_dig_in;
+ void *dig_vv = mdev->int_dig_vv;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+
+ if (dgs) {
+ rr = drbd_recv(mdev, dig_in, dgs);
+ if (rr != dgs) {
+ dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n",
+ rr, dgs);
+ return 0;
+ }
+ }
+
+ data_size -= dgs;
+
+ /* optimistically update recv_cnt. if receiving fails below,
+ * we disconnect anyways, and counters will be reset. */
+ mdev->recv_cnt += data_size>>9;
+
+ bio = req->master_bio;
+ D_ASSERT(sector == bio->bi_sector);
+
+ bio_for_each_segment(bvec, bio, i) {
+ expect = min_t(int, data_size, bvec->bv_len);
+ rr = drbd_recv(mdev,
+ kmap(bvec->bv_page)+bvec->bv_offset,
+ expect);
+ kunmap(bvec->bv_page);
+ if (rr != expect) {
+ dev_warn(DEV, "short read receiving data reply: "
+ "read %d expected %d\n",
+ rr, expect);
+ return 0;
+ }
+ data_size -= rr;
+ }
+
+ if (dgs) {
+ drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv);
+ if (memcmp(dig_in, dig_vv, dgs)) {
+ dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
+ return 0;
+ }
+ }
+
+ D_ASSERT(data_size == 0);
+ return 1;
+}
+
+/* e_end_resync_block() is called via
+ * drbd_process_done_ee() by asender only */
+static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ sector_t sector = e->sector;
+ int ok;
+
+ D_ASSERT(hlist_unhashed(&e->colision));
+
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ drbd_set_in_sync(mdev, sector, e->size);
+ ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e);
+ } else {
+ /* Record failure to sync */
+ drbd_rs_failed_io(mdev, sector, e->size);
+
+ ok = drbd_send_ack(mdev, P_NEG_ACK, e);
+ }
+ dec_unacked(mdev);
+
+ return ok;
+}
+
+static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
+{
+ struct drbd_epoch_entry *e;
+
+ e = read_in_block(mdev, ID_SYNCER, sector, data_size);
+ if (!e) {
+ put_ldev(mdev);
+ return FALSE;
+ }
+
+ dec_rs_pending(mdev);
+
+ e->private_bio->bi_end_io = drbd_endio_write_sec;
+ e->private_bio->bi_rw = WRITE;
+ e->w.cb = e_end_resync_block;
+
+ inc_unacked(mdev);
+ /* corresponding dec_unacked() in e_end_resync_block()
+ * respective _drbd_clear_done_ee */
+
+ spin_lock_irq(&mdev->req_lock);
+ list_add(&e->w.list, &mdev->sync_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio);
+ /* accounting done in endio */
+
+ maybe_kick_lo(mdev);
+ return TRUE;
+}
+
+static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct drbd_request *req;
+ sector_t sector;
+ unsigned int header_size, data_size;
+ int ok;
+ struct p_data *p = (struct p_data *)h;
+
+ header_size = sizeof(*p) - sizeof(*h);
+ data_size = h->length - header_size;
+
+ ERR_IF(data_size == 0) return FALSE;
+
+ if (drbd_recv(mdev, h->payload, header_size) != header_size)
+ return FALSE;
+
+ sector = be64_to_cpu(p->sector);
+
+ spin_lock_irq(&mdev->req_lock);
+ req = _ar_id_to_req(mdev, p->block_id, sector);
+ spin_unlock_irq(&mdev->req_lock);
+ if (unlikely(!req)) {
+ dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n");
+ return FALSE;
+ }
+
+ /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
+ * special casing it there for the various failure cases.
+ * still no race with drbd_fail_pending_reads */
+ ok = recv_dless_read(mdev, req, sector, data_size);
+
+ if (ok)
+ req_mod(req, data_received);
+ /* else: nothing. handled from drbd_disconnect...
+ * I don't think we may complete this just yet
+ * in case we are "on-disconnect: freeze" */
+
+ return ok;
+}
+
+static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
+{
+ sector_t sector;
+ unsigned int header_size, data_size;
+ int ok;
+ struct p_data *p = (struct p_data *)h;
+
+ header_size = sizeof(*p) - sizeof(*h);
+ data_size = h->length - header_size;
+
+ ERR_IF(data_size == 0) return FALSE;
+
+ if (drbd_recv(mdev, h->payload, header_size) != header_size)
+ return FALSE;
+
+ sector = be64_to_cpu(p->sector);
+ D_ASSERT(p->block_id == ID_SYNCER);
+
+ if (get_ldev(mdev)) {
+ /* data is submitted to disk within recv_resync_read.
+ * corresponding put_ldev done below on error,
+ * or in drbd_endio_write_sec. */
+ ok = recv_resync_read(mdev, sector, data_size);
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Can not write resync data to local disk.\n");
+
+ ok = drbd_drain_block(mdev, data_size);
+
+ drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+ }
+
+ return ok;
+}
+
+/* e_end_block() is called via drbd_process_done_ee().
+ * this means this function only runs in the asender thread
+ */
+static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ sector_t sector = e->sector;
+ struct drbd_epoch *epoch;
+ int ok = 1, pcmd;
+
+ if (e->flags & EE_IS_BARRIER) {
+ epoch = previous_epoch(mdev, e->epoch);
+ if (epoch)
+ drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
+ }
+
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
+ mdev->state.conn <= C_PAUSED_SYNC_T &&
+ e->flags & EE_MAY_SET_IN_SYNC) ?
+ P_RS_WRITE_ACK : P_WRITE_ACK;
+ ok &= drbd_send_ack(mdev, pcmd, e);
+ if (pcmd == P_RS_WRITE_ACK)
+ drbd_set_in_sync(mdev, sector, e->size);
+ } else {
+ ok = drbd_send_ack(mdev, P_NEG_ACK, e);
+ /* we expect it to be marked out of sync anyways...
+ * maybe assert this? */
+ }
+ dec_unacked(mdev);
+ }
+ /* we delete from the conflict detection hash _after_ we sent out the
+ * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
+ if (mdev->net_conf->two_primaries) {
+ spin_lock_irq(&mdev->req_lock);
+ D_ASSERT(!hlist_unhashed(&e->colision));
+ hlist_del_init(&e->colision);
+ spin_unlock_irq(&mdev->req_lock);
+ } else {
+ D_ASSERT(hlist_unhashed(&e->colision));
+ }
+
+ drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+
+ return ok;
+}
+
+static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ int ok = 1;
+
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
+
+ spin_lock_irq(&mdev->req_lock);
+ D_ASSERT(!hlist_unhashed(&e->colision));
+ hlist_del_init(&e->colision);
+ spin_unlock_irq(&mdev->req_lock);
+
+ dec_unacked(mdev);
+
+ return ok;
+}
+
+/* Called from receive_Data.
+ * Synchronize packets on sock with packets on msock.
+ *
+ * This is here so even when a P_DATA packet traveling via sock overtook an Ack
+ * packet traveling on msock, they are still processed in the order they have
+ * been sent.
+ *
+ * Note: we don't care for Ack packets overtaking P_DATA packets.
+ *
+ * In case packet_seq is larger than mdev->peer_seq number, there are
+ * outstanding packets on the msock. We wait for them to arrive.
+ * In case we are the logically next packet, we update mdev->peer_seq
+ * ourselves. Correctly handles 32bit wrap around.
+ *
+ * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
+ * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
+ * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
+ * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
+ *
+ * returns 0 if we may process the packet,
+ * -ERESTARTSYS if we were interrupted (by disconnect signal). */
+static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
+{
+ DEFINE_WAIT(wait);
+ unsigned int p_seq;
+ long timeout;
+ int ret = 0;
+ spin_lock(&mdev->peer_seq_lock);
+ for (;;) {
+ prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
+ if (seq_le(packet_seq, mdev->peer_seq+1))
+ break;
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ p_seq = mdev->peer_seq;
+ spin_unlock(&mdev->peer_seq_lock);
+ timeout = schedule_timeout(30*HZ);
+ spin_lock(&mdev->peer_seq_lock);
+ if (timeout == 0 && p_seq == mdev->peer_seq) {
+ ret = -ETIMEDOUT;
+ dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
+ break;
+ }
+ }
+ finish_wait(&mdev->seq_wait, &wait);
+ if (mdev->peer_seq+1 == packet_seq)
+ mdev->peer_seq++;
+ spin_unlock(&mdev->peer_seq_lock);
+ return ret;
+}
+
+/* mirrored write */
+static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
+{
+ sector_t sector;
+ struct drbd_epoch_entry *e;
+ struct p_data *p = (struct p_data *)h;
+ int header_size, data_size;
+ int rw = WRITE;
+ u32 dp_flags;
+
+ header_size = sizeof(*p) - sizeof(*h);
+ data_size = h->length - header_size;
+
+ ERR_IF(data_size == 0) return FALSE;
+
+ if (drbd_recv(mdev, h->payload, header_size) != header_size)
+ return FALSE;
+
+ if (!get_ldev(mdev)) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Can not write mirrored data block "
+ "to local disk.\n");
+ spin_lock(&mdev->peer_seq_lock);
+ if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
+ mdev->peer_seq++;
+ spin_unlock(&mdev->peer_seq_lock);
+
+ drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+ atomic_inc(&mdev->current_epoch->epoch_size);
+ return drbd_drain_block(mdev, data_size);
+ }
+
+ /* get_ldev(mdev) successful.
+ * Corresponding put_ldev done either below (on various errors),
+ * or in drbd_endio_write_sec, if we successfully submit the data at
+ * the end of this function. */
+
+ sector = be64_to_cpu(p->sector);
+ e = read_in_block(mdev, p->block_id, sector, data_size);
+ if (!e) {
+ put_ldev(mdev);
+ return FALSE;
+ }
+
+ e->private_bio->bi_end_io = drbd_endio_write_sec;
+ e->w.cb = e_end_block;
+
+ spin_lock(&mdev->epoch_lock);
+ e->epoch = mdev->current_epoch;
+ atomic_inc(&e->epoch->epoch_size);
+ atomic_inc(&e->epoch->active);
+
+ if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
+ struct drbd_epoch *epoch;
+ /* Issue a barrier if we start a new epoch, and the previous epoch
+ was not a epoch containing a single request which already was
+ a Barrier. */
+ epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
+ if (epoch == e->epoch) {
+ set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
+ rw |= (1<<BIO_RW_BARRIER);
+ e->flags |= EE_IS_BARRIER;
+ } else {
+ if (atomic_read(&epoch->epoch_size) > 1 ||
+ !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
+ set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
+ set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
+ rw |= (1<<BIO_RW_BARRIER);
+ e->flags |= EE_IS_BARRIER;
+ }
+ }
+ }
+ spin_unlock(&mdev->epoch_lock);
+
+ dp_flags = be32_to_cpu(p->dp_flags);
+ if (dp_flags & DP_HARDBARRIER) {
+ dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n");
+ /* rw |= (1<<BIO_RW_BARRIER); */
+ }
+ if (dp_flags & DP_RW_SYNC)
+ rw |= (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
+ if (dp_flags & DP_MAY_SET_IN_SYNC)
+ e->flags |= EE_MAY_SET_IN_SYNC;
+
+ /* I'm the receiver, I do hold a net_cnt reference. */
+ if (!mdev->net_conf->two_primaries) {
+ spin_lock_irq(&mdev->req_lock);
+ } else {
+ /* don't get the req_lock yet,
+ * we may sleep in drbd_wait_peer_seq */
+ const int size = e->size;
+ const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+ DEFINE_WAIT(wait);
+ struct drbd_request *i;
+ struct hlist_node *n;
+ struct hlist_head *slot;
+ int first;
+
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ BUG_ON(mdev->ee_hash == NULL);
+ BUG_ON(mdev->tl_hash == NULL);
+
+ /* conflict detection and handling:
+ * 1. wait on the sequence number,
+ * in case this data packet overtook ACK packets.
+ * 2. check our hash tables for conflicting requests.
+ * we only need to walk the tl_hash, since an ee can not
+ * have a conflict with an other ee: on the submitting
+ * node, the corresponding req had already been conflicting,
+ * and a conflicting req is never sent.
+ *
+ * Note: for two_primaries, we are protocol C,
+ * so there cannot be any request that is DONE
+ * but still on the transfer log.
+ *
+ * unconditionally add to the ee_hash.
+ *
+ * if no conflicting request is found:
+ * submit.
+ *
+ * if any conflicting request is found
+ * that has not yet been acked,
+ * AND I have the "discard concurrent writes" flag:
+ * queue (via done_ee) the P_DISCARD_ACK; OUT.
+ *
+ * if any conflicting request is found:
+ * block the receiver, waiting on misc_wait
+ * until no more conflicting requests are there,
+ * or we get interrupted (disconnect).
+ *
+ * we do not just write after local io completion of those
+ * requests, but only after req is done completely, i.e.
+ * we wait for the P_DISCARD_ACK to arrive!
+ *
+ * then proceed normally, i.e. submit.
+ */
+ if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
+ goto out_interrupted;
+
+ spin_lock_irq(&mdev->req_lock);
+
+ hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+ slot = tl_hash_slot(mdev, sector);
+ first = 1;
+ for (;;) {
+ int have_unacked = 0;
+ int have_conflict = 0;
+ prepare_to_wait(&mdev->misc_wait, &wait,
+ TASK_INTERRUPTIBLE);
+ hlist_for_each_entry(i, n, slot, colision) {
+ if (OVERLAPS) {
+ /* only ALERT on first iteration,
+ * we may be woken up early... */
+ if (first)
+ dev_alert(DEV, "%s[%u] Concurrent local write detected!"
+ " new: %llus +%u; pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)i->sector, i->size);
+ if (i->rq_state & RQ_NET_PENDING)
+ ++have_unacked;
+ ++have_conflict;
+ }
+ }
+#undef OVERLAPS
+ if (!have_conflict)
+ break;
+
+ /* Discard Ack only for the _first_ iteration */
+ if (first && discard && have_unacked) {
+ dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
+ (unsigned long long)sector);
+ inc_unacked(mdev);
+ e->w.cb = e_send_discard_ack;
+ list_add_tail(&e->w.list, &mdev->done_ee);
+
+ spin_unlock_irq(&mdev->req_lock);
+
+ /* we could probably send that P_DISCARD_ACK ourselves,
+ * but I don't like the receiver using the msock */
+
+ put_ldev(mdev);
+ wake_asender(mdev);
+ finish_wait(&mdev->misc_wait, &wait);
+ return TRUE;
+ }
+
+ if (signal_pending(current)) {
+ hlist_del_init(&e->colision);
+
+ spin_unlock_irq(&mdev->req_lock);
+
+ finish_wait(&mdev->misc_wait, &wait);
+ goto out_interrupted;
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+ if (first) {
+ first = 0;
+ dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
+ "sec=%llus\n", (unsigned long long)sector);
+ } else if (discard) {
+ /* we had none on the first iteration.
+ * there must be none now. */
+ D_ASSERT(have_unacked == 0);
+ }
+ schedule();
+ spin_lock_irq(&mdev->req_lock);
+ }
+ finish_wait(&mdev->misc_wait, &wait);
+ }
+
+ list_add(&e->w.list, &mdev->active_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ switch (mdev->net_conf->wire_protocol) {
+ case DRBD_PROT_C:
+ inc_unacked(mdev);
+ /* corresponding dec_unacked() in e_end_block()
+ * respective _drbd_clear_done_ee */
+ break;
+ case DRBD_PROT_B:
+ /* I really don't like it that the receiver thread
+ * sends on the msock, but anyways */
+ drbd_send_ack(mdev, P_RECV_ACK, e);
+ break;
+ case DRBD_PROT_A:
+ /* nothing to do */
+ break;
+ }
+
+ if (mdev->state.pdsk == D_DISKLESS) {
+ /* In case we have the only disk of the cluster, */
+ drbd_set_out_of_sync(mdev, e->sector, e->size);
+ e->flags |= EE_CALL_AL_COMPLETE_IO;
+ drbd_al_begin_io(mdev, e->sector);
+ }
+
+ e->private_bio->bi_rw = rw;
+ drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio);
+ /* accounting done in endio */
+
+ maybe_kick_lo(mdev);
+ return TRUE;
+
+out_interrupted:
+ /* yes, the epoch_size now is imbalanced.
+ * but we drop the connection anyways, so we don't have a chance to
+ * receive a barrier... atomic_inc(&mdev->epoch_size); */
+ put_ldev(mdev);
+ drbd_free_ee(mdev, e);
+ return FALSE;
+}
+
+static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
+{
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+ struct drbd_epoch_entry *e;
+ struct digest_info *di = NULL;
+ int size, digest_size;
+ unsigned int fault_type;
+ struct p_block_req *p =
+ (struct p_block_req *)h;
+ const int brps = sizeof(*p)-sizeof(*h);
+
+ if (drbd_recv(mdev, h->payload, brps) != brps)
+ return FALSE;
+
+ sector = be64_to_cpu(p->sector);
+ size = be32_to_cpu(p->blksize);
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+ dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
+ (unsigned long long)sector, size);
+ return FALSE;
+ }
+ if (sector + (size>>9) > capacity) {
+ dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
+ (unsigned long long)sector, size);
+ return FALSE;
+ }
+
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Can not satisfy peer's read request, "
+ "no local data.\n");
+ drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY :
+ P_NEG_RS_DREPLY , p);
+ return TRUE;
+ }
+
+ /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
+ * "criss-cross" setup, that might cause write-out on some other DRBD,
+ * which in turn might block on the other node at this very place. */
+ e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
+ if (!e) {
+ put_ldev(mdev);
+ return FALSE;
+ }
+
+ e->private_bio->bi_rw = READ;
+ e->private_bio->bi_end_io = drbd_endio_read_sec;
+
+ switch (h->command) {
+ case P_DATA_REQUEST:
+ e->w.cb = w_e_end_data_req;
+ fault_type = DRBD_FAULT_DT_RD;
+ break;
+ case P_RS_DATA_REQUEST:
+ e->w.cb = w_e_end_rsdata_req;
+ fault_type = DRBD_FAULT_RS_RD;
+ /* Eventually this should become asynchronously. Currently it
+ * blocks the whole receiver just to delay the reading of a
+ * resync data block.
+ * the drbd_work_queue mechanism is made for this...
+ */
+ if (!drbd_rs_begin_io(mdev, sector)) {
+ /* we have been interrupted,
+ * probably connection lost! */
+ D_ASSERT(signal_pending(current));
+ goto out_free_e;
+ }
+ break;
+
+ case P_OV_REPLY:
+ case P_CSUM_RS_REQUEST:
+ fault_type = DRBD_FAULT_RS_RD;
+ digest_size = h->length - brps ;
+ di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
+ if (!di)
+ goto out_free_e;
+
+ di->digest_size = digest_size;
+ di->digest = (((char *)di)+sizeof(struct digest_info));
+
+ if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
+ goto out_free_e;
+
+ e->block_id = (u64)(unsigned long)di;
+ if (h->command == P_CSUM_RS_REQUEST) {
+ D_ASSERT(mdev->agreed_pro_version >= 89);
+ e->w.cb = w_e_end_csum_rs_req;
+ } else if (h->command == P_OV_REPLY) {
+ e->w.cb = w_e_end_ov_reply;
+ dec_rs_pending(mdev);
+ break;
+ }
+
+ if (!drbd_rs_begin_io(mdev, sector)) {
+ /* we have been interrupted, probably connection lost! */
+ D_ASSERT(signal_pending(current));
+ goto out_free_e;
+ }
+ break;
+
+ case P_OV_REQUEST:
+ if (mdev->state.conn >= C_CONNECTED &&
+ mdev->state.conn != C_VERIFY_T)
+ dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n",
+ drbd_conn_str(mdev->state.conn));
+ if (mdev->ov_start_sector == ~(sector_t)0 &&
+ mdev->agreed_pro_version >= 90) {
+ mdev->ov_start_sector = sector;
+ mdev->ov_position = sector;
+ mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector);
+ dev_info(DEV, "Online Verify start sector: %llu\n",
+ (unsigned long long)sector);
+ }
+ e->w.cb = w_e_end_ov_req;
+ fault_type = DRBD_FAULT_RS_RD;
+ /* Eventually this should become asynchronous. Currently it
+ * blocks the whole receiver just to delay the reading of a
+ * resync data block.
+ * the drbd_work_queue mechanism is made for this...
+ */
+ if (!drbd_rs_begin_io(mdev, sector)) {
+ /* we have been interrupted,
+ * probably connection lost! */
+ D_ASSERT(signal_pending(current));
+ goto out_free_e;
+ }
+ break;
+
+
+ default:
+ dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+ cmdname(h->command));
+ fault_type = DRBD_FAULT_MAX;
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ list_add(&e->w.list, &mdev->read_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ inc_unacked(mdev);
+
+ drbd_generic_make_request(mdev, fault_type, e->private_bio);
+ maybe_kick_lo(mdev);
+
+ return TRUE;
+
+out_free_e:
+ kfree(di);
+ put_ldev(mdev);
+ drbd_free_ee(mdev, e);
+ return FALSE;
+}
+
+static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
+{
+ int self, peer, rv = -100;
+ unsigned long ch_self, ch_peer;
+
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+ peer = mdev->p_uuid[UI_BITMAP] & 1;
+
+ ch_peer = mdev->p_uuid[UI_SIZE];
+ ch_self = mdev->comm_bm_set;
+
+ switch (mdev->net_conf->after_sb_0p) {
+ case ASB_CONSENSUS:
+ case ASB_DISCARD_SECONDARY:
+ case ASB_CALL_HELPER:
+ dev_err(DEV, "Configuration error.\n");
+ break;
+ case ASB_DISCONNECT:
+ break;
+ case ASB_DISCARD_YOUNGER_PRI:
+ if (self == 0 && peer == 1) {
+ rv = -1;
+ break;
+ }
+ if (self == 1 && peer == 0) {
+ rv = 1;
+ break;
+ }
+ /* Else fall through to one of the other strategies... */
+ case ASB_DISCARD_OLDER_PRI:
+ if (self == 0 && peer == 1) {
+ rv = 1;
+ break;
+ }
+ if (self == 1 && peer == 0) {
+ rv = -1;
+ break;
+ }
+ /* Else fall through to one of the other strategies... */
+ dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
+ "Using discard-least-changes instead\n");
+ case ASB_DISCARD_ZERO_CHG:
+ if (ch_peer == 0 && ch_self == 0) {
+ rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+ ? -1 : 1;
+ break;
+ } else {
+ if (ch_peer == 0) { rv = 1; break; }
+ if (ch_self == 0) { rv = -1; break; }
+ }
+ if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
+ break;
+ case ASB_DISCARD_LEAST_CHG:
+ if (ch_self < ch_peer)
+ rv = -1;
+ else if (ch_self > ch_peer)
+ rv = 1;
+ else /* ( ch_self == ch_peer ) */
+ /* Well, then use something else. */
+ rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+ ? -1 : 1;
+ break;
+ case ASB_DISCARD_LOCAL:
+ rv = -1;
+ break;
+ case ASB_DISCARD_REMOTE:
+ rv = 1;
+ }
+
+ return rv;
+}
+
+static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
+{
+ int self, peer, hg, rv = -100;
+
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+ peer = mdev->p_uuid[UI_BITMAP] & 1;
+
+ switch (mdev->net_conf->after_sb_1p) {
+ case ASB_DISCARD_YOUNGER_PRI:
+ case ASB_DISCARD_OLDER_PRI:
+ case ASB_DISCARD_LEAST_CHG:
+ case ASB_DISCARD_LOCAL:
+ case ASB_DISCARD_REMOTE:
+ dev_err(DEV, "Configuration error.\n");
+ break;
+ case ASB_DISCONNECT:
+ break;
+ case ASB_CONSENSUS:
+ hg = drbd_asb_recover_0p(mdev);
+ if (hg == -1 && mdev->state.role == R_SECONDARY)
+ rv = hg;
+ if (hg == 1 && mdev->state.role == R_PRIMARY)
+ rv = hg;
+ break;
+ case ASB_VIOLENTLY:
+ rv = drbd_asb_recover_0p(mdev);
+ break;
+ case ASB_DISCARD_SECONDARY:
+ return mdev->state.role == R_PRIMARY ? 1 : -1;
+ case ASB_CALL_HELPER:
+ hg = drbd_asb_recover_0p(mdev);
+ if (hg == -1 && mdev->state.role == R_PRIMARY) {
+ self = drbd_set_role(mdev, R_SECONDARY, 0);
+ /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
+ * we might be here in C_WF_REPORT_PARAMS which is transient.
+ * we do not need to wait for the after state change work either. */
+ self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+ if (self != SS_SUCCESS) {
+ drbd_khelper(mdev, "pri-lost-after-sb");
+ } else {
+ dev_warn(DEV, "Successfully gave up primary role.\n");
+ rv = hg;
+ }
+ } else
+ rv = hg;
+ }
+
+ return rv;
+}
+
+static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
+{
+ int self, peer, hg, rv = -100;
+
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+ peer = mdev->p_uuid[UI_BITMAP] & 1;
+
+ switch (mdev->net_conf->after_sb_2p) {
+ case ASB_DISCARD_YOUNGER_PRI:
+ case ASB_DISCARD_OLDER_PRI:
+ case ASB_DISCARD_LEAST_CHG:
+ case ASB_DISCARD_LOCAL:
+ case ASB_DISCARD_REMOTE:
+ case ASB_CONSENSUS:
+ case ASB_DISCARD_SECONDARY:
+ dev_err(DEV, "Configuration error.\n");
+ break;
+ case ASB_VIOLENTLY:
+ rv = drbd_asb_recover_0p(mdev);
+ break;
+ case ASB_DISCONNECT:
+ break;
+ case ASB_CALL_HELPER:
+ hg = drbd_asb_recover_0p(mdev);
+ if (hg == -1) {
+ /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
+ * we might be here in C_WF_REPORT_PARAMS which is transient.
+ * we do not need to wait for the after state change work either. */
+ self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+ if (self != SS_SUCCESS) {
+ drbd_khelper(mdev, "pri-lost-after-sb");
+ } else {
+ dev_warn(DEV, "Successfully gave up primary role.\n");
+ rv = hg;
+ }
+ } else
+ rv = hg;
+ }
+
+ return rv;
+}
+
+static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
+ u64 bits, u64 flags)
+{
+ if (!uuid) {
+ dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
+ return;
+ }
+ dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
+ text,
+ (unsigned long long)uuid[UI_CURRENT],
+ (unsigned long long)uuid[UI_BITMAP],
+ (unsigned long long)uuid[UI_HISTORY_START],
+ (unsigned long long)uuid[UI_HISTORY_END],
+ (unsigned long long)bits,
+ (unsigned long long)flags);
+}
+
+/*
+ 100 after split brain try auto recover
+ 2 C_SYNC_SOURCE set BitMap
+ 1 C_SYNC_SOURCE use BitMap
+ 0 no Sync
+ -1 C_SYNC_TARGET use BitMap
+ -2 C_SYNC_TARGET set BitMap
+ -100 after split brain, disconnect
+-1000 unrelated data
+ */
+static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
+{
+ u64 self, peer;
+ int i, j;
+
+ self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
+ peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+
+ *rule_nr = 10;
+ if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
+ return 0;
+
+ *rule_nr = 20;
+ if ((self == UUID_JUST_CREATED || self == (u64)0) &&
+ peer != UUID_JUST_CREATED)
+ return -2;
+
+ *rule_nr = 30;
+ if (self != UUID_JUST_CREATED &&
+ (peer == UUID_JUST_CREATED || peer == (u64)0))
+ return 2;
+
+ if (self == peer) {
+ int rct, dc; /* roles at crash time */
+
+ if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
+
+ if (mdev->agreed_pro_version < 91)
+ return -1001;
+
+ if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
+ (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
+ dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
+ drbd_uuid_set_bm(mdev, 0UL);
+
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
+ mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
+ *rule_nr = 34;
+ } else {
+ dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
+ *rule_nr = 36;
+ }
+
+ return 1;
+ }
+
+ if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
+
+ if (mdev->agreed_pro_version < 91)
+ return -1001;
+
+ if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
+ (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
+ dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
+
+ mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
+ mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
+ mdev->p_uuid[UI_BITMAP] = 0UL;
+
+ drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+ *rule_nr = 35;
+ } else {
+ dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
+ *rule_nr = 37;
+ }
+
+ return -1;
+ }
+
+ /* Common power [off|failure] */
+ rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
+ (mdev->p_uuid[UI_FLAGS] & 2);
+ /* lowest bit is set when we were primary,
+ * next bit (weight 2) is set when peer was primary */
+ *rule_nr = 40;
+
+ switch (rct) {
+ case 0: /* !self_pri && !peer_pri */ return 0;
+ case 1: /* self_pri && !peer_pri */ return 1;
+ case 2: /* !self_pri && peer_pri */ return -1;
+ case 3: /* self_pri && peer_pri */
+ dc = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+ return dc ? -1 : 1;
+ }
+ }
+
+ *rule_nr = 50;
+ peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
+ if (self == peer)
+ return -1;
+
+ *rule_nr = 51;
+ peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
+ if (self == peer) {
+ self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
+ peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1);
+ if (self == peer) {
+ /* The last P_SYNC_UUID did not get though. Undo the last start of
+ resync as sync source modifications of the peer's UUIDs. */
+
+ if (mdev->agreed_pro_version < 91)
+ return -1001;
+
+ mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
+ mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
+ return -1;
+ }
+ }
+
+ *rule_nr = 60;
+ self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
+ for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+ peer = mdev->p_uuid[i] & ~((u64)1);
+ if (self == peer)
+ return -2;
+ }
+
+ *rule_nr = 70;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
+ peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+ if (self == peer)
+ return 1;
+
+ *rule_nr = 71;
+ self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
+ if (self == peer) {
+ self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1);
+ peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
+ if (self == peer) {
+ /* The last P_SYNC_UUID did not get though. Undo the last start of
+ resync as sync source modifications of our UUIDs. */
+
+ if (mdev->agreed_pro_version < 91)
+ return -1001;
+
+ _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
+ _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
+
+ dev_info(DEV, "Undid last start of resync:\n");
+
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
+ mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
+
+ return 1;
+ }
+ }
+
+
+ *rule_nr = 80;
+ peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+ for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+ self = mdev->ldev->md.uuid[i] & ~((u64)1);
+ if (self == peer)
+ return 2;
+ }
+
+ *rule_nr = 90;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
+ peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
+ if (self == peer && self != ((u64)0))
+ return 100;
+
+ *rule_nr = 100;
+ for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+ self = mdev->ldev->md.uuid[i] & ~((u64)1);
+ for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
+ peer = mdev->p_uuid[j] & ~((u64)1);
+ if (self == peer)
+ return -100;
+ }
+ }
+
+ return -1000;
+}
+
+/* drbd_sync_handshake() returns the new conn state on success, or
+ CONN_MASK (-1) on failure.
+ */
+static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
+ enum drbd_disk_state peer_disk) __must_hold(local)
+{
+ int hg, rule_nr;
+ enum drbd_conns rv = C_MASK;
+ enum drbd_disk_state mydisk;
+
+ mydisk = mdev->state.disk;
+ if (mydisk == D_NEGOTIATING)
+ mydisk = mdev->new_state_tmp.disk;
+
+ dev_info(DEV, "drbd_sync_handshake:\n");
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
+ drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
+ mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+
+ hg = drbd_uuid_compare(mdev, &rule_nr);
+
+ dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
+
+ if (hg == -1000) {
+ dev_alert(DEV, "Unrelated data, aborting!\n");
+ return C_MASK;
+ }
+ if (hg == -1001) {
+ dev_alert(DEV, "To resolve this both sides have to support at least protocol\n");
+ return C_MASK;
+ }
+
+ if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
+ (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
+ int f = (hg == -100) || abs(hg) == 2;
+ hg = mydisk > D_INCONSISTENT ? 1 : -1;
+ if (f)
+ hg = hg*2;
+ dev_info(DEV, "Becoming sync %s due to disk states.\n",
+ hg > 0 ? "source" : "target");
+ }
+
+ if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) {
+ int pcount = (mdev->state.role == R_PRIMARY)
+ + (peer_role == R_PRIMARY);
+ int forced = (hg == -100);
+
+ switch (pcount) {
+ case 0:
+ hg = drbd_asb_recover_0p(mdev);
+ break;
+ case 1:
+ hg = drbd_asb_recover_1p(mdev);
+ break;
+ case 2:
+ hg = drbd_asb_recover_2p(mdev);
+ break;
+ }
+ if (abs(hg) < 100) {
+ dev_warn(DEV, "Split-Brain detected, %d primaries, "
+ "automatically solved. Sync from %s node\n",
+ pcount, (hg < 0) ? "peer" : "this");
+ if (forced) {
+ dev_warn(DEV, "Doing a full sync, since"
+ " UUIDs where ambiguous.\n");
+ hg = hg*2;
+ }
+ }
+ }
+
+ if (hg == -100) {
+ if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
+ hg = -1;
+ if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
+ hg = 1;
+
+ if (abs(hg) < 100)
+ dev_warn(DEV, "Split-Brain detected, manually solved. "
+ "Sync from %s node\n",
+ (hg < 0) ? "peer" : "this");
+ }
+
+ if (hg == -100) {
+ dev_alert(DEV, "Split-Brain detected, dropping connection!\n");
+ drbd_khelper(mdev, "split-brain");
+ return C_MASK;
+ }
+
+ if (hg > 0 && mydisk <= D_INCONSISTENT) {
+ dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
+ return C_MASK;
+ }
+
+ if (hg < 0 && /* by intention we do not use mydisk here. */
+ mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
+ switch (mdev->net_conf->rr_conflict) {
+ case ASB_CALL_HELPER:
+ drbd_khelper(mdev, "pri-lost");
+ /* fall through */
+ case ASB_DISCONNECT:
+ dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
+ return C_MASK;
+ case ASB_VIOLENTLY:
+ dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
+ "assumption\n");
+ }
+ }
+
+ if (abs(hg) >= 2) {
+ dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
+ if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
+ return C_MASK;
+ }
+
+ if (hg > 0) { /* become sync source. */
+ rv = C_WF_BITMAP_S;
+ } else if (hg < 0) { /* become sync target */
+ rv = C_WF_BITMAP_T;
+ } else {
+ rv = C_CONNECTED;
+ if (drbd_bm_total_weight(mdev)) {
+ dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
+ drbd_bm_total_weight(mdev));
+ }
+ }
+
+ return rv;
+}
+
+/* returns 1 if invalid */
+static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
+{
+ /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
+ if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
+ (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
+ return 0;
+
+ /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
+ if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
+ self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
+ return 1;
+
+ /* everything else is valid if they are equal on both sides. */
+ if (peer == self)
+ return 0;
+
+ /* everything es is invalid. */
+ return 1;
+}
+
+static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_protocol *p = (struct p_protocol *)h;
+ int header_size, data_size;
+ int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
+ int p_want_lose, p_two_primaries;
+ char p_integrity_alg[SHARED_SECRET_MAX] = "";
+
+ header_size = sizeof(*p) - sizeof(*h);
+ data_size = h->length - header_size;
+
+ if (drbd_recv(mdev, h->payload, header_size) != header_size)
+ return FALSE;
+
+ p_proto = be32_to_cpu(p->protocol);
+ p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
+ p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
+ p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
+ p_want_lose = be32_to_cpu(p->want_lose);
+ p_two_primaries = be32_to_cpu(p->two_primaries);
+
+ if (p_proto != mdev->net_conf->wire_protocol) {
+ dev_err(DEV, "incompatible communication protocols\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) {
+ dev_err(DEV, "incompatible after-sb-0pri settings\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) {
+ dev_err(DEV, "incompatible after-sb-1pri settings\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) {
+ dev_err(DEV, "incompatible after-sb-2pri settings\n");
+ goto disconnect;
+ }
+
+ if (p_want_lose && mdev->net_conf->want_lose) {
+ dev_err(DEV, "both sides have the 'want_lose' flag set\n");
+ goto disconnect;
+ }
+
+ if (p_two_primaries != mdev->net_conf->two_primaries) {
+ dev_err(DEV, "incompatible setting of the two-primaries options\n");
+ goto disconnect;
+ }
+
+ if (mdev->agreed_pro_version >= 87) {
+ unsigned char *my_alg = mdev->net_conf->integrity_alg;
+
+ if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
+ return FALSE;
+
+ p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
+ if (strcmp(p_integrity_alg, my_alg)) {
+ dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
+ goto disconnect;
+ }
+ dev_info(DEV, "data-integrity-alg: %s\n",
+ my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
+ }
+
+ return TRUE;
+
+disconnect:
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+}
+
+/* helper function
+ * input: alg name, feature name
+ * return: NULL (alg name was "")
+ * ERR_PTR(error) if something goes wrong
+ * or the crypto hash ptr, if it worked out ok. */
+struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
+ const char *alg, const char *name)
+{
+ struct crypto_hash *tfm;
+
+ if (!alg[0])
+ return NULL;
+
+ tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm)) {
+ dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
+ alg, name, PTR_ERR(tfm));
+ return tfm;
+ }
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
+ crypto_free_hash(tfm);
+ dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
+ return ERR_PTR(-EINVAL);
+ }
+ return tfm;
+}
+
+static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
+{
+ int ok = TRUE;
+ struct p_rs_param_89 *p = (struct p_rs_param_89 *)h;
+ unsigned int header_size, data_size, exp_max_sz;
+ struct crypto_hash *verify_tfm = NULL;
+ struct crypto_hash *csums_tfm = NULL;
+ const int apv = mdev->agreed_pro_version;
+
+ exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
+ : apv == 88 ? sizeof(struct p_rs_param)
+ + SHARED_SECRET_MAX
+ : /* 89 */ sizeof(struct p_rs_param_89);
+
+ if (h->length > exp_max_sz) {
+ dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
+ h->length, exp_max_sz);
+ return FALSE;
+ }
+
+ if (apv <= 88) {
+ header_size = sizeof(struct p_rs_param) - sizeof(*h);
+ data_size = h->length - header_size;
+ } else /* apv >= 89 */ {
+ header_size = sizeof(struct p_rs_param_89) - sizeof(*h);
+ data_size = h->length - header_size;
+ D_ASSERT(data_size == 0);
+ }
+
+ /* initialize verify_alg and csums_alg */
+ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+
+ if (drbd_recv(mdev, h->payload, header_size) != header_size)
+ return FALSE;
+
+ mdev->sync_conf.rate = be32_to_cpu(p->rate);
+
+ if (apv >= 88) {
+ if (apv == 88) {
+ if (data_size > SHARED_SECRET_MAX) {
+ dev_err(DEV, "verify-alg too long, "
+ "peer wants %u, accepting only %u byte\n",
+ data_size, SHARED_SECRET_MAX);
+ return FALSE;
+ }
+
+ if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
+ return FALSE;
+
+ /* we expect NUL terminated string */
+ /* but just in case someone tries to be evil */
+ D_ASSERT(p->verify_alg[data_size-1] == 0);
+ p->verify_alg[data_size-1] = 0;
+
+ } else /* apv >= 89 */ {
+ /* we still expect NUL terminated strings */
+ /* but just in case someone tries to be evil */
+ D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
+ D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
+ p->verify_alg[SHARED_SECRET_MAX-1] = 0;
+ p->csums_alg[SHARED_SECRET_MAX-1] = 0;
+ }
+
+ if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
+ if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+ dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
+ mdev->sync_conf.verify_alg, p->verify_alg);
+ goto disconnect;
+ }
+ verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
+ p->verify_alg, "verify-alg");
+ if (IS_ERR(verify_tfm)) {
+ verify_tfm = NULL;
+ goto disconnect;
+ }
+ }
+
+ if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
+ if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+ dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
+ mdev->sync_conf.csums_alg, p->csums_alg);
+ goto disconnect;
+ }
+ csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
+ p->csums_alg, "csums-alg");
+ if (IS_ERR(csums_tfm)) {
+ csums_tfm = NULL;
+ goto disconnect;
+ }
+ }
+
+
+ spin_lock(&mdev->peer_seq_lock);
+ /* lock against drbd_nl_syncer_conf() */
+ if (verify_tfm) {
+ strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
+ mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
+ crypto_free_hash(mdev->verify_tfm);
+ mdev->verify_tfm = verify_tfm;
+ dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
+ }
+ if (csums_tfm) {
+ strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
+ mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
+ crypto_free_hash(mdev->csums_tfm);
+ mdev->csums_tfm = csums_tfm;
+ dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
+ }
+ spin_unlock(&mdev->peer_seq_lock);
+ }
+
+ return ok;
+disconnect:
+ /* just for completeness: actually not needed,
+ * as this is not reached if csums_tfm was ok. */
+ crypto_free_hash(csums_tfm);
+ /* but free the verify_tfm again, if csums_tfm did not work out */
+ crypto_free_hash(verify_tfm);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+}
+
+static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
+{
+ /* sorry, we currently have no working implementation
+ * of distributed TCQ */
+}
+
+/* warn if the arguments differ by more than 12.5% */
+static void warn_if_differ_considerably(struct drbd_conf *mdev,
+ const char *s, sector_t a, sector_t b)
+{
+ sector_t d;
+ if (a == 0 || b == 0)
+ return;
+ d = (a > b) ? (a - b) : (b - a);
+ if (d > (a>>3) || d > (b>>3))
+ dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
+ (unsigned long long)a, (unsigned long long)b);
+}
+
+static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_sizes *p = (struct p_sizes *)h;
+ enum determine_dev_size dd = unchanged;
+ unsigned int max_seg_s;
+ sector_t p_size, p_usize, my_usize;
+ int ldsc = 0; /* local disk size changed */
+ enum drbd_conns nconn;
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+ if (drbd_recv(mdev, h->payload, h->length) != h->length)
+ return FALSE;
+
+ p_size = be64_to_cpu(p->d_size);
+ p_usize = be64_to_cpu(p->u_size);
+
+ if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
+ dev_err(DEV, "some backing storage is needed\n");
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+ }
+
+ /* just store the peer's disk size for now.
+ * we still need to figure out whether we accept that. */
+ mdev->p_size = p_size;
+
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+ if (get_ldev(mdev)) {
+ warn_if_differ_considerably(mdev, "lower level device sizes",
+ p_size, drbd_get_max_capacity(mdev->ldev));
+ warn_if_differ_considerably(mdev, "user requested size",
+ p_usize, mdev->ldev->dc.disk_size);
+
+ /* if this is the first connect, or an otherwise expected
+ * param exchange, choose the minimum */
+ if (mdev->state.conn == C_WF_REPORT_PARAMS)
+ p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
+ p_usize);
+
+ my_usize = mdev->ldev->dc.disk_size;
+
+ if (mdev->ldev->dc.disk_size != p_usize) {
+ mdev->ldev->dc.disk_size = p_usize;
+ dev_info(DEV, "Peer sets u_size to %lu sectors\n",
+ (unsigned long)mdev->ldev->dc.disk_size);
+ }
+
+ /* Never shrink a device with usable data during connect.
+ But allow online shrinking if we are connected. */
+ if (drbd_new_dev_size(mdev, mdev->ldev) <
+ drbd_get_capacity(mdev->this_bdev) &&
+ mdev->state.disk >= D_OUTDATED &&
+ mdev->state.conn < C_CONNECTED) {
+ dev_err(DEV, "The peer's disk size is too small!\n");
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ mdev->ldev->dc.disk_size = my_usize;
+ put_ldev(mdev);
+ return FALSE;
+ }
+ put_ldev(mdev);
+ }
+#undef min_not_zero
+
+ if (get_ldev(mdev)) {
+ dd = drbd_determin_dev_size(mdev);
+ put_ldev(mdev);
+ if (dd == dev_size_error)
+ return FALSE;
+ drbd_md_sync(mdev);
+ } else {
+ /* I am diskless, need to accept the peer's size. */
+ drbd_set_my_capacity(mdev, p_size);
+ }
+
+ if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+ nconn = drbd_sync_handshake(mdev,
+ mdev->state.peer, mdev->state.pdsk);
+ put_ldev(mdev);
+
+ if (nconn == C_MASK) {
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+ }
+
+ if (drbd_request_state(mdev, NS(conn, nconn)) < SS_SUCCESS) {
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+ }
+ }
+
+ if (get_ldev(mdev)) {
+ if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+ mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
+ ldsc = 1;
+ }
+
+ max_seg_s = be32_to_cpu(p->max_segment_size);
+ if (max_seg_s != queue_max_segment_size(mdev->rq_queue))
+ drbd_setup_queue_param(mdev, max_seg_s);
+
+ drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type));
+ put_ldev(mdev);
+ }
+
+ if (mdev->state.conn > C_WF_REPORT_PARAMS) {
+ if (be64_to_cpu(p->c_size) !=
+ drbd_get_capacity(mdev->this_bdev) || ldsc) {
+ /* we have different sizes, probably peer
+ * needs to know my new size... */
+ drbd_send_sizes(mdev, 0);
+ }
+ if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
+ (dd == grew && mdev->state.conn == C_CONNECTED)) {
+ if (mdev->state.pdsk >= D_INCONSISTENT &&
+ mdev->state.disk >= D_INCONSISTENT)
+ resync_after_online_grow(mdev);
+ else
+ set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+ }
+ }
+
+ return TRUE;
+}
+
+static int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_uuids *p = (struct p_uuids *)h;
+ u64 *p_uuid;
+ int i;
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+ if (drbd_recv(mdev, h->payload, h->length) != h->length)
+ return FALSE;
+
+ p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+
+ for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
+ p_uuid[i] = be64_to_cpu(p->uuid[i]);
+
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = p_uuid;
+
+ if (mdev->state.conn < C_CONNECTED &&
+ mdev->state.disk < D_INCONSISTENT &&
+ mdev->state.role == R_PRIMARY &&
+ (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
+ dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
+ (unsigned long long)mdev->ed_uuid);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+ }
+
+ if (get_ldev(mdev)) {
+ int skip_initial_sync =
+ mdev->state.conn == C_CONNECTED &&
+ mdev->agreed_pro_version >= 90 &&
+ mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
+ (p_uuid[UI_FLAGS] & 8);
+ if (skip_initial_sync) {
+ dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
+ drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
+ "clear_n_write from receive_uuids");
+ _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
+ _drbd_uuid_set(mdev, UI_BITMAP, 0);
+ _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+ CS_VERBOSE, NULL);
+ drbd_md_sync(mdev);
+ }
+ put_ldev(mdev);
+ }
+
+ /* Before we test for the disk state, we should wait until an eventually
+ ongoing cluster wide state change is finished. That is important if
+ we are primary and are detaching from our disk. We need to see the
+ new disk state... */
+ wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+ if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
+ drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
+
+ return TRUE;
+}
+
+/**
+ * convert_state() - Converts the peer's view of the cluster state to our point of view
+ * @ps: The state as seen by the peer.
+ */
+static union drbd_state convert_state(union drbd_state ps)
+{
+ union drbd_state ms;
+
+ static enum drbd_conns c_tab[] = {
+ [C_CONNECTED] = C_CONNECTED,
+
+ [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
+ [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
+ [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
+ [C_VERIFY_S] = C_VERIFY_T,
+ [C_MASK] = C_MASK,
+ };
+
+ ms.i = ps.i;
+
+ ms.conn = c_tab[ps.conn];
+ ms.peer = ps.role;
+ ms.role = ps.peer;
+ ms.pdsk = ps.disk;
+ ms.disk = ps.pdsk;
+ ms.peer_isp = (ps.aftr_isp | ps.user_isp);
+
+ return ms;
+}
+
+static int receive_req_state(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_req_state *p = (struct p_req_state *)h;
+ union drbd_state mask, val;
+ int rv;
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+ if (drbd_recv(mdev, h->payload, h->length) != h->length)
+ return FALSE;
+
+ mask.i = be32_to_cpu(p->mask);
+ val.i = be32_to_cpu(p->val);
+
+ if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
+ test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
+ drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
+ return TRUE;
+ }
+
+ mask = convert_state(mask);
+ val = convert_state(val);
+
+ rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
+
+ drbd_send_sr_reply(mdev, rv);
+ drbd_md_sync(mdev);
+
+ return TRUE;
+}
+
+static int receive_state(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_state *p = (struct p_state *)h;
+ enum drbd_conns nconn, oconn;
+ union drbd_state ns, peer_state;
+ enum drbd_disk_state real_peer_disk;
+ int rv;
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h)))
+ return FALSE;
+
+ if (drbd_recv(mdev, h->payload, h->length) != h->length)
+ return FALSE;
+
+ peer_state.i = be32_to_cpu(p->state);
+
+ real_peer_disk = peer_state.disk;
+ if (peer_state.disk == D_NEGOTIATING) {
+ real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
+ dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ retry:
+ oconn = nconn = mdev->state.conn;
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (nconn == C_WF_REPORT_PARAMS)
+ nconn = C_CONNECTED;
+
+ if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
+ get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ int cr; /* consider resync */
+
+ /* if we established a new connection */
+ cr = (oconn < C_CONNECTED);
+ /* if we had an established connection
+ * and one of the nodes newly attaches a disk */
+ cr |= (oconn == C_CONNECTED &&
+ (peer_state.disk == D_NEGOTIATING ||
+ mdev->state.disk == D_NEGOTIATING));
+ /* if we have both been inconsistent, and the peer has been
+ * forced to be UpToDate with --overwrite-data */
+ cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
+ /* if we had been plain connected, and the admin requested to
+ * start a sync by "invalidate" or "invalidate-remote" */
+ cr |= (oconn == C_CONNECTED &&
+ (peer_state.conn >= C_STARTING_SYNC_S &&
+ peer_state.conn <= C_WF_BITMAP_T));
+
+ if (cr)
+ nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
+
+ put_ldev(mdev);
+ if (nconn == C_MASK) {
+ if (mdev->state.disk == D_NEGOTIATING) {
+ drbd_force_state(mdev, NS(disk, D_DISKLESS));
+ nconn = C_CONNECTED;
+ } else if (peer_state.disk == D_NEGOTIATING) {
+ dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
+ peer_state.disk = D_DISKLESS;
+ } else {
+ D_ASSERT(oconn == C_WF_REPORT_PARAMS);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+ }
+ }
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn != oconn)
+ goto retry;
+ clear_bit(CONSIDER_RESYNC, &mdev->flags);
+ ns.i = mdev->state.i;
+ ns.conn = nconn;
+ ns.peer = peer_state.role;
+ ns.pdsk = real_peer_disk;
+ ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
+ if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
+ ns.disk = mdev->new_state_tmp.disk;
+
+ rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL);
+ ns = mdev->state;
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (rv < SS_SUCCESS) {
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return FALSE;
+ }
+
+ if (oconn > C_WF_REPORT_PARAMS) {
+ if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
+ peer_state.disk != D_NEGOTIATING ) {
+ /* we want resync, peer has not yet decided to sync... */
+ /* Nowadays only used when forcing a node into primary role and
+ setting its disk to UpToDate with that */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+ }
+
+ mdev->net_conf->want_lose = 0;
+
+ drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
+
+ return TRUE;
+}
+
+static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_rs_uuid *p = (struct p_rs_uuid *)h;
+
+ wait_event(mdev->misc_wait,
+ mdev->state.conn == C_WF_SYNC_UUID ||
+ mdev->state.conn < C_CONNECTED ||
+ mdev->state.disk < D_NEGOTIATING);
+
+ /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+ if (drbd_recv(mdev, h->payload, h->length) != h->length)
+ return FALSE;
+
+ /* Here the _drbd_uuid_ functions are right, current should
+ _not_ be rotated into the history */
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
+ _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
+
+ drbd_start_resync(mdev, C_SYNC_TARGET);
+
+ put_ldev(mdev);
+ } else
+ dev_err(DEV, "Ignoring SyncUUID packet!\n");
+
+ return TRUE;
+}
+
+enum receive_bitmap_ret { OK, DONE, FAILED };
+
+static enum receive_bitmap_ret
+receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h,
+ unsigned long *buffer, struct bm_xfer_ctx *c)
+{
+ unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
+ unsigned want = num_words * sizeof(long);
+
+ if (want != h->length) {
+ dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length);
+ return FAILED;
+ }
+ if (want == 0)
+ return DONE;
+ if (drbd_recv(mdev, buffer, want) != want)
+ return FAILED;
+
+ drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
+
+ c->word_offset += num_words;
+ c->bit_offset = c->word_offset * BITS_PER_LONG;
+ if (c->bit_offset > c->bm_bits)
+ c->bit_offset = c->bm_bits;
+
+ return OK;
+}
+
+static enum receive_bitmap_ret
+recv_bm_rle_bits(struct drbd_conf *mdev,
+ struct p_compressed_bm *p,
+ struct bm_xfer_ctx *c)
+{
+ struct bitstream bs;
+ u64 look_ahead;
+ u64 rl;
+ u64 tmp;
+ unsigned long s = c->bit_offset;
+ unsigned long e;
+ int len = p->head.length - (sizeof(*p) - sizeof(p->head));
+ int toggle = DCBP_get_start(p);
+ int have;
+ int bits;
+
+ bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
+
+ bits = bitstream_get_bits(&bs, &look_ahead, 64);
+ if (bits < 0)
+ return FAILED;
+
+ for (have = bits; have > 0; s += rl, toggle = !toggle) {
+ bits = vli_decode_bits(&rl, look_ahead);
+ if (bits <= 0)
+ return FAILED;
+
+ if (toggle) {
+ e = s + rl -1;
+ if (e >= c->bm_bits) {
+ dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
+ return FAILED;
+ }
+ _drbd_bm_set_bits(mdev, s, e);
+ }
+
+ if (have < bits) {
+ dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
+ have, bits, look_ahead,
+ (unsigned int)(bs.cur.b - p->code),
+ (unsigned int)bs.buf_len);
+ return FAILED;
+ }
+ look_ahead >>= bits;
+ have -= bits;
+
+ bits = bitstream_get_bits(&bs, &tmp, 64 - have);
+ if (bits < 0)
+ return FAILED;
+ look_ahead |= tmp << have;
+ have += bits;
+ }
+
+ c->bit_offset = s;
+ bm_xfer_ctx_bit_to_word_offset(c);
+
+ return (s == c->bm_bits) ? DONE : OK;
+}
+
+static enum receive_bitmap_ret
+decode_bitmap_c(struct drbd_conf *mdev,
+ struct p_compressed_bm *p,
+ struct bm_xfer_ctx *c)
+{
+ if (DCBP_get_code(p) == RLE_VLI_Bits)
+ return recv_bm_rle_bits(mdev, p, c);
+
+ /* other variants had been implemented for evaluation,
+ * but have been dropped as this one turned out to be "best"
+ * during all our tests. */
+
+ dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ return FAILED;
+}
+
+void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+ const char *direction, struct bm_xfer_ctx *c)
+{
+ /* what would it take to transfer it "plaintext" */
+ unsigned plain = sizeof(struct p_header) *
+ ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
+ + c->bm_words * sizeof(long);
+ unsigned total = c->bytes[0] + c->bytes[1];
+ unsigned r;
+
+ /* total can not be zero. but just in case: */
+ if (total == 0)
+ return;
+
+ /* don't report if not compressed */
+ if (total >= plain)
+ return;
+
+ /* total < plain. check for overflow, still */
+ r = (total > UINT_MAX/1000) ? (total / (plain/1000))
+ : (1000 * total / plain);
+
+ if (r > 1000)
+ r = 1000;
+
+ r = 1000 - r;
+ dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
+ "total %u; compression: %u.%u%%\n",
+ direction,
+ c->bytes[1], c->packets[1],
+ c->bytes[0], c->packets[0],
+ total, r/10, r % 10);
+}
+
+/* Since we are processing the bitfield from lower addresses to higher,
+ it does not matter if the process it in 32 bit chunks or 64 bit
+ chunks as long as it is little endian. (Understand it as byte stream,
+ beginning with the lowest byte...) If we would use big endian
+ we would need to process it from the highest address to the lowest,
+ in order to be agnostic to the 32 vs 64 bits issue.
+
+ returns 0 on failure, 1 if we successfully received it. */
+static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct bm_xfer_ctx c;
+ void *buffer;
+ enum receive_bitmap_ret ret;
+ int ok = FALSE;
+
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+
+ drbd_bm_lock(mdev, "receive bitmap");
+
+ /* maybe we should use some per thread scratch page,
+ * and allocate that during initial device creation? */
+ buffer = (unsigned long *) __get_free_page(GFP_NOIO);
+ if (!buffer) {
+ dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+ goto out;
+ }
+
+ c = (struct bm_xfer_ctx) {
+ .bm_bits = drbd_bm_bits(mdev),
+ .bm_words = drbd_bm_words(mdev),
+ };
+
+ do {
+ if (h->command == P_BITMAP) {
+ ret = receive_bitmap_plain(mdev, h, buffer, &c);
+ } else if (h->command == P_COMPRESSED_BITMAP) {
+ /* MAYBE: sanity check that we speak proto >= 90,
+ * and the feature is enabled! */
+ struct p_compressed_bm *p;
+
+ if (h->length > BM_PACKET_PAYLOAD_BYTES) {
+ dev_err(DEV, "ReportCBitmap packet too large\n");
+ goto out;
+ }
+ /* use the page buff */
+ p = buffer;
+ memcpy(p, h, sizeof(*h));
+ if (drbd_recv(mdev, p->head.payload, h->length) != h->length)
+ goto out;
+ if (p->head.length <= (sizeof(*p) - sizeof(p->head))) {
+ dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length);
+ return FAILED;
+ }
+ ret = decode_bitmap_c(mdev, p, &c);
+ } else {
+ dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command);
+ goto out;
+ }
+
+ c.packets[h->command == P_BITMAP]++;
+ c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length;
+
+ if (ret != OK)
+ break;
+
+ if (!drbd_recv_header(mdev, h))
+ goto out;
+ } while (ret == OK);
+ if (ret == FAILED)
+ goto out;
+
+ INFO_bm_xfer_stats(mdev, "receive", &c);
+
+ if (mdev->state.conn == C_WF_BITMAP_T) {
+ ok = !drbd_send_bitmap(mdev);
+ if (!ok)
+ goto out;
+ /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
+ ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+ D_ASSERT(ok == SS_SUCCESS);
+ } else if (mdev->state.conn != C_WF_BITMAP_S) {
+ /* admin may have requested C_DISCONNECTING,
+ * other threads may have noticed network errors */
+ dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
+ drbd_conn_str(mdev->state.conn));
+ }
+
+ ok = TRUE;
+ out:
+ drbd_bm_unlock(mdev);
+ if (ok && mdev->state.conn == C_WF_BITMAP_S)
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ free_page((unsigned long) buffer);
+ return ok;
+}
+
+static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
+{
+ /* TODO zero copy sink :) */
+ static char sink[128];
+ int size, want, r;
+
+ dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+ h->command, h->length);
+
+ size = h->length;
+ while (size > 0) {
+ want = min_t(int, size, sizeof(sink));
+ r = drbd_recv(mdev, sink, want);
+ ERR_IF(r <= 0) break;
+ size -= r;
+ }
+ return size == 0;
+}
+
+static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
+{
+ if (mdev->state.disk >= D_INCONSISTENT)
+ drbd_kick_lo(mdev);
+
+ /* Make sure we've acked all the TCP data associated
+ * with the data requests being unplugged */
+ drbd_tcp_quickack(mdev->data.socket);
+
+ return TRUE;
+}
+
+typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *);
+
+static drbd_cmd_handler_f drbd_default_handler[] = {
+ [P_DATA] = receive_Data,
+ [P_DATA_REPLY] = receive_DataReply,
+ [P_RS_DATA_REPLY] = receive_RSDataReply,
+ [P_BARRIER] = receive_Barrier,
+ [P_BITMAP] = receive_bitmap,
+ [P_COMPRESSED_BITMAP] = receive_bitmap,
+ [P_UNPLUG_REMOTE] = receive_UnplugRemote,
+ [P_DATA_REQUEST] = receive_DataRequest,
+ [P_RS_DATA_REQUEST] = receive_DataRequest,
+ [P_SYNC_PARAM] = receive_SyncParam,
+ [P_SYNC_PARAM89] = receive_SyncParam,
+ [P_PROTOCOL] = receive_protocol,
+ [P_UUIDS] = receive_uuids,
+ [P_SIZES] = receive_sizes,
+ [P_STATE] = receive_state,
+ [P_STATE_CHG_REQ] = receive_req_state,
+ [P_SYNC_UUID] = receive_sync_uuid,
+ [P_OV_REQUEST] = receive_DataRequest,
+ [P_OV_REPLY] = receive_DataRequest,
+ [P_CSUM_RS_REQUEST] = receive_DataRequest,
+ /* anything missing from this table is in
+ * the asender_tbl, see get_asender_cmd */
+ [P_MAX_CMD] = NULL,
+};
+
+static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler;
+static drbd_cmd_handler_f *drbd_opt_cmd_handler;
+
+static void drbdd(struct drbd_conf *mdev)
+{
+ drbd_cmd_handler_f handler;
+ struct p_header *header = &mdev->data.rbuf.header;
+
+ while (get_t_state(&mdev->receiver) == Running) {
+ drbd_thread_current_set_cpu(mdev);
+ if (!drbd_recv_header(mdev, header)) {
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ break;
+ }
+
+ if (header->command < P_MAX_CMD)
+ handler = drbd_cmd_handler[header->command];
+ else if (P_MAY_IGNORE < header->command
+ && header->command < P_MAX_OPT_CMD)
+ handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE];
+ else if (header->command > P_MAX_OPT_CMD)
+ handler = receive_skip;
+ else
+ handler = NULL;
+
+ if (unlikely(!handler)) {
+ dev_err(DEV, "unknown packet type %d, l: %d!\n",
+ header->command, header->length);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ break;
+ }
+ if (unlikely(!handler(mdev, header))) {
+ dev_err(DEV, "error receiving %s, l: %d!\n",
+ cmdname(header->command), header->length);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ break;
+ }
+ }
+}
+
+static void drbd_fail_pending_reads(struct drbd_conf *mdev)
+{
+ struct hlist_head *slot;
+ struct hlist_node *pos;
+ struct hlist_node *tmp;
+ struct drbd_request *req;
+ int i;
+
+ /*
+ * Application READ requests
+ */
+ spin_lock_irq(&mdev->req_lock);
+ for (i = 0; i < APP_R_HSIZE; i++) {
+ slot = mdev->app_reads_hash+i;
+ hlist_for_each_entry_safe(req, pos, tmp, slot, colision) {
+ /* it may (but should not any longer!)
+ * be on the work queue; if that assert triggers,
+ * we need to also grab the
+ * spin_lock_irq(&mdev->data.work.q_lock);
+ * and list_del_init here. */
+ D_ASSERT(list_empty(&req->w.list));
+ /* It would be nice to complete outside of spinlock.
+ * But this is easier for now. */
+ _req_mod(req, connection_lost_while_pending);
+ }
+ }
+ for (i = 0; i < APP_R_HSIZE; i++)
+ if (!hlist_empty(mdev->app_reads_hash+i))
+ dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: "
+ "%p, should be NULL\n", i, mdev->app_reads_hash[i].first);
+
+ memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+void drbd_flush_workqueue(struct drbd_conf *mdev)
+{
+ struct drbd_wq_barrier barr;
+
+ barr.w.cb = w_prev_work_done;
+ init_completion(&barr.done);
+ drbd_queue_work(&mdev->data.work, &barr.w);
+ wait_for_completion(&barr.done);
+}
+
+static void drbd_disconnect(struct drbd_conf *mdev)
+{
+ enum drbd_fencing_p fp;
+ union drbd_state os, ns;
+ int rv = SS_UNKNOWN_ERROR;
+ unsigned int i;
+
+ if (mdev->state.conn == C_STANDALONE)
+ return;
+ if (mdev->state.conn >= C_WF_CONNECTION)
+ dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n",
+ drbd_conn_str(mdev->state.conn));
+
+ /* asender does not clean up anything. it must not interfere, either */
+ drbd_thread_stop(&mdev->asender);
+
+ mutex_lock(&mdev->data.mutex);
+ drbd_free_sock(mdev);
+ mutex_unlock(&mdev->data.mutex);
+
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+ _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
+ _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ /* We do not have data structures that would allow us to
+ * get the rs_pending_cnt down to 0 again.
+ * * On C_SYNC_TARGET we do not have any data structures describing
+ * the pending RSDataRequest's we have sent.
+ * * On C_SYNC_SOURCE there is no data structure that tracks
+ * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
+ * And no, it is not the sum of the reference counts in the
+ * resync_LRU. The resync_LRU tracks the whole operation including
+ * the disk-IO, while the rs_pending_cnt only tracks the blocks
+ * on the fly. */
+ drbd_rs_cancel_all(mdev);
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ wake_up(&mdev->misc_wait);
+
+ /* make sure syncer is stopped and w_resume_next_sg queued */
+ del_timer_sync(&mdev->resync_timer);
+ set_bit(STOP_SYNC_TIMER, &mdev->flags);
+ resync_timer_fn((unsigned long)mdev);
+
+ /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
+ * w_make_resync_request etc. which may still be on the worker queue
+ * to be "canceled" */
+ drbd_flush_workqueue(mdev);
+
+ /* This also does reclaim_net_ee(). If we do this too early, we might
+ * miss some resync ee and pages.*/
+ drbd_process_done_ee(mdev);
+
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = NULL;
+
+ if (!mdev->state.susp)
+ tl_clear(mdev);
+
+ drbd_fail_pending_reads(mdev);
+
+ dev_info(DEV, "Connection closed\n");
+
+ drbd_md_sync(mdev);
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ if (mdev->state.role == R_PRIMARY) {
+ if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) {
+ enum drbd_disk_state nps = drbd_try_outdate_peer(mdev);
+ drbd_request_state(mdev, NS(pdsk, nps));
+ }
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+ if (os.conn >= C_UNCONNECTED) {
+ /* Do not restart in case we are C_DISCONNECTING */
+ ns = os;
+ ns.conn = C_UNCONNECTED;
+ rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (os.conn == C_DISCONNECTING) {
+ struct hlist_head *h;
+ wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0);
+
+ /* we must not free the tl_hash
+ * while application io is still on the fly */
+ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0);
+
+ spin_lock_irq(&mdev->req_lock);
+ /* paranoia code */
+ for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
+ if (h->first)
+ dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
+ (int)(h - mdev->ee_hash), h->first);
+ kfree(mdev->ee_hash);
+ mdev->ee_hash = NULL;
+ mdev->ee_hash_s = 0;
+
+ /* paranoia code */
+ for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
+ if (h->first)
+ dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
+ (int)(h - mdev->tl_hash), h->first);
+ kfree(mdev->tl_hash);
+ mdev->tl_hash = NULL;
+ mdev->tl_hash_s = 0;
+ spin_unlock_irq(&mdev->req_lock);
+
+ crypto_free_hash(mdev->cram_hmac_tfm);
+ mdev->cram_hmac_tfm = NULL;
+
+ kfree(mdev->net_conf);
+ mdev->net_conf = NULL;
+ drbd_request_state(mdev, NS(conn, C_STANDALONE));
+ }
+
+ /* tcp_close and release of sendpage pages can be deferred. I don't
+ * want to use SO_LINGER, because apparently it can be deferred for
+ * more than 20 seconds (longest time I checked).
+ *
+ * Actually we don't care for exactly when the network stack does its
+ * put_page(), but release our reference on these pages right here.
+ */
+ i = drbd_release_ee(mdev, &mdev->net_ee);
+ if (i)
+ dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
+ i = atomic_read(&mdev->pp_in_use);
+ if (i)
+ dev_info(DEV, "pp_in_use = %u, expected 0\n", i);
+
+ D_ASSERT(list_empty(&mdev->read_ee));
+ D_ASSERT(list_empty(&mdev->active_ee));
+ D_ASSERT(list_empty(&mdev->sync_ee));
+ D_ASSERT(list_empty(&mdev->done_ee));
+
+ /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
+ atomic_set(&mdev->current_epoch->epoch_size, 0);
+ D_ASSERT(list_empty(&mdev->current_epoch->list));
+}
+
+/*
+ * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
+ * we can agree on is stored in agreed_pro_version.
+ *
+ * feature flags and the reserved array should be enough room for future
+ * enhancements of the handshake protocol, and possible plugins...
+ *
+ * for now, they are expected to be zero, but ignored.
+ */
+static int drbd_send_handshake(struct drbd_conf *mdev)
+{
+ /* ASSERT current == mdev->receiver ... */
+ struct p_handshake *p = &mdev->data.sbuf.handshake;
+ int ok;
+
+ if (mutex_lock_interruptible(&mdev->data.mutex)) {
+ dev_err(DEV, "interrupted during initial handshake\n");
+ return 0; /* interrupted. not ok. */
+ }
+
+ if (mdev->data.socket == NULL) {
+ mutex_unlock(&mdev->data.mutex);
+ return 0;
+ }
+
+ memset(p, 0, sizeof(*p));
+ p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
+ p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+ ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE,
+ (struct p_header *)p, sizeof(*p), 0 );
+ mutex_unlock(&mdev->data.mutex);
+ return ok;
+}
+
+/*
+ * return values:
+ * 1 yes, we have a valid connection
+ * 0 oops, did not work out, please try again
+ * -1 peer talks different language,
+ * no point in trying again, please go standalone.
+ */
+static int drbd_do_handshake(struct drbd_conf *mdev)
+{
+ /* ASSERT current == mdev->receiver ... */
+ struct p_handshake *p = &mdev->data.rbuf.handshake;
+ const int expect = sizeof(struct p_handshake)
+ -sizeof(struct p_header);
+ int rv;
+
+ rv = drbd_send_handshake(mdev);
+ if (!rv)
+ return 0;
+
+ rv = drbd_recv_header(mdev, &p->head);
+ if (!rv)
+ return 0;
+
+ if (p->head.command != P_HAND_SHAKE) {
+ dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
+ cmdname(p->head.command), p->head.command);
+ return -1;
+ }
+
+ if (p->head.length != expect) {
+ dev_err(DEV, "expected HandShake length: %u, received: %u\n",
+ expect, p->head.length);
+ return -1;
+ }
+
+ rv = drbd_recv(mdev, &p->head.payload, expect);
+
+ if (rv != expect) {
+ dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv);
+ return 0;
+ }
+
+ p->protocol_min = be32_to_cpu(p->protocol_min);
+ p->protocol_max = be32_to_cpu(p->protocol_max);
+ if (p->protocol_max == 0)
+ p->protocol_max = p->protocol_min;
+
+ if (PRO_VERSION_MAX < p->protocol_min ||
+ PRO_VERSION_MIN > p->protocol_max)
+ goto incompat;
+
+ mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+
+ dev_info(DEV, "Handshake successful: "
+ "Agreed network protocol version %d\n", mdev->agreed_pro_version);
+
+ return 1;
+
+ incompat:
+ dev_err(DEV, "incompatible DRBD dialects: "
+ "I support %d-%d, peer supports %d-%d\n",
+ PRO_VERSION_MIN, PRO_VERSION_MAX,
+ p->protocol_min, p->protocol_max);
+ return -1;
+}
+
+#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
+static int drbd_do_auth(struct drbd_conf *mdev)
+{
+ dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
+ dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+ return 0;
+}
+#else
+#define CHALLENGE_LEN 64
+static int drbd_do_auth(struct drbd_conf *mdev)
+{
+ char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
+ struct scatterlist sg;
+ char *response = NULL;
+ char *right_response = NULL;
+ char *peers_ch = NULL;
+ struct p_header p;
+ unsigned int key_len = strlen(mdev->net_conf->shared_secret);
+ unsigned int resp_size;
+ struct hash_desc desc;
+ int rv;
+
+ desc.tfm = mdev->cram_hmac_tfm;
+ desc.flags = 0;
+
+ rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
+ (u8 *)mdev->net_conf->shared_secret, key_len);
+ if (rv) {
+ dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ get_random_bytes(my_challenge, CHALLENGE_LEN);
+
+ rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
+ if (!rv)
+ goto fail;
+
+ rv = drbd_recv_header(mdev, &p);
+ if (!rv)
+ goto fail;
+
+ if (p.command != P_AUTH_CHALLENGE) {
+ dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
+ cmdname(p.command), p.command);
+ rv = 0;
+ goto fail;
+ }
+
+ if (p.length > CHALLENGE_LEN*2) {
+ dev_err(DEV, "expected AuthChallenge payload too big.\n");
+ rv = 0;
+ goto fail;
+ }
+
+ peers_ch = kmalloc(p.length, GFP_NOIO);
+ if (peers_ch == NULL) {
+ dev_err(DEV, "kmalloc of peers_ch failed\n");
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_recv(mdev, peers_ch, p.length);
+
+ if (rv != p.length) {
+ dev_err(DEV, "short read AuthChallenge: l=%u\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
+ response = kmalloc(resp_size, GFP_NOIO);
+ if (response == NULL) {
+ dev_err(DEV, "kmalloc of response failed\n");
+ rv = 0;
+ goto fail;
+ }
+
+ sg_init_table(&sg, 1);
+ sg_set_buf(&sg, peers_ch, p.length);
+
+ rv = crypto_hash_digest(&desc, &sg, sg.length, response);
+ if (rv) {
+ dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
+ if (!rv)
+ goto fail;
+
+ rv = drbd_recv_header(mdev, &p);
+ if (!rv)
+ goto fail;
+
+ if (p.command != P_AUTH_RESPONSE) {
+ dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
+ cmdname(p.command), p.command);
+ rv = 0;
+ goto fail;
+ }
+
+ if (p.length != resp_size) {
+ dev_err(DEV, "expected AuthResponse payload of wrong size\n");
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_recv(mdev, response , resp_size);
+
+ if (rv != resp_size) {
+ dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ right_response = kmalloc(resp_size, GFP_NOIO);
+ if (response == NULL) {
+ dev_err(DEV, "kmalloc of right_response failed\n");
+ rv = 0;
+ goto fail;
+ }
+
+ sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
+
+ rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
+ if (rv) {
+ dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ rv = !memcmp(response, right_response, resp_size);
+
+ if (rv)
+ dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
+ resp_size, mdev->net_conf->cram_hmac_alg);
+
+ fail:
+ kfree(peers_ch);
+ kfree(response);
+ kfree(right_response);
+
+ return rv;
+}
+#endif
+
+int drbdd_init(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ unsigned int minor = mdev_to_minor(mdev);
+ int h;
+
+ sprintf(current->comm, "drbd%d_receiver", minor);
+
+ dev_info(DEV, "receiver (re)started\n");
+
+ do {
+ h = drbd_connect(mdev);
+ if (h == 0) {
+ drbd_disconnect(mdev);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+ }
+ if (h == -1) {
+ dev_warn(DEV, "Discarding network configuration.\n");
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ } while (h == 0);
+
+ if (h > 0) {
+ if (get_net_conf(mdev)) {
+ drbdd(mdev);
+ put_net_conf(mdev);
+ }
+ }
+
+ drbd_disconnect(mdev);
+
+ dev_info(DEV, "receiver terminated\n");
+ return 0;
+}
+
+/* ********* acknowledge sender ******** */
+
+static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_req_state_reply *p = (struct p_req_state_reply *)h;
+
+ int retcode = be32_to_cpu(p->retcode);
+
+ if (retcode >= SS_SUCCESS) {
+ set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
+ } else {
+ set_bit(CL_ST_CHG_FAIL, &mdev->flags);
+ dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
+ drbd_set_st_err_str(retcode), retcode);
+ }
+ wake_up(&mdev->state_wait);
+
+ return TRUE;
+}
+
+static int got_Ping(struct drbd_conf *mdev, struct p_header *h)
+{
+ return drbd_send_ping_ack(mdev);
+
+}
+
+static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
+{
+ /* restore idle timeout */
+ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+
+ return TRUE;
+}
+
+static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+ int blksize = be32_to_cpu(p->blksize);
+
+ D_ASSERT(mdev->agreed_pro_version >= 89);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ drbd_rs_complete_io(mdev, sector);
+ drbd_set_in_sync(mdev, sector, blksize);
+ /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
+ mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
+ dec_rs_pending(mdev);
+
+ return TRUE;
+}
+
+/* when we receive the ACK for a write request,
+ * verify that we actually know about it */
+static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
+ u64 id, sector_t sector)
+{
+ struct hlist_head *slot = tl_hash_slot(mdev, sector);
+ struct hlist_node *n;
+ struct drbd_request *req;
+
+ hlist_for_each_entry(req, n, slot, colision) {
+ if ((unsigned long)req == (unsigned long)id) {
+ if (req->sector != sector) {
+ dev_err(DEV, "_ack_id_to_req: found req %p but it has "
+ "wrong sector (%llus versus %llus)\n", req,
+ (unsigned long long)req->sector,
+ (unsigned long long)sector);
+ break;
+ }
+ return req;
+ }
+ }
+ dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n",
+ (void *)(unsigned long)id, (unsigned long long)sector);
+ return NULL;
+}
+
+typedef struct drbd_request *(req_validator_fn)
+ (struct drbd_conf *mdev, u64 id, sector_t sector);
+
+static int validate_req_change_req_state(struct drbd_conf *mdev,
+ u64 id, sector_t sector, req_validator_fn validator,
+ const char *func, enum drbd_req_event what)
+{
+ struct drbd_request *req;
+ struct bio_and_error m;
+
+ spin_lock_irq(&mdev->req_lock);
+ req = validator(mdev, id, sector);
+ if (unlikely(!req)) {
+ spin_unlock_irq(&mdev->req_lock);
+ dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func);
+ return FALSE;
+ }
+ __req_mod(req, what, &m);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+ return TRUE;
+}
+
+static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+ int blksize = be32_to_cpu(p->blksize);
+ enum drbd_req_event what;
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (is_syncer_block_id(p->block_id)) {
+ drbd_set_in_sync(mdev, sector, blksize);
+ dec_rs_pending(mdev);
+ return TRUE;
+ }
+ switch (be16_to_cpu(h->command)) {
+ case P_RS_WRITE_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ what = write_acked_by_peer_and_sis;
+ break;
+ case P_WRITE_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ what = write_acked_by_peer;
+ break;
+ case P_RECV_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B);
+ what = recv_acked_by_peer;
+ break;
+ case P_DISCARD_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ what = conflict_discarded_by_peer;
+ break;
+ default:
+ D_ASSERT(0);
+ return FALSE;
+ }
+
+ return validate_req_change_req_state(mdev, p->block_id, sector,
+ _ack_id_to_req, __func__ , what);
+}
+
+static int got_NegAck(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n");
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (is_syncer_block_id(p->block_id)) {
+ int size = be32_to_cpu(p->blksize);
+ dec_rs_pending(mdev);
+ drbd_rs_failed_io(mdev, sector, size);
+ return TRUE;
+ }
+ return validate_req_change_req_state(mdev, p->block_id, sector,
+ _ack_id_to_req, __func__ , neg_acked);
+}
+
+static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+ dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
+ (unsigned long long)sector, be32_to_cpu(p->blksize));
+
+ return validate_req_change_req_state(mdev, p->block_id, sector,
+ _ar_id_to_req, __func__ , neg_acked);
+}
+
+static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h)
+{
+ sector_t sector;
+ int size;
+ struct p_block_ack *p = (struct p_block_ack *)h;
+
+ sector = be64_to_cpu(p->sector);
+ size = be32_to_cpu(p->blksize);
+ D_ASSERT(p->block_id == ID_SYNCER);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ dec_rs_pending(mdev);
+
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ drbd_rs_complete_io(mdev, sector);
+ drbd_rs_failed_io(mdev, sector, size);
+ put_ldev(mdev);
+ }
+
+ return TRUE;
+}
+
+static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_barrier_ack *p = (struct p_barrier_ack *)h;
+
+ tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
+
+ return TRUE;
+}
+
+static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ struct drbd_work *w;
+ sector_t sector;
+ int size;
+
+ sector = be64_to_cpu(p->sector);
+ size = be32_to_cpu(p->blksize);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
+ drbd_ov_oos_found(mdev, sector, size);
+ else
+ ov_oos_print(mdev);
+
+ drbd_rs_complete_io(mdev, sector);
+ dec_rs_pending(mdev);
+
+ if (--mdev->ov_left == 0) {
+ w = kmalloc(sizeof(*w), GFP_NOIO);
+ if (w) {
+ w->cb = w_ov_finished;
+ drbd_queue_work_front(&mdev->data.work, w);
+ } else {
+ dev_err(DEV, "kmalloc(w) failed.");
+ ov_oos_print(mdev);
+ drbd_resync_finished(mdev);
+ }
+ }
+ return TRUE;
+}
+
+struct asender_cmd {
+ size_t pkt_size;
+ int (*process)(struct drbd_conf *mdev, struct p_header *h);
+};
+
+static struct asender_cmd *get_asender_cmd(int cmd)
+{
+ static struct asender_cmd asender_tbl[] = {
+ /* anything missing from this table is in
+ * the drbd_cmd_handler (drbd_default_handler) table,
+ * see the beginning of drbdd() */
+ [P_PING] = { sizeof(struct p_header), got_Ping },
+ [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
+ [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
+ [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
+ [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
+ [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
+ [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
+ [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
+ [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
+ [P_MAX_CMD] = { 0, NULL },
+ };
+ if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
+ return NULL;
+ return &asender_tbl[cmd];
+}
+
+int drbd_asender(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct p_header *h = &mdev->meta.rbuf.header;
+ struct asender_cmd *cmd = NULL;
+
+ int rv, len;
+ void *buf = h;
+ int received = 0;
+ int expect = sizeof(struct p_header);
+ int empty;
+
+ sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
+
+ current->policy = SCHED_RR; /* Make this a realtime task! */
+ current->rt_priority = 2; /* more important than all other tasks */
+
+ while (get_t_state(thi) == Running) {
+ drbd_thread_current_set_cpu(mdev);
+ if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
+ ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
+ mdev->meta.socket->sk->sk_rcvtimeo =
+ mdev->net_conf->ping_timeo*HZ/10;
+ }
+
+ /* conditionally cork;
+ * it may hurt latency if we cork without much to send */
+ if (!mdev->net_conf->no_cork &&
+ 3 < atomic_read(&mdev->unacked_cnt))
+ drbd_tcp_cork(mdev->meta.socket);
+ while (1) {
+ clear_bit(SIGNAL_ASENDER, &mdev->flags);
+ flush_signals(current);
+ if (!drbd_process_done_ee(mdev)) {
+ dev_err(DEV, "process_done_ee() = NOT_OK\n");
+ goto reconnect;
+ }
+ /* to avoid race with newly queued ACKs */
+ set_bit(SIGNAL_ASENDER, &mdev->flags);
+ spin_lock_irq(&mdev->req_lock);
+ empty = list_empty(&mdev->done_ee);
+ spin_unlock_irq(&mdev->req_lock);
+ /* new ack may have been queued right here,
+ * but then there is also a signal pending,
+ * and we start over... */
+ if (empty)
+ break;
+ }
+ /* but unconditionally uncork unless disabled */
+ if (!mdev->net_conf->no_cork)
+ drbd_tcp_uncork(mdev->meta.socket);
+
+ /* short circuit, recv_msg would return EINTR anyways. */
+ if (signal_pending(current))
+ continue;
+
+ rv = drbd_recv_short(mdev, mdev->meta.socket,
+ buf, expect-received, 0);
+ clear_bit(SIGNAL_ASENDER, &mdev->flags);
+
+ flush_signals(current);
+
+ /* Note:
+ * -EINTR (on meta) we got a signal
+ * -EAGAIN (on meta) rcvtimeo expired
+ * -ECONNRESET other side closed the connection
+ * -ERESTARTSYS (on data) we got a signal
+ * rv < 0 other than above: unexpected error!
+ * rv == expected: full header or command
+ * rv < expected: "woken" by signal during receive
+ * rv == 0 : "connection shut down by peer"
+ */
+ if (likely(rv > 0)) {
+ received += rv;
+ buf += rv;
+ } else if (rv == 0) {
+ dev_err(DEV, "meta connection shut down by peer.\n");
+ goto reconnect;
+ } else if (rv == -EAGAIN) {
+ if (mdev->meta.socket->sk->sk_rcvtimeo ==
+ mdev->net_conf->ping_timeo*HZ/10) {
+ dev_err(DEV, "PingAck did not arrive in time.\n");
+ goto reconnect;
+ }
+ set_bit(SEND_PING, &mdev->flags);
+ continue;
+ } else if (rv == -EINTR) {
+ continue;
+ } else {
+ dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+ goto reconnect;
+ }
+
+ if (received == expect && cmd == NULL) {
+ if (unlikely(h->magic != BE_DRBD_MAGIC)) {
+ dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n",
+ (long)be32_to_cpu(h->magic),
+ h->command, h->length);
+ goto reconnect;
+ }
+ cmd = get_asender_cmd(be16_to_cpu(h->command));
+ len = be16_to_cpu(h->length);
+ if (unlikely(cmd == NULL)) {
+ dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n",
+ (long)be32_to_cpu(h->magic),
+ h->command, h->length);
+ goto disconnect;
+ }
+ expect = cmd->pkt_size;
+ ERR_IF(len != expect-sizeof(struct p_header))
+ goto reconnect;
+ }
+ if (received == expect) {
+ D_ASSERT(cmd != NULL);
+ if (!cmd->process(mdev, h))
+ goto reconnect;
+
+ buf = h;
+ received = 0;
+ expect = sizeof(struct p_header);
+ cmd = NULL;
+ }
+ }
+
+ if (0) {
+reconnect:
+ drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+ }
+ if (0) {
+disconnect:
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ clear_bit(SIGNAL_ASENDER, &mdev->flags);
+
+ D_ASSERT(mdev->state.conn < C_CONNECTED);
+ dev_info(DEV, "asender terminated\n");
+
+ return 0;
+}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
new file mode 100644
index 000000000000..de81ab7b4627
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.c
@@ -0,0 +1,1125 @@
+/*
+ drbd_req.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+
+/* Update disk stats at start of I/O request */
+static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
+{
+ const int rw = bio_data_dir(bio);
+ int cpu;
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
+ part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+ part_inc_in_flight(&mdev->vdisk->part0, rw);
+ part_stat_unlock();
+}
+
+/* Update disk stats when completing request upwards */
+static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ int rw = bio_data_dir(req->master_bio);
+ unsigned long duration = jiffies - req->start_time;
+ int cpu;
+ cpu = part_stat_lock();
+ part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
+ part_round_stats(cpu, &mdev->vdisk->part0);
+ part_dec_in_flight(&mdev->vdisk->part0, rw);
+ part_stat_unlock();
+}
+
+static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
+{
+ const unsigned long s = req->rq_state;
+ /* if it was a write, we may have to set the corresponding
+ * bit(s) out-of-sync first. If it had a local part, we need to
+ * release the reference to the activity log. */
+ if (rw == WRITE) {
+ /* remove it from the transfer log.
+ * well, only if it had been there in the first
+ * place... if it had not (local only or conflicting
+ * and never sent), it should still be "empty" as
+ * initialized in drbd_req_new(), so we can list_del() it
+ * here unconditionally */
+ list_del(&req->tl_requests);
+ /* Set out-of-sync unless both OK flags are set
+ * (local only or remote failed).
+ * Other places where we set out-of-sync:
+ * READ with local io-error */
+ if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
+ drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+ if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
+ drbd_set_in_sync(mdev, req->sector, req->size);
+
+ /* one might be tempted to move the drbd_al_complete_io
+ * to the local io completion callback drbd_endio_pri.
+ * but, if this was a mirror write, we may only
+ * drbd_al_complete_io after this is RQ_NET_DONE,
+ * otherwise the extent could be dropped from the al
+ * before it has actually been written on the peer.
+ * if we crash before our peer knows about the request,
+ * but after the extent has been dropped from the al,
+ * we would forget to resync the corresponding extent.
+ */
+ if (s & RQ_LOCAL_MASK) {
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ drbd_al_complete_io(mdev, req->sector);
+ put_ldev(mdev);
+ } else if (__ratelimit(&drbd_ratelimit_state)) {
+ dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
+ "but my Disk seems to have failed :(\n",
+ (unsigned long long) req->sector);
+ }
+ }
+ }
+
+ /* if it was a local io error, we want to notify our
+ * peer about that, and see if we need to
+ * detach the disk and stuff.
+ * to avoid allocating some special work
+ * struct, reuse the request. */
+
+ /* THINK
+ * why do we do this not when we detect the error,
+ * but delay it until it is "done", i.e. possibly
+ * until the next barrier ack? */
+
+ if (rw == WRITE &&
+ ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
+ if (!(req->w.list.next == LIST_POISON1 ||
+ list_empty(&req->w.list))) {
+ /* DEBUG ASSERT only; if this triggers, we
+ * probably corrupt the worker list here */
+ dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
+ dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
+ }
+ req->w.cb = w_io_error;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ /* drbd_req_free() is done in w_io_error */
+ } else {
+ drbd_req_free(req);
+ }
+}
+
+static void queue_barrier(struct drbd_conf *mdev)
+{
+ struct drbd_tl_epoch *b;
+
+ /* We are within the req_lock. Once we queued the barrier for sending,
+ * we set the CREATE_BARRIER bit. It is cleared as soon as a new
+ * barrier/epoch object is added. This is the only place this bit is
+ * set. It indicates that the barrier for this epoch is already queued,
+ * and no new epoch has been created yet. */
+ if (test_bit(CREATE_BARRIER, &mdev->flags))
+ return;
+
+ b = mdev->newest_tle;
+ b->w.cb = w_send_barrier;
+ /* inc_ap_pending done here, so we won't
+ * get imbalanced on connection loss.
+ * dec_ap_pending will be done in got_BarrierAck
+ * or (on connection loss) in tl_clear. */
+ inc_ap_pending(mdev);
+ drbd_queue_work(&mdev->data.work, &b->w);
+ set_bit(CREATE_BARRIER, &mdev->flags);
+}
+
+static void _about_to_complete_local_write(struct drbd_conf *mdev,
+ struct drbd_request *req)
+{
+ const unsigned long s = req->rq_state;
+ struct drbd_request *i;
+ struct drbd_epoch_entry *e;
+ struct hlist_node *n;
+ struct hlist_head *slot;
+
+ /* before we can signal completion to the upper layers,
+ * we may need to close the current epoch */
+ if (mdev->state.conn >= C_CONNECTED &&
+ req->epoch == mdev->newest_tle->br_number)
+ queue_barrier(mdev);
+
+ /* we need to do the conflict detection stuff,
+ * if we have the ee_hash (two_primaries) and
+ * this has been on the network */
+ if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) {
+ const sector_t sector = req->sector;
+ const int size = req->size;
+
+ /* ASSERT:
+ * there must be no conflicting requests, since
+ * they must have been failed on the spot */
+#define OVERLAPS overlaps(sector, size, i->sector, i->size)
+ slot = tl_hash_slot(mdev, sector);
+ hlist_for_each_entry(i, n, slot, colision) {
+ if (OVERLAPS) {
+ dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
+ "other: %p %llus +%u\n",
+ req, (unsigned long long)sector, size,
+ i, (unsigned long long)i->sector, i->size);
+ }
+ }
+
+ /* maybe "wake" those conflicting epoch entries
+ * that wait for this request to finish.
+ *
+ * currently, there can be only _one_ such ee
+ * (well, or some more, which would be pending
+ * P_DISCARD_ACK not yet sent by the asender...),
+ * since we block the receiver thread upon the
+ * first conflict detection, which will wait on
+ * misc_wait. maybe we want to assert that?
+ *
+ * anyways, if we found one,
+ * we just have to do a wake_up. */
+#undef OVERLAPS
+#define OVERLAPS overlaps(sector, size, e->sector, e->size)
+ slot = ee_hash_slot(mdev, req->sector);
+ hlist_for_each_entry(e, n, slot, colision) {
+ if (OVERLAPS) {
+ wake_up(&mdev->misc_wait);
+ break;
+ }
+ }
+ }
+#undef OVERLAPS
+}
+
+void complete_master_bio(struct drbd_conf *mdev,
+ struct bio_and_error *m)
+{
+ bio_endio(m->bio, m->error);
+ dec_ap_bio(mdev);
+}
+
+/* Helper for __req_mod().
+ * Set m->bio to the master bio, if it is fit to be completed,
+ * or leave it alone (it is initialized to NULL in __req_mod),
+ * if it has already been completed, or cannot be completed yet.
+ * If m->bio is set, the error status to be returned is placed in m->error.
+ */
+void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
+{
+ const unsigned long s = req->rq_state;
+ struct drbd_conf *mdev = req->mdev;
+ /* only WRITES may end up here without a master bio (on barrier ack) */
+ int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+
+ /* we must not complete the master bio, while it is
+ * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
+ * not yet acknowledged by the peer
+ * not yet completed by the local io subsystem
+ * these flags may get cleared in any order by
+ * the worker,
+ * the receiver,
+ * the bio_endio completion callbacks.
+ */
+ if (s & RQ_NET_QUEUED)
+ return;
+ if (s & RQ_NET_PENDING)
+ return;
+ if (s & RQ_LOCAL_PENDING)
+ return;
+
+ if (req->master_bio) {
+ /* this is data_received (remote read)
+ * or protocol C P_WRITE_ACK
+ * or protocol B P_RECV_ACK
+ * or protocol A "handed_over_to_network" (SendAck)
+ * or canceled or failed,
+ * or killed from the transfer log due to connection loss.
+ */
+
+ /*
+ * figure out whether to report success or failure.
+ *
+ * report success when at least one of the operations succeeded.
+ * or, to put the other way,
+ * only report failure, when both operations failed.
+ *
+ * what to do about the failures is handled elsewhere.
+ * what we need to do here is just: complete the master_bio.
+ *
+ * local completion error, if any, has been stored as ERR_PTR
+ * in private_bio within drbd_endio_pri.
+ */
+ int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
+ int error = PTR_ERR(req->private_bio);
+
+ /* remove the request from the conflict detection
+ * respective block_id verification hash */
+ if (!hlist_unhashed(&req->colision))
+ hlist_del(&req->colision);
+ else
+ D_ASSERT((s & RQ_NET_MASK) == 0);
+
+ /* for writes we need to do some extra housekeeping */
+ if (rw == WRITE)
+ _about_to_complete_local_write(mdev, req);
+
+ /* Update disk stats */
+ _drbd_end_io_acct(mdev, req);
+
+ m->error = ok ? 0 : (error ?: -EIO);
+ m->bio = req->master_bio;
+ req->master_bio = NULL;
+ }
+
+ if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
+ /* this is disconnected (local only) operation,
+ * or protocol C P_WRITE_ACK,
+ * or protocol A or B P_BARRIER_ACK,
+ * or killed from the transfer log due to connection loss. */
+ _req_is_done(mdev, req, rw);
+ }
+ /* else: network part and not DONE yet. that is
+ * protocol A or B, barrier ack still pending... */
+}
+
+/*
+ * checks whether there was an overlapping request
+ * or ee already registered.
+ *
+ * if so, return 1, in which case this request is completed on the spot,
+ * without ever being submitted or send.
+ *
+ * return 0 if it is ok to submit this request.
+ *
+ * NOTE:
+ * paranoia: assume something above us is broken, and issues different write
+ * requests for the same block simultaneously...
+ *
+ * To ensure these won't be reordered differently on both nodes, resulting in
+ * diverging data sets, we discard the later one(s). Not that this is supposed
+ * to happen, but this is the rationale why we also have to check for
+ * conflicting requests with local origin, and why we have to do so regardless
+ * of whether we allowed multiple primaries.
+ *
+ * BTW, in case we only have one primary, the ee_hash is empty anyways, and the
+ * second hlist_for_each_entry becomes a noop. This is even simpler than to
+ * grab a reference on the net_conf, and check for the two_primaries flag...
+ */
+static int _req_conflicts(struct drbd_request *req)
+{
+ struct drbd_conf *mdev = req->mdev;
+ const sector_t sector = req->sector;
+ const int size = req->size;
+ struct drbd_request *i;
+ struct drbd_epoch_entry *e;
+ struct hlist_node *n;
+ struct hlist_head *slot;
+
+ D_ASSERT(hlist_unhashed(&req->colision));
+
+ if (!get_net_conf(mdev))
+ return 0;
+
+ /* BUG_ON */
+ ERR_IF (mdev->tl_hash_s == 0)
+ goto out_no_conflict;
+ BUG_ON(mdev->tl_hash == NULL);
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+ slot = tl_hash_slot(mdev, sector);
+ hlist_for_each_entry(i, n, slot, colision) {
+ if (OVERLAPS) {
+ dev_alert(DEV, "%s[%u] Concurrent local write detected! "
+ "[DISCARD L] new: %llus +%u; "
+ "pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)i->sector, i->size);
+ goto out_conflict;
+ }
+ }
+
+ if (mdev->ee_hash_s) {
+ /* now, check for overlapping requests with remote origin */
+ BUG_ON(mdev->ee_hash == NULL);
+#undef OVERLAPS
+#define OVERLAPS overlaps(e->sector, e->size, sector, size)
+ slot = ee_hash_slot(mdev, sector);
+ hlist_for_each_entry(e, n, slot, colision) {
+ if (OVERLAPS) {
+ dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+ " [DISCARD L] new: %llus +%u; "
+ "pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)e->sector, e->size);
+ goto out_conflict;
+ }
+ }
+ }
+#undef OVERLAPS
+
+out_no_conflict:
+ /* this is like it should be, and what we expected.
+ * our users do behave after all... */
+ put_net_conf(mdev);
+ return 0;
+
+out_conflict:
+ put_net_conf(mdev);
+ return 1;
+}
+
+/* obviously this could be coded as many single functions
+ * instead of one huge switch,
+ * or by putting the code directly in the respective locations
+ * (as it has been before).
+ *
+ * but having it this way
+ * enforces that it is all in this one place, where it is easier to audit,
+ * it makes it obvious that whatever "event" "happens" to a request should
+ * happen "atomically" within the req_lock,
+ * and it enforces that we have to think in a very structured manner
+ * about the "events" that may happen to a request during its life time ...
+ */
+void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct bio_and_error *m)
+{
+ struct drbd_conf *mdev = req->mdev;
+ m->bio = NULL;
+
+ switch (what) {
+ default:
+ dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
+ break;
+
+ /* does not happen...
+ * initialization done in drbd_req_new
+ case created:
+ break;
+ */
+
+ case to_be_send: /* via network */
+ /* reached via drbd_make_request_common
+ * and from w_read_retry_remote */
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+ req->rq_state |= RQ_NET_PENDING;
+ inc_ap_pending(mdev);
+ break;
+
+ case to_be_submitted: /* locally */
+ /* reached via drbd_make_request_common */
+ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
+ req->rq_state |= RQ_LOCAL_PENDING;
+ break;
+
+ case completed_ok:
+ if (bio_data_dir(req->master_bio) == WRITE)
+ mdev->writ_cnt += req->size>>9;
+ else
+ mdev->read_cnt += req->size>>9;
+
+ req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+
+ _req_may_be_done(req, m);
+ put_ldev(mdev);
+ break;
+
+ case write_completed_with_error:
+ req->rq_state |= RQ_LOCAL_COMPLETED;
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+
+ dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
+ (unsigned long long)req->sector, req->size);
+ /* and now: check how to handle local io error. */
+ __drbd_chk_io_error(mdev, FALSE);
+ _req_may_be_done(req, m);
+ put_ldev(mdev);
+ break;
+
+ case read_ahead_completed_with_error:
+ /* it is legal to fail READA */
+ req->rq_state |= RQ_LOCAL_COMPLETED;
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+ _req_may_be_done(req, m);
+ put_ldev(mdev);
+ break;
+
+ case read_completed_with_error:
+ drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+ req->rq_state |= RQ_LOCAL_COMPLETED;
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+
+ dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
+ (unsigned long long)req->sector, req->size);
+ /* _req_mod(req,to_be_send); oops, recursion... */
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+ req->rq_state |= RQ_NET_PENDING;
+ inc_ap_pending(mdev);
+
+ __drbd_chk_io_error(mdev, FALSE);
+ put_ldev(mdev);
+ /* NOTE: if we have no connection,
+ * or know the peer has no good data either,
+ * then we don't actually need to "queue_for_net_read",
+ * but we do so anyways, since the drbd_io_error()
+ * and the potential state change to "Diskless"
+ * needs to be done from process context */
+
+ /* fall through: _req_mod(req,queue_for_net_read); */
+
+ case queue_for_net_read:
+ /* READ or READA, and
+ * no local disk,
+ * or target area marked as invalid,
+ * or just got an io-error. */
+ /* from drbd_make_request_common
+ * or from bio_endio during read io-error recovery */
+
+ /* so we can verify the handle in the answer packet
+ * corresponding hlist_del is in _req_may_be_done() */
+ hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
+
+ set_bit(UNPLUG_REMOTE, &mdev->flags);
+
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ req->rq_state |= RQ_NET_QUEUED;
+ req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
+ ? w_read_retry_remote
+ : w_send_read_req;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ break;
+
+ case queue_for_net_write:
+ /* assert something? */
+ /* from drbd_make_request_common only */
+
+ hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
+ /* corresponding hlist_del is in _req_may_be_done() */
+
+ /* NOTE
+ * In case the req ended up on the transfer log before being
+ * queued on the worker, it could lead to this request being
+ * missed during cleanup after connection loss.
+ * So we have to do both operations here,
+ * within the same lock that protects the transfer log.
+ *
+ * _req_add_to_epoch(req); this has to be after the
+ * _maybe_start_new_epoch(req); which happened in
+ * drbd_make_request_common, because we now may set the bit
+ * again ourselves to close the current epoch.
+ *
+ * Add req to the (now) current epoch (barrier). */
+
+ /* otherwise we may lose an unplug, which may cause some remote
+ * io-scheduler timeout to expire, increasing maximum latency,
+ * hurting performance. */
+ set_bit(UNPLUG_REMOTE, &mdev->flags);
+
+ /* see drbd_make_request_common,
+ * just after it grabs the req_lock */
+ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
+
+ req->epoch = mdev->newest_tle->br_number;
+ list_add_tail(&req->tl_requests,
+ &mdev->newest_tle->requests);
+
+ /* increment size of current epoch */
+ mdev->newest_tle->n_req++;
+
+ /* queue work item to send data */
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ req->rq_state |= RQ_NET_QUEUED;
+ req->w.cb = w_send_dblock;
+ drbd_queue_work(&mdev->data.work, &req->w);
+
+ /* close the epoch, in case it outgrew the limit */
+ if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size)
+ queue_barrier(mdev);
+
+ break;
+
+ case send_canceled:
+ /* treat it the same */
+ case send_failed:
+ /* real cleanup will be done from tl_clear. just update flags
+ * so it is no longer marked as on the worker queue */
+ req->rq_state &= ~RQ_NET_QUEUED;
+ /* if we did it right, tl_clear should be scheduled only after
+ * this, so this should not be necessary! */
+ _req_may_be_done(req, m);
+ break;
+
+ case handed_over_to_network:
+ /* assert something? */
+ if (bio_data_dir(req->master_bio) == WRITE &&
+ mdev->net_conf->wire_protocol == DRBD_PROT_A) {
+ /* this is what is dangerous about protocol A:
+ * pretend it was successfully written on the peer. */
+ if (req->rq_state & RQ_NET_PENDING) {
+ dec_ap_pending(mdev);
+ req->rq_state &= ~RQ_NET_PENDING;
+ req->rq_state |= RQ_NET_OK;
+ } /* else: neg-ack was faster... */
+ /* it is still not yet RQ_NET_DONE until the
+ * corresponding epoch barrier got acked as well,
+ * so we know what to dirty on connection loss */
+ }
+ req->rq_state &= ~RQ_NET_QUEUED;
+ req->rq_state |= RQ_NET_SENT;
+ /* because _drbd_send_zc_bio could sleep, and may want to
+ * dereference the bio even after the "write_acked_by_peer" and
+ * "completed_ok" events came in, once we return from
+ * _drbd_send_zc_bio (drbd_send_dblock), we have to check
+ * whether it is done already, and end it. */
+ _req_may_be_done(req, m);
+ break;
+
+ case connection_lost_while_pending:
+ /* transfer log cleanup after connection loss */
+ /* assert something? */
+ if (req->rq_state & RQ_NET_PENDING)
+ dec_ap_pending(mdev);
+ req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+ req->rq_state |= RQ_NET_DONE;
+ /* if it is still queued, we may not complete it here.
+ * it will be canceled soon. */
+ if (!(req->rq_state & RQ_NET_QUEUED))
+ _req_may_be_done(req, m);
+ break;
+
+ case write_acked_by_peer_and_sis:
+ req->rq_state |= RQ_NET_SIS;
+ case conflict_discarded_by_peer:
+ /* for discarded conflicting writes of multiple primaries,
+ * there is no need to keep anything in the tl, potential
+ * node crashes are covered by the activity log. */
+ if (what == conflict_discarded_by_peer)
+ dev_alert(DEV, "Got DiscardAck packet %llus +%u!"
+ " DRBD is not a random data generator!\n",
+ (unsigned long long)req->sector, req->size);
+ req->rq_state |= RQ_NET_DONE;
+ /* fall through */
+ case write_acked_by_peer:
+ /* protocol C; successfully written on peer.
+ * Nothing to do here.
+ * We want to keep the tl in place for all protocols, to cater
+ * for volatile write-back caches on lower level devices.
+ *
+ * A barrier request is expected to have forced all prior
+ * requests onto stable storage, so completion of a barrier
+ * request could set NET_DONE right here, and not wait for the
+ * P_BARRIER_ACK, but that is an unnecessary optimization. */
+
+ /* this makes it effectively the same as for: */
+ case recv_acked_by_peer:
+ /* protocol B; pretends to be successfully written on peer.
+ * see also notes above in handed_over_to_network about
+ * protocol != C */
+ req->rq_state |= RQ_NET_OK;
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ dec_ap_pending(mdev);
+ req->rq_state &= ~RQ_NET_PENDING;
+ _req_may_be_done(req, m);
+ break;
+
+ case neg_acked:
+ /* assert something? */
+ if (req->rq_state & RQ_NET_PENDING)
+ dec_ap_pending(mdev);
+ req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+
+ req->rq_state |= RQ_NET_DONE;
+ _req_may_be_done(req, m);
+ /* else: done by handed_over_to_network */
+ break;
+
+ case barrier_acked:
+ if (req->rq_state & RQ_NET_PENDING) {
+ /* barrier came in before all requests have been acked.
+ * this is bad, because if the connection is lost now,
+ * we won't be able to clean them up... */
+ dev_err(DEV, "FIXME (barrier_acked but pending)\n");
+ list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
+ }
+ D_ASSERT(req->rq_state & RQ_NET_SENT);
+ req->rq_state |= RQ_NET_DONE;
+ _req_may_be_done(req, m);
+ break;
+
+ case data_received:
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ dec_ap_pending(mdev);
+ req->rq_state &= ~RQ_NET_PENDING;
+ req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
+ _req_may_be_done(req, m);
+ break;
+ };
+}
+
+/* we may do a local read if:
+ * - we are consistent (of course),
+ * - or we are generally inconsistent,
+ * BUT we are still/already IN SYNC for this area.
+ * since size may be bigger than BM_BLOCK_SIZE,
+ * we may need to check several bits.
+ */
+static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ unsigned long sbnr, ebnr;
+ sector_t esector, nr_sectors;
+
+ if (mdev->state.disk == D_UP_TO_DATE)
+ return 1;
+ if (mdev->state.disk >= D_OUTDATED)
+ return 0;
+ if (mdev->state.disk < D_INCONSISTENT)
+ return 0;
+ /* state.disk == D_INCONSISTENT We will have a look at the BitMap */
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ D_ASSERT(sector < nr_sectors);
+ D_ASSERT(esector < nr_sectors);
+
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+
+ return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
+}
+
+static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
+{
+ const int rw = bio_rw(bio);
+ const int size = bio->bi_size;
+ const sector_t sector = bio->bi_sector;
+ struct drbd_tl_epoch *b = NULL;
+ struct drbd_request *req;
+ int local, remote;
+ int err = -EIO;
+
+ /* allocate outside of all locks; */
+ req = drbd_req_new(mdev, bio);
+ if (!req) {
+ dec_ap_bio(mdev);
+ /* only pass the error to the upper layers.
+ * if user cannot handle io errors, that's not our business. */
+ dev_err(DEV, "could not kmalloc() req\n");
+ bio_endio(bio, -ENOMEM);
+ return 0;
+ }
+
+ local = get_ldev(mdev);
+ if (!local) {
+ bio_put(req->private_bio); /* or we get a bio leak */
+ req->private_bio = NULL;
+ }
+ if (rw == WRITE) {
+ remote = 1;
+ } else {
+ /* READ || READA */
+ if (local) {
+ if (!drbd_may_do_local_read(mdev, sector, size)) {
+ /* we could kick the syncer to
+ * sync this extent asap, wait for
+ * it, then continue locally.
+ * Or just issue the request remotely.
+ */
+ local = 0;
+ bio_put(req->private_bio);
+ req->private_bio = NULL;
+ put_ldev(mdev);
+ }
+ }
+ remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
+ }
+
+ /* If we have a disk, but a READA request is mapped to remote,
+ * we are R_PRIMARY, D_INCONSISTENT, SyncTarget.
+ * Just fail that READA request right here.
+ *
+ * THINK: maybe fail all READA when not local?
+ * or make this configurable...
+ * if network is slow, READA won't do any good.
+ */
+ if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) {
+ err = -EWOULDBLOCK;
+ goto fail_and_free_req;
+ }
+
+ /* For WRITES going to the local disk, grab a reference on the target
+ * extent. This waits for any resync activity in the corresponding
+ * resync extent to finish, and, if necessary, pulls in the target
+ * extent into the activity log, which involves further disk io because
+ * of transactional on-disk meta data updates. */
+ if (rw == WRITE && local)
+ drbd_al_begin_io(mdev, sector);
+
+ remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
+ (mdev->state.pdsk == D_INCONSISTENT &&
+ mdev->state.conn >= C_CONNECTED));
+
+ if (!(local || remote)) {
+ dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+ goto fail_free_complete;
+ }
+
+ /* For WRITE request, we have to make sure that we have an
+ * unused_spare_tle, in case we need to start a new epoch.
+ * I try to be smart and avoid to pre-allocate always "just in case",
+ * but there is a race between testing the bit and pointer outside the
+ * spinlock, and grabbing the spinlock.
+ * if we lost that race, we retry. */
+ if (rw == WRITE && remote &&
+ mdev->unused_spare_tle == NULL &&
+ test_bit(CREATE_BARRIER, &mdev->flags)) {
+allocate_barrier:
+ b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
+ if (!b) {
+ dev_err(DEV, "Failed to alloc barrier.\n");
+ err = -ENOMEM;
+ goto fail_free_complete;
+ }
+ }
+
+ /* GOOD, everything prepared, grab the spin_lock */
+ spin_lock_irq(&mdev->req_lock);
+
+ if (remote) {
+ remote = (mdev->state.pdsk == D_UP_TO_DATE ||
+ (mdev->state.pdsk == D_INCONSISTENT &&
+ mdev->state.conn >= C_CONNECTED));
+ if (!remote)
+ dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
+ if (!(local || remote)) {
+ dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail_free_complete;
+ }
+ }
+
+ if (b && mdev->unused_spare_tle == NULL) {
+ mdev->unused_spare_tle = b;
+ b = NULL;
+ }
+ if (rw == WRITE && remote &&
+ mdev->unused_spare_tle == NULL &&
+ test_bit(CREATE_BARRIER, &mdev->flags)) {
+ /* someone closed the current epoch
+ * while we were grabbing the spinlock */
+ spin_unlock_irq(&mdev->req_lock);
+ goto allocate_barrier;
+ }
+
+
+ /* Update disk stats */
+ _drbd_start_io_acct(mdev, req, bio);
+
+ /* _maybe_start_new_epoch(mdev);
+ * If we need to generate a write barrier packet, we have to add the
+ * new epoch (barrier) object, and queue the barrier packet for sending,
+ * and queue the req's data after it _within the same lock_, otherwise
+ * we have race conditions were the reorder domains could be mixed up.
+ *
+ * Even read requests may start a new epoch and queue the corresponding
+ * barrier packet. To get the write ordering right, we only have to
+ * make sure that, if this is a write request and it triggered a
+ * barrier packet, this request is queued within the same spinlock. */
+ if (remote && mdev->unused_spare_tle &&
+ test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+ _tl_add_barrier(mdev, mdev->unused_spare_tle);
+ mdev->unused_spare_tle = NULL;
+ } else {
+ D_ASSERT(!(remote && rw == WRITE &&
+ test_bit(CREATE_BARRIER, &mdev->flags)));
+ }
+
+ /* NOTE
+ * Actually, 'local' may be wrong here already, since we may have failed
+ * to write to the meta data, and may become wrong anytime because of
+ * local io-error for some other request, which would lead to us
+ * "detaching" the local disk.
+ *
+ * 'remote' may become wrong any time because the network could fail.
+ *
+ * This is a harmless race condition, though, since it is handled
+ * correctly at the appropriate places; so it just defers the failure
+ * of the respective operation.
+ */
+
+ /* mark them early for readability.
+ * this just sets some state flags. */
+ if (remote)
+ _req_mod(req, to_be_send);
+ if (local)
+ _req_mod(req, to_be_submitted);
+
+ /* check this request on the collision detection hash tables.
+ * if we have a conflict, just complete it here.
+ * THINK do we want to check reads, too? (I don't think so...) */
+ if (rw == WRITE && _req_conflicts(req)) {
+ /* this is a conflicting request.
+ * even though it may have been only _partially_
+ * overlapping with one of the currently pending requests,
+ * without even submitting or sending it, we will
+ * pretend that it was successfully served right now.
+ */
+ if (local) {
+ bio_put(req->private_bio);
+ req->private_bio = NULL;
+ drbd_al_complete_io(mdev, req->sector);
+ put_ldev(mdev);
+ local = 0;
+ }
+ if (remote)
+ dec_ap_pending(mdev);
+ _drbd_end_io_acct(mdev, req);
+ /* THINK: do we want to fail it (-EIO), or pretend success? */
+ bio_endio(req->master_bio, 0);
+ req->master_bio = NULL;
+ dec_ap_bio(mdev);
+ drbd_req_free(req);
+ remote = 0;
+ }
+
+ /* NOTE remote first: to get the concurrent write detection right,
+ * we must register the request before start of local IO. */
+ if (remote) {
+ /* either WRITE and C_CONNECTED,
+ * or READ, and no local disk,
+ * or READ, but not in sync.
+ */
+ _req_mod(req, (rw == WRITE)
+ ? queue_for_net_write
+ : queue_for_net_read);
+ }
+ spin_unlock_irq(&mdev->req_lock);
+ kfree(b); /* if someone else has beaten us to it... */
+
+ if (local) {
+ req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+
+ if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
+ : rw == READ ? DRBD_FAULT_DT_RD
+ : DRBD_FAULT_DT_RA))
+ bio_endio(req->private_bio, -EIO);
+ else
+ generic_make_request(req->private_bio);
+ }
+
+ /* we need to plug ALWAYS since we possibly need to kick lo_dev.
+ * we plug after submit, so we won't miss an unplug event */
+ drbd_plug_device(mdev);
+
+ return 0;
+
+fail_free_complete:
+ if (rw == WRITE && local)
+ drbd_al_complete_io(mdev, sector);
+fail_and_free_req:
+ if (local) {
+ bio_put(req->private_bio);
+ req->private_bio = NULL;
+ put_ldev(mdev);
+ }
+ bio_endio(bio, err);
+ drbd_req_free(req);
+ dec_ap_bio(mdev);
+ kfree(b);
+
+ return 0;
+}
+
+/* helper function for drbd_make_request
+ * if we can determine just by the mdev (state) that this request will fail,
+ * return 1
+ * otherwise return 0
+ */
+static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
+{
+ /* Unconfigured */
+ if (mdev->state.conn == C_DISCONNECTING &&
+ mdev->state.disk == D_DISKLESS)
+ return 1;
+
+ if (mdev->state.role != R_PRIMARY &&
+ (!allow_oos || is_write)) {
+ if (__ratelimit(&drbd_ratelimit_state)) {
+ dev_err(DEV, "Process %s[%u] tried to %s; "
+ "since we are not in Primary state, "
+ "we cannot allow this\n",
+ current->comm, current->pid,
+ is_write ? "WRITE" : "READ");
+ }
+ return 1;
+ }
+
+ /*
+ * Paranoia: we might have been primary, but sync target, or
+ * even diskless, then lost the connection.
+ * This should have been handled (panic? suspend?) somewhere
+ * else. But maybe it was not, so check again here.
+ * Caution: as long as we do not have a read/write lock on mdev,
+ * to serialize state changes, this is racy, since we may lose
+ * the connection *after* we test for the cstate.
+ */
+ if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+int drbd_make_request_26(struct request_queue *q, struct bio *bio)
+{
+ unsigned int s_enr, e_enr;
+ struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+
+ if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
+ bio_endio(bio, -EPERM);
+ return 0;
+ }
+
+ /* Reject barrier requests if we know the underlying device does
+ * not support them.
+ * XXX: Need to get this info from peer as well some how so we
+ * XXX: reject if EITHER side/data/metadata area does not support them.
+ *
+ * because of those XXX, this is not yet enabled,
+ * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit.
+ */
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) {
+ /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */
+ bio_endio(bio, -EOPNOTSUPP);
+ return 0;
+ }
+
+ /*
+ * what we "blindly" assume:
+ */
+ D_ASSERT(bio->bi_size > 0);
+ D_ASSERT((bio->bi_size & 0x1ff) == 0);
+ D_ASSERT(bio->bi_idx == 0);
+
+ /* to make some things easier, force alignment of requests within the
+ * granularity of our hash tables */
+ s_enr = bio->bi_sector >> HT_SHIFT;
+ e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
+
+ if (likely(s_enr == e_enr)) {
+ inc_ap_bio(mdev, 1);
+ return drbd_make_request_common(mdev, bio);
+ }
+
+ /* can this bio be split generically?
+ * Maybe add our own split-arbitrary-bios function. */
+ if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) {
+ /* rather error out here than BUG in bio_split */
+ dev_err(DEV, "bio would need to, but cannot, be split: "
+ "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
+ bio->bi_vcnt, bio->bi_idx, bio->bi_size,
+ (unsigned long long)bio->bi_sector);
+ bio_endio(bio, -EINVAL);
+ } else {
+ /* This bio crosses some boundary, so we have to split it. */
+ struct bio_pair *bp;
+ /* works for the "do not cross hash slot boundaries" case
+ * e.g. sector 262269, size 4096
+ * s_enr = 262269 >> 6 = 4097
+ * e_enr = (262269+8-1) >> 6 = 4098
+ * HT_SHIFT = 6
+ * sps = 64, mask = 63
+ * first_sectors = 64 - (262269 & 63) = 3
+ */
+ const sector_t sect = bio->bi_sector;
+ const int sps = 1 << HT_SHIFT; /* sectors per slot */
+ const int mask = sps - 1;
+ const sector_t first_sectors = sps - (sect & mask);
+ bp = bio_split(bio,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+ bio_split_pool,
+#endif
+ first_sectors);
+
+ /* we need to get a "reference count" (ap_bio_cnt)
+ * to avoid races with the disconnect/reconnect/suspend code.
+ * In case we need to split the bio here, we need to get two references
+ * atomically, otherwise we might deadlock when trying to submit the
+ * second one! */
+ inc_ap_bio(mdev, 2);
+
+ D_ASSERT(e_enr == s_enr + 1);
+
+ drbd_make_request_common(mdev, &bp->bio1);
+ drbd_make_request_common(mdev, &bp->bio2);
+ bio_pair_release(bp);
+ }
+ return 0;
+}
+
+/* This is called by bio_add_page(). With this function we reduce
+ * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs
+ * units (was AL_EXTENTs).
+ *
+ * we do the calculation within the lower 32bit of the byte offsets,
+ * since we don't care for actual offset, but only check whether it
+ * would cross "activity log extent" boundaries.
+ *
+ * As long as the BIO is empty we have to allow at least one bvec,
+ * regardless of size and offset. so the resulting bio may still
+ * cross extent boundaries. those are dealt with (bio_split) in
+ * drbd_make_request_26.
+ */
+int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+ unsigned int bio_offset =
+ (unsigned int)bvm->bi_sector << 9; /* 32 bit */
+ unsigned int bio_size = bvm->bi_size;
+ int limit, backing_limit;
+
+ limit = DRBD_MAX_SEGMENT_SIZE
+ - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size);
+ if (limit < 0)
+ limit = 0;
+ if (bio_size == 0) {
+ if (limit <= bvec->bv_len)
+ limit = bvec->bv_len;
+ } else if (limit && get_ldev(mdev)) {
+ struct request_queue * const b =
+ mdev->ldev->backing_bdev->bd_disk->queue;
+ if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) {
+ backing_limit = b->merge_bvec_fn(b, bvm, bvec);
+ limit = min(limit, backing_limit);
+ }
+ put_ldev(mdev);
+ }
+ return limit;
+}
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
new file mode 100644
index 000000000000..f22c1bc8ec7e
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.h
@@ -0,0 +1,326 @@
+/*
+ drbd_req.h
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+ Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+
+ DRBD is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ DRBD is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_REQ_H
+#define _DRBD_REQ_H
+
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_wrappers.h"
+
+/* The request callbacks will be called in irq context by the IDE drivers,
+ and in Softirqs/Tasklets/BH context by the SCSI drivers,
+ and by the receiver and worker in kernel-thread context.
+ Try to get the locking right :) */
+
+/*
+ * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are
+ * associated with IO requests originating from the block layer above us.
+ *
+ * There are quite a few things that may happen to a drbd request
+ * during its lifetime.
+ *
+ * It will be created.
+ * It will be marked with the intention to be
+ * submitted to local disk and/or
+ * send via the network.
+ *
+ * It has to be placed on the transfer log and other housekeeping lists,
+ * In case we have a network connection.
+ *
+ * It may be identified as a concurrent (write) request
+ * and be handled accordingly.
+ *
+ * It may me handed over to the local disk subsystem.
+ * It may be completed by the local disk subsystem,
+ * either sucessfully or with io-error.
+ * In case it is a READ request, and it failed locally,
+ * it may be retried remotely.
+ *
+ * It may be queued for sending.
+ * It may be handed over to the network stack,
+ * which may fail.
+ * It may be acknowledged by the "peer" according to the wire_protocol in use.
+ * this may be a negative ack.
+ * It may receive a faked ack when the network connection is lost and the
+ * transfer log is cleaned up.
+ * Sending may be canceled due to network connection loss.
+ * When it finally has outlived its time,
+ * corresponding dirty bits in the resync-bitmap may be cleared or set,
+ * it will be destroyed,
+ * and completion will be signalled to the originator,
+ * with or without "success".
+ */
+
+enum drbd_req_event {
+ created,
+ to_be_send,
+ to_be_submitted,
+
+ /* XXX yes, now I am inconsistent...
+ * these two are not "events" but "actions"
+ * oh, well... */
+ queue_for_net_write,
+ queue_for_net_read,
+
+ send_canceled,
+ send_failed,
+ handed_over_to_network,
+ connection_lost_while_pending,
+ recv_acked_by_peer,
+ write_acked_by_peer,
+ write_acked_by_peer_and_sis, /* and set_in_sync */
+ conflict_discarded_by_peer,
+ neg_acked,
+ barrier_acked, /* in protocol A and B */
+ data_received, /* (remote read) */
+
+ read_completed_with_error,
+ read_ahead_completed_with_error,
+ write_completed_with_error,
+ completed_ok,
+ nothing, /* for tracing only */
+};
+
+/* encoding of request states for now. we don't actually need that many bits.
+ * we don't need to do atomic bit operations either, since most of the time we
+ * need to look at the connection state and/or manipulate some lists at the
+ * same time, so we should hold the request lock anyways.
+ */
+enum drbd_req_state_bits {
+ /* 210
+ * 000: no local possible
+ * 001: to be submitted
+ * UNUSED, we could map: 011: submitted, completion still pending
+ * 110: completed ok
+ * 010: completed with error
+ */
+ __RQ_LOCAL_PENDING,
+ __RQ_LOCAL_COMPLETED,
+ __RQ_LOCAL_OK,
+
+ /* 76543
+ * 00000: no network possible
+ * 00001: to be send
+ * 00011: to be send, on worker queue
+ * 00101: sent, expecting recv_ack (B) or write_ack (C)
+ * 11101: sent,
+ * recv_ack (B) or implicit "ack" (A),
+ * still waiting for the barrier ack.
+ * master_bio may already be completed and invalidated.
+ * 11100: write_acked (C),
+ * data_received (for remote read, any protocol)
+ * or finally the barrier ack has arrived (B,A)...
+ * request can be freed
+ * 01100: neg-acked (write, protocol C)
+ * or neg-d-acked (read, any protocol)
+ * or killed from the transfer log
+ * during cleanup after connection loss
+ * request can be freed
+ * 01000: canceled or send failed...
+ * request can be freed
+ */
+
+ /* if "SENT" is not set, yet, this can still fail or be canceled.
+ * if "SENT" is set already, we still wait for an Ack packet.
+ * when cleared, the master_bio may be completed.
+ * in (B,A) the request object may still linger on the transaction log
+ * until the corresponding barrier ack comes in */
+ __RQ_NET_PENDING,
+
+ /* If it is QUEUED, and it is a WRITE, it is also registered in the
+ * transfer log. Currently we need this flag to avoid conflicts between
+ * worker canceling the request and tl_clear_barrier killing it from
+ * transfer log. We should restructure the code so this conflict does
+ * no longer occur. */
+ __RQ_NET_QUEUED,
+
+ /* well, actually only "handed over to the network stack".
+ *
+ * TODO can potentially be dropped because of the similar meaning
+ * of RQ_NET_SENT and ~RQ_NET_QUEUED.
+ * however it is not exactly the same. before we drop it
+ * we must ensure that we can tell a request with network part
+ * from a request without, regardless of what happens to it. */
+ __RQ_NET_SENT,
+
+ /* when set, the request may be freed (if RQ_NET_QUEUED is clear).
+ * basically this means the corresponding P_BARRIER_ACK was received */
+ __RQ_NET_DONE,
+
+ /* whether or not we know (C) or pretend (B,A) that the write
+ * was successfully written on the peer.
+ */
+ __RQ_NET_OK,
+
+ /* peer called drbd_set_in_sync() for this write */
+ __RQ_NET_SIS,
+
+ /* keep this last, its for the RQ_NET_MASK */
+ __RQ_NET_MAX,
+};
+
+#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
+#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
+#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK)
+
+#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */
+
+#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING)
+#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED)
+#define RQ_NET_SENT (1UL << __RQ_NET_SENT)
+#define RQ_NET_DONE (1UL << __RQ_NET_DONE)
+#define RQ_NET_OK (1UL << __RQ_NET_OK)
+#define RQ_NET_SIS (1UL << __RQ_NET_SIS)
+
+/* 0x1f8 */
+#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
+
+/* epoch entries */
+static inline
+struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+ BUG_ON(mdev->ee_hash_s == 0);
+ return mdev->ee_hash +
+ ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
+}
+
+/* transfer log (drbd_request objects) */
+static inline
+struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+ BUG_ON(mdev->tl_hash_s == 0);
+ return mdev->tl_hash +
+ ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
+}
+
+/* application reads (drbd_request objects) */
+static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+ return mdev->app_reads_hash
+ + ((unsigned int)(sector) % APP_R_HSIZE);
+}
+
+/* when we receive the answer for a read request,
+ * verify that we actually know about it */
+static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
+ u64 id, sector_t sector)
+{
+ struct hlist_head *slot = ar_hash_slot(mdev, sector);
+ struct hlist_node *n;
+ struct drbd_request *req;
+
+ hlist_for_each_entry(req, n, slot, colision) {
+ if ((unsigned long)req == (unsigned long)id) {
+ D_ASSERT(req->sector == sector);
+ return req;
+ }
+ }
+ return NULL;
+}
+
+static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
+ struct bio *bio_src)
+{
+ struct bio *bio;
+ struct drbd_request *req =
+ mempool_alloc(drbd_request_mempool, GFP_NOIO);
+ if (likely(req)) {
+ bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+
+ req->rq_state = 0;
+ req->mdev = mdev;
+ req->master_bio = bio_src;
+ req->private_bio = bio;
+ req->epoch = 0;
+ req->sector = bio->bi_sector;
+ req->size = bio->bi_size;
+ req->start_time = jiffies;
+ INIT_HLIST_NODE(&req->colision);
+ INIT_LIST_HEAD(&req->tl_requests);
+ INIT_LIST_HEAD(&req->w.list);
+
+ bio->bi_private = req;
+ bio->bi_end_io = drbd_endio_pri;
+ bio->bi_next = NULL;
+ }
+ return req;
+}
+
+static inline void drbd_req_free(struct drbd_request *req)
+{
+ mempool_free(req, drbd_request_mempool);
+}
+
+static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
+{
+ return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
+}
+
+/* Short lived temporary struct on the stack.
+ * We could squirrel the error to be returned into
+ * bio->bi_size, or similar. But that would be too ugly. */
+struct bio_and_error {
+ struct bio *bio;
+ int error;
+};
+
+extern void _req_may_be_done(struct drbd_request *req,
+ struct bio_and_error *m);
+extern void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct bio_and_error *m);
+extern void complete_master_bio(struct drbd_conf *mdev,
+ struct bio_and_error *m);
+
+/* use this if you don't want to deal with calling complete_master_bio()
+ * outside the spinlock, e.g. when walking some list on cleanup. */
+static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what)
+{
+ struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
+
+ /* __req_mod possibly frees req, do not touch req after that! */
+ __req_mod(req, what, &m);
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+}
+
+/* completion of master bio is outside of spinlock.
+ * If you need it irqsave, do it your self! */
+static inline void req_mod(struct drbd_request *req,
+ enum drbd_req_event what)
+{
+ struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
+ spin_lock_irq(&mdev->req_lock);
+ __req_mod(req, what, &m);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+}
+#endif
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
new file mode 100644
index 000000000000..76863e3f05be
--- /dev/null
+++ b/drivers/block/drbd/drbd_strings.c
@@ -0,0 +1,113 @@
+/*
+ drbd.h
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include <linux/drbd.h>
+
+static const char *drbd_conn_s_names[] = {
+ [C_STANDALONE] = "StandAlone",
+ [C_DISCONNECTING] = "Disconnecting",
+ [C_UNCONNECTED] = "Unconnected",
+ [C_TIMEOUT] = "Timeout",
+ [C_BROKEN_PIPE] = "BrokenPipe",
+ [C_NETWORK_FAILURE] = "NetworkFailure",
+ [C_PROTOCOL_ERROR] = "ProtocolError",
+ [C_WF_CONNECTION] = "WFConnection",
+ [C_WF_REPORT_PARAMS] = "WFReportParams",
+ [C_TEAR_DOWN] = "TearDown",
+ [C_CONNECTED] = "Connected",
+ [C_STARTING_SYNC_S] = "StartingSyncS",
+ [C_STARTING_SYNC_T] = "StartingSyncT",
+ [C_WF_BITMAP_S] = "WFBitMapS",
+ [C_WF_BITMAP_T] = "WFBitMapT",
+ [C_WF_SYNC_UUID] = "WFSyncUUID",
+ [C_SYNC_SOURCE] = "SyncSource",
+ [C_SYNC_TARGET] = "SyncTarget",
+ [C_PAUSED_SYNC_S] = "PausedSyncS",
+ [C_PAUSED_SYNC_T] = "PausedSyncT",
+ [C_VERIFY_S] = "VerifyS",
+ [C_VERIFY_T] = "VerifyT",
+};
+
+static const char *drbd_role_s_names[] = {
+ [R_PRIMARY] = "Primary",
+ [R_SECONDARY] = "Secondary",
+ [R_UNKNOWN] = "Unknown"
+};
+
+static const char *drbd_disk_s_names[] = {
+ [D_DISKLESS] = "Diskless",
+ [D_ATTACHING] = "Attaching",
+ [D_FAILED] = "Failed",
+ [D_NEGOTIATING] = "Negotiating",
+ [D_INCONSISTENT] = "Inconsistent",
+ [D_OUTDATED] = "Outdated",
+ [D_UNKNOWN] = "DUnknown",
+ [D_CONSISTENT] = "Consistent",
+ [D_UP_TO_DATE] = "UpToDate",
+};
+
+static const char *drbd_state_sw_errors[] = {
+ [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
+ [-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk",
+ [-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
+ [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
+ [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
+ [-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
+ [-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
+ [-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
+ [-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
+ [-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
+ [-SS_DEVICE_IN_USE] = "Device is held open by someone",
+ [-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
+ [-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify",
+ [-SS_NEED_CONNECTION] = "Need a connection to start verify or resync",
+ [-SS_NOT_SUPPORTED] = "Peer does not support protocol",
+ [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
+ [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
+ [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+};
+
+const char *drbd_conn_str(enum drbd_conns s)
+{
+ /* enums are unsigned... */
+ return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s];
+}
+
+const char *drbd_role_str(enum drbd_role s)
+{
+ return s > R_SECONDARY ? "TOO_LARGE" : drbd_role_s_names[s];
+}
+
+const char *drbd_disk_str(enum drbd_disk_state s)
+{
+ return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s];
+}
+
+const char *drbd_set_st_err_str(enum drbd_state_ret_codes err)
+{
+ return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
+ err > SS_TWO_PRIMARIES ? "TOO_LARGE"
+ : drbd_state_sw_errors[-err];
+}
diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
new file mode 100644
index 000000000000..fc824006e721
--- /dev/null
+++ b/drivers/block/drbd/drbd_vli.h
@@ -0,0 +1,351 @@
+/*
+-*- linux-c -*-
+ drbd_receiver.c
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_VLI_H
+#define _DRBD_VLI_H
+
+/*
+ * At a granularity of 4KiB storage represented per bit,
+ * and stroage sizes of several TiB,
+ * and possibly small-bandwidth replication,
+ * the bitmap transfer time can take much too long,
+ * if transmitted in plain text.
+ *
+ * We try to reduce the transfered bitmap information
+ * by encoding runlengths of bit polarity.
+ *
+ * We never actually need to encode a "zero" (runlengths are positive).
+ * But then we have to store the value of the first bit.
+ * The first bit of information thus shall encode if the first runlength
+ * gives the number of set or unset bits.
+ *
+ * We assume that large areas are either completely set or unset,
+ * which gives good compression with any runlength method,
+ * even when encoding the runlength as fixed size 32bit/64bit integers.
+ *
+ * Still, there may be areas where the polarity flips every few bits,
+ * and encoding the runlength sequence of those areas with fix size
+ * integers would be much worse than plaintext.
+ *
+ * We want to encode small runlength values with minimum code length,
+ * while still being able to encode a Huge run of all zeros.
+ *
+ * Thus we need a Variable Length Integer encoding, VLI.
+ *
+ * For some cases, we produce more code bits than plaintext input.
+ * We need to send incompressible chunks as plaintext, skip over them
+ * and then see if the next chunk compresses better.
+ *
+ * We don't care too much about "excellent" compression ratio for large
+ * runlengths (all set/all clear): whether we achieve a factor of 100
+ * or 1000 is not that much of an issue.
+ * We do not want to waste too much on short runlengths in the "noisy"
+ * parts of the bitmap, though.
+ *
+ * There are endless variants of VLI, we experimented with:
+ * * simple byte-based
+ * * various bit based with different code word length.
+ *
+ * To avoid yet an other configuration parameter (choice of bitmap compression
+ * algorithm) which was difficult to explain and tune, we just chose the one
+ * variant that turned out best in all test cases.
+ * Based on real world usage patterns, with device sizes ranging from a few GiB
+ * to several TiB, file server/mailserver/webserver/mysql/postgress,
+ * mostly idle to really busy, the all time winner (though sometimes only
+ * marginally better) is:
+ */
+
+/*
+ * encoding is "visualised" as
+ * __little endian__ bitstream, least significant bit first (left most)
+ *
+ * this particular encoding is chosen so that the prefix code
+ * starts as unary encoding the level, then modified so that
+ * 10 levels can be described in 8bit, with minimal overhead
+ * for the smaller levels.
+ *
+ * Number of data bits follow fibonacci sequence, with the exception of the
+ * last level (+1 data bit, so it makes 64bit total). The only worse code when
+ * encoding bit polarity runlength is 1 plain bits => 2 code bits.
+prefix data bits max val Nº data bits
+0 x 0x2 1
+10 x 0x4 1
+110 xx 0x8 2
+1110 xxx 0x10 3
+11110 xxx xx 0x30 5
+111110 xx xxxxxx 0x130 8
+11111100 xxxxxxxx xxxxx 0x2130 13
+11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21
+11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34
+11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
+ * maximum encodable value: 0x100000400202130 == 2**56 + some */
+
+/* compression "table":
+ transmitted x 0.29
+ as plaintext x ........................
+ x ........................
+ x ........................
+ x 0.59 0.21........................
+ x ........................................................
+ x .. c ...................................................
+ x 0.44.. o ...................................................
+ x .......... d ...................................................
+ x .......... e ...................................................
+ X............. ...................................................
+ x.............. b ...................................................
+2.0x............... i ...................................................
+ #X................ t ...................................................
+ #................. s ........................... plain bits ..........
+-+-----------------------------------------------------------------------
+ 1 16 32 64
+*/
+
+/* LEVEL: (total bits, prefix bits, prefix value),
+ * sorted ascending by number of total bits.
+ * The rest of the code table is calculated at compiletime from this. */
+
+/* fibonacci data 1, 1, ... */
+#define VLI_L_1_1() do { \
+ LEVEL( 2, 1, 0x00); \
+ LEVEL( 3, 2, 0x01); \
+ LEVEL( 5, 3, 0x03); \
+ LEVEL( 7, 4, 0x07); \
+ LEVEL(10, 5, 0x0f); \
+ LEVEL(14, 6, 0x1f); \
+ LEVEL(21, 8, 0x3f); \
+ LEVEL(29, 8, 0x7f); \
+ LEVEL(42, 8, 0xbf); \
+ LEVEL(64, 8, 0xff); \
+ } while (0)
+
+/* finds a suitable level to decode the least significant part of in.
+ * returns number of bits consumed.
+ *
+ * BUG() for bad input, as that would mean a buggy code table. */
+static inline int vli_decode_bits(u64 *out, const u64 in)
+{
+ u64 adj = 1;
+
+#define LEVEL(t,b,v) \
+ do { \
+ if ((in & ((1 << b) -1)) == v) { \
+ *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \
+ return t; \
+ } \
+ adj += 1ULL << (t - b); \
+ } while (0)
+
+ VLI_L_1_1();
+
+ /* NOT REACHED, if VLI_LEVELS code table is defined properly */
+ BUG();
+#undef LEVEL
+}
+
+/* return number of code bits needed,
+ * or negative error number */
+static inline int __vli_encode_bits(u64 *out, const u64 in)
+{
+ u64 max = 0;
+ u64 adj = 1;
+
+ if (in == 0)
+ return -EINVAL;
+
+#define LEVEL(t,b,v) do { \
+ max += 1ULL << (t - b); \
+ if (in <= max) { \
+ if (out) \
+ *out = ((in - adj) << b) | v; \
+ return t; \
+ } \
+ adj = max + 1; \
+ } while (0)
+
+ VLI_L_1_1();
+
+ return -EOVERFLOW;
+#undef LEVEL
+}
+
+#undef VLI_L_1_1
+
+/* code from here down is independend of actually used bit code */
+
+/*
+ * Code length is determined by some unique (e.g. unary) prefix.
+ * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
+ * not a byte stream.
+ */
+
+/* for the bitstream, we need a cursor */
+struct bitstream_cursor {
+ /* the current byte */
+ u8 *b;
+ /* the current bit within *b, nomalized: 0..7 */
+ unsigned int bit;
+};
+
+/* initialize cursor to point to first bit of stream */
+static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
+{
+ cur->b = s;
+ cur->bit = 0;
+}
+
+/* advance cursor by that many bits; maximum expected input value: 64,
+ * but depending on VLI implementation, it may be more. */
+static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
+{
+ bits += cur->bit;
+ cur->b = cur->b + (bits >> 3);
+ cur->bit = bits & 7;
+}
+
+/* the bitstream itself knows its length */
+struct bitstream {
+ struct bitstream_cursor cur;
+ unsigned char *buf;
+ size_t buf_len; /* in bytes */
+
+ /* for input stream:
+ * number of trailing 0 bits for padding
+ * total number of valid bits in stream: buf_len * 8 - pad_bits */
+ unsigned int pad_bits;
+};
+
+static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
+{
+ bs->buf = s;
+ bs->buf_len = len;
+ bs->pad_bits = pad_bits;
+ bitstream_cursor_reset(&bs->cur, bs->buf);
+}
+
+static inline void bitstream_rewind(struct bitstream *bs)
+{
+ bitstream_cursor_reset(&bs->cur, bs->buf);
+ memset(bs->buf, 0, bs->buf_len);
+}
+
+/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
+ * Ignores "pad_bits".
+ * Returns zero if bits == 0 (nothing to do).
+ * Returns number of bits used if successful.
+ *
+ * If there is not enough room left in bitstream,
+ * leaves bitstream unchanged and returns -ENOBUFS.
+ */
+static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
+{
+ unsigned char *b = bs->cur.b;
+ unsigned int tmp;
+
+ if (bits == 0)
+ return 0;
+
+ if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
+ return -ENOBUFS;
+
+ /* paranoia: strip off hi bits; they should not be set anyways. */
+ if (bits < 64)
+ val &= ~0ULL >> (64 - bits);
+
+ *b++ |= (val & 0xff) << bs->cur.bit;
+
+ for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
+ *b++ |= (val >> tmp) & 0xff;
+
+ bitstream_cursor_advance(&bs->cur, bits);
+ return bits;
+}
+
+/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
+ *
+ * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged.
+ *
+ * If there are less than the requested number of valid bits left in the
+ * bitstream, still fetches all available bits.
+ *
+ * Returns number of actually fetched bits.
+ */
+static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
+{
+ u64 val;
+ unsigned int n;
+
+ if (bits > 64)
+ return -EINVAL;
+
+ if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
+ bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
+ - bs->cur.bit - bs->pad_bits;
+
+ if (bits == 0) {
+ *out = 0;
+ return 0;
+ }
+
+ /* get the high bits */
+ val = 0;
+ n = (bs->cur.bit + bits + 7) >> 3;
+ /* n may be at most 9, if cur.bit + bits > 64 */
+ /* which means this copies at most 8 byte */
+ if (n) {
+ memcpy(&val, bs->cur.b+1, n - 1);
+ val = le64_to_cpu(val) << (8 - bs->cur.bit);
+ }
+
+ /* we still need the low bits */
+ val |= bs->cur.b[0] >> bs->cur.bit;
+
+ /* and mask out bits we don't want */
+ val &= ~0ULL >> (64 - bits);
+
+ bitstream_cursor_advance(&bs->cur, bits);
+ *out = val;
+
+ return bits;
+}
+
+/* encodes @in as vli into @bs;
+
+ * return values
+ * > 0: number of bits successfully stored in bitstream
+ * -ENOBUFS @bs is full
+ * -EINVAL input zero (invalid)
+ * -EOVERFLOW input too large for this vli code (invalid)
+ */
+static inline int vli_encode_bits(struct bitstream *bs, u64 in)
+{
+ u64 code = code;
+ int bits = __vli_encode_bits(&code, in);
+
+ if (bits <= 0)
+ return bits;
+
+ return bitstream_put_bits(bs, code, bits);
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
new file mode 100644
index 000000000000..ed8796f1112d
--- /dev/null
+++ b/drivers/block/drbd/drbd_worker.c
@@ -0,0 +1,1512 @@
+/*
+ drbd_worker.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/drbd.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+#define SLEEP_TIME (HZ/10)
+
+static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+
+
+
+/* defined here:
+ drbd_md_io_complete
+ drbd_endio_write_sec
+ drbd_endio_read_sec
+ drbd_endio_pri
+
+ * more endio handlers:
+ atodb_endio in drbd_actlog.c
+ drbd_bm_async_io_complete in drbd_bitmap.c
+
+ * For all these callbacks, note the following:
+ * The callbacks will be called in irq context by the IDE drivers,
+ * and in Softirqs/Tasklets/BH context by the SCSI drivers.
+ * Try to get the locking right :)
+ *
+ */
+
+
+/* About the global_state_lock
+ Each state transition on an device holds a read lock. In case we have
+ to evaluate the sync after dependencies, we grab a write lock, because
+ we need stable states on all devices for that. */
+rwlock_t global_state_lock;
+
+/* used for synchronous meta data and bitmap IO
+ * submitted by drbd_md_sync_page_io()
+ */
+void drbd_md_io_complete(struct bio *bio, int error)
+{
+ struct drbd_md_io *md_io;
+
+ md_io = (struct drbd_md_io *)bio->bi_private;
+ md_io->error = error;
+
+ complete(&md_io->event);
+}
+
+/* reads on behalf of the partner,
+ * "submitted" by the receiver
+ */
+void drbd_endio_read_sec(struct bio *bio, int error) __releases(local)
+{
+ unsigned long flags = 0;
+ struct drbd_epoch_entry *e = NULL;
+ struct drbd_conf *mdev;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+ e = bio->bi_private;
+ mdev = e->mdev;
+
+ if (error)
+ dev_warn(DEV, "read: error=%d s=%llus\n", error,
+ (unsigned long long)e->sector);
+ if (!error && !uptodate) {
+ dev_warn(DEV, "read: setting error to -EIO s=%llus\n",
+ (unsigned long long)e->sector);
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?! */
+ error = -EIO;
+ }
+
+ D_ASSERT(e->block_id != ID_VACANT);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ mdev->read_cnt += e->size >> 9;
+ list_del(&e->w.list);
+ if (list_empty(&mdev->read_ee))
+ wake_up(&mdev->ee_wait);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ drbd_chk_io_error(mdev, error, FALSE);
+ drbd_queue_work(&mdev->data.work, &e->w);
+ put_ldev(mdev);
+}
+
+/* writes on behalf of the partner, or resync writes,
+ * "submitted" by the receiver.
+ */
+void drbd_endio_write_sec(struct bio *bio, int error) __releases(local)
+{
+ unsigned long flags = 0;
+ struct drbd_epoch_entry *e = NULL;
+ struct drbd_conf *mdev;
+ sector_t e_sector;
+ int do_wake;
+ int is_syncer_req;
+ int do_al_complete_io;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+ int is_barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
+
+ e = bio->bi_private;
+ mdev = e->mdev;
+
+ if (error)
+ dev_warn(DEV, "write: error=%d s=%llus\n", error,
+ (unsigned long long)e->sector);
+ if (!error && !uptodate) {
+ dev_warn(DEV, "write: setting error to -EIO s=%llus\n",
+ (unsigned long long)e->sector);
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?! */
+ error = -EIO;
+ }
+
+ /* error == -ENOTSUPP would be a better test,
+ * alas it is not reliable */
+ if (error && is_barrier && e->flags & EE_IS_BARRIER) {
+ drbd_bump_write_ordering(mdev, WO_bdev_flush);
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ list_del(&e->w.list);
+ e->w.cb = w_e_reissue;
+ /* put_ldev actually happens below, once we come here again. */
+ __release(local);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+ drbd_queue_work(&mdev->data.work, &e->w);
+ return;
+ }
+
+ D_ASSERT(e->block_id != ID_VACANT);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ mdev->writ_cnt += e->size >> 9;
+ is_syncer_req = is_syncer_block_id(e->block_id);
+
+ /* after we moved e to done_ee,
+ * we may no longer access it,
+ * it may be freed/reused already!
+ * (as soon as we release the req_lock) */
+ e_sector = e->sector;
+ do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO;
+
+ list_del(&e->w.list); /* has been on active_ee or sync_ee */
+ list_add_tail(&e->w.list, &mdev->done_ee);
+
+ /* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
+ * neither did we wake possibly waiting conflicting requests.
+ * done from "drbd_process_done_ee" within the appropriate w.cb
+ * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
+
+ do_wake = is_syncer_req
+ ? list_empty(&mdev->sync_ee)
+ : list_empty(&mdev->active_ee);
+
+ if (error)
+ __drbd_chk_io_error(mdev, FALSE);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (is_syncer_req)
+ drbd_rs_complete_io(mdev, e_sector);
+
+ if (do_wake)
+ wake_up(&mdev->ee_wait);
+
+ if (do_al_complete_io)
+ drbd_al_complete_io(mdev, e_sector);
+
+ wake_asender(mdev);
+ put_ldev(mdev);
+
+}
+
+/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
+ */
+void drbd_endio_pri(struct bio *bio, int error)
+{
+ unsigned long flags;
+ struct drbd_request *req = bio->bi_private;
+ struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
+ enum drbd_req_event what;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+ if (error)
+ dev_warn(DEV, "p %s: error=%d\n",
+ bio_data_dir(bio) == WRITE ? "write" : "read", error);
+ if (!error && !uptodate) {
+ dev_warn(DEV, "p %s: setting error to -EIO\n",
+ bio_data_dir(bio) == WRITE ? "write" : "read");
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?! */
+ error = -EIO;
+ }
+
+ /* to avoid recursion in __req_mod */
+ if (unlikely(error)) {
+ what = (bio_data_dir(bio) == WRITE)
+ ? write_completed_with_error
+ : (bio_rw(bio) == READA)
+ ? read_completed_with_error
+ : read_ahead_completed_with_error;
+ } else
+ what = completed_ok;
+
+ bio_put(req->private_bio);
+ req->private_bio = ERR_PTR(error);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ __req_mod(req, what, &m);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+}
+
+int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+ /* NOTE: mdev->ldev can be NULL by the time we get here! */
+ /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
+
+ /* the only way this callback is scheduled is from _req_may_be_done,
+ * when it is done and had a local write error, see comments there */
+ drbd_req_free(req);
+
+ return TRUE;
+}
+
+int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+ /* We should not detach for read io-error,
+ * but try to WRITE the P_DATA_REPLY to the failed location,
+ * to give the disk the chance to relocate that block */
+
+ spin_lock_irq(&mdev->req_lock);
+ if (cancel ||
+ mdev->state.conn < C_CONNECTED ||
+ mdev->state.pdsk <= D_INCONSISTENT) {
+ _req_mod(req, send_canceled);
+ spin_unlock_irq(&mdev->req_lock);
+ dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
+ return 1;
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ return w_send_read_req(mdev, w, 0);
+}
+
+int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ ERR_IF(cancel) return 1;
+ dev_err(DEV, "resync inactive, but callback triggered??\n");
+ return 1; /* Simply ignore this! */
+}
+
+void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
+{
+ struct hash_desc desc;
+ struct scatterlist sg;
+ struct bio_vec *bvec;
+ int i;
+
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ sg_init_table(&sg, 1);
+ crypto_hash_init(&desc);
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+ crypto_hash_update(&desc, &sg, sg.length);
+ }
+ crypto_hash_final(&desc, digest);
+}
+
+static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int digest_size;
+ void *digest;
+ int ok;
+
+ D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ return 1;
+ }
+
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (digest) {
+ drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
+
+ inc_rs_pending(mdev);
+ ok = drbd_send_drequest_csum(mdev,
+ e->sector,
+ e->size,
+ digest,
+ digest_size,
+ P_CSUM_RS_REQUEST);
+ kfree(digest);
+ } else {
+ dev_err(DEV, "kmalloc() of digest failed.\n");
+ ok = 0;
+ }
+ } else
+ ok = 1;
+
+ drbd_free_ee(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
+ return ok;
+}
+
+#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
+
+static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ struct drbd_epoch_entry *e;
+
+ if (!get_ldev(mdev))
+ return 0;
+
+ /* GFP_TRY, because if there is no memory available right now, this may
+ * be rescheduled for later. It is "only" background resync, after all. */
+ e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
+ if (!e) {
+ put_ldev(mdev);
+ return 2;
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ list_add(&e->w.list, &mdev->read_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ e->private_bio->bi_end_io = drbd_endio_read_sec;
+ e->private_bio->bi_rw = READ;
+ e->w.cb = w_e_send_csum;
+
+ mdev->read_cnt += size >> 9;
+ drbd_generic_make_request(mdev, DRBD_FAULT_RS_RD, e->private_bio);
+
+ return 1;
+}
+
+void resync_timer_fn(unsigned long data)
+{
+ unsigned long flags;
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+ int queue;
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+
+ if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
+ queue = 1;
+ if (mdev->state.conn == C_VERIFY_S)
+ mdev->resync_work.cb = w_make_ov_request;
+ else
+ mdev->resync_work.cb = w_make_resync_request;
+ } else {
+ queue = 0;
+ mdev->resync_work.cb = w_resync_inactive;
+ }
+
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ /* harmless race: list_empty outside data.work.q_lock */
+ if (list_empty(&mdev->resync_work.list) && queue)
+ drbd_queue_work(&mdev->data.work, &mdev->resync_work);
+}
+
+int w_make_resync_request(struct drbd_conf *mdev,
+ struct drbd_work *w, int cancel)
+{
+ unsigned long bit;
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+ int max_segment_size = queue_max_segment_size(mdev->rq_queue);
+ int number, i, size, pe, mx;
+ int align, queued, sndbuf;
+
+ if (unlikely(cancel))
+ return 1;
+
+ if (unlikely(mdev->state.conn < C_CONNECTED)) {
+ dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
+ return 0;
+ }
+
+ if (mdev->state.conn != C_SYNC_TARGET)
+ dev_err(DEV, "%s in w_make_resync_request\n",
+ drbd_conn_str(mdev->state.conn));
+
+ if (!get_ldev(mdev)) {
+ /* Since we only need to access mdev->rsync a
+ get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
+ to continue resync with a broken disk makes no sense at
+ all */
+ dev_err(DEV, "Disk broke down during resync!\n");
+ mdev->resync_work.cb = w_resync_inactive;
+ return 1;
+ }
+
+ number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
+ pe = atomic_read(&mdev->rs_pending_cnt);
+
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket)
+ mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
+ else
+ mx = 1;
+ mutex_unlock(&mdev->data.mutex);
+
+ /* For resync rates >160MB/sec, allow more pending RS requests */
+ if (number > mx)
+ mx = number;
+
+ /* Limit the number of pending RS requests to no more than the peer's receive buffer */
+ if ((pe + number) > mx) {
+ number = mx - pe;
+ }
+
+ for (i = 0; i < number; i++) {
+ /* Stop generating RS requests, when half of the send buffer is filled */
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket) {
+ queued = mdev->data.socket->sk->sk_wmem_queued;
+ sndbuf = mdev->data.socket->sk->sk_sndbuf;
+ } else {
+ queued = 1;
+ sndbuf = 0;
+ }
+ mutex_unlock(&mdev->data.mutex);
+ if (queued > sndbuf / 2)
+ goto requeue;
+
+next_sector:
+ size = BM_BLOCK_SIZE;
+ bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
+
+ if (bit == -1UL) {
+ mdev->bm_resync_fo = drbd_bm_bits(mdev);
+ mdev->resync_work.cb = w_resync_inactive;
+ put_ldev(mdev);
+ return 1;
+ }
+
+ sector = BM_BIT_TO_SECT(bit);
+
+ if (drbd_try_rs_begin_io(mdev, sector)) {
+ mdev->bm_resync_fo = bit;
+ goto requeue;
+ }
+ mdev->bm_resync_fo = bit + 1;
+
+ if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
+ drbd_rs_complete_io(mdev, sector);
+ goto next_sector;
+ }
+
+#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
+ /* try to find some adjacent bits.
+ * we stop if we have already the maximum req size.
+ *
+ * Additionally always align bigger requests, in order to
+ * be prepared for all stripe sizes of software RAIDs.
+ *
+ * we _do_ care about the agreed-upon q->max_segment_size
+ * here, as splitting up the requests on the other side is more
+ * difficult. the consequence is, that on lvm and md and other
+ * "indirect" devices, this is dead code, since
+ * q->max_segment_size will be PAGE_SIZE.
+ */
+ align = 1;
+ for (;;) {
+ if (size + BM_BLOCK_SIZE > max_segment_size)
+ break;
+
+ /* Be always aligned */
+ if (sector & ((1<<(align+3))-1))
+ break;
+
+ /* do not cross extent boundaries */
+ if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
+ break;
+ /* now, is it actually dirty, after all?
+ * caution, drbd_bm_test_bit is tri-state for some
+ * obscure reason; ( b == 0 ) would get the out-of-band
+ * only accidentally right because of the "oddly sized"
+ * adjustment below */
+ if (drbd_bm_test_bit(mdev, bit+1) != 1)
+ break;
+ bit++;
+ size += BM_BLOCK_SIZE;
+ if ((BM_BLOCK_SIZE << align) <= size)
+ align++;
+ i++;
+ }
+ /* if we merged some,
+ * reset the offset to start the next drbd_bm_find_next from */
+ if (size > BM_BLOCK_SIZE)
+ mdev->bm_resync_fo = bit + 1;
+#endif
+
+ /* adjust very last sectors, in case we are oddly sized */
+ if (sector + (size>>9) > capacity)
+ size = (capacity-sector)<<9;
+ if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
+ switch (read_for_csum(mdev, sector, size)) {
+ case 0: /* Disk failure*/
+ put_ldev(mdev);
+ return 0;
+ case 2: /* Allocation failed */
+ drbd_rs_complete_io(mdev, sector);
+ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
+ goto requeue;
+ /* case 1: everything ok */
+ }
+ } else {
+ inc_rs_pending(mdev);
+ if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
+ sector, size, ID_SYNCER)) {
+ dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
+ dec_rs_pending(mdev);
+ put_ldev(mdev);
+ return 0;
+ }
+ }
+ }
+
+ if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
+ /* last syncer _request_ was sent,
+ * but the P_RS_DATA_REPLY not yet received. sync will end (and
+ * next sync group will resume), as soon as we receive the last
+ * resync data block, and the last bit is cleared.
+ * until then resync "work" is "inactive" ...
+ */
+ mdev->resync_work.cb = w_resync_inactive;
+ put_ldev(mdev);
+ return 1;
+ }
+
+ requeue:
+ mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+ put_ldev(mdev);
+ return 1;
+}
+
+static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ int number, i, size;
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+
+ if (unlikely(cancel))
+ return 1;
+
+ if (unlikely(mdev->state.conn < C_CONNECTED)) {
+ dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
+ return 0;
+ }
+
+ number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
+ if (atomic_read(&mdev->rs_pending_cnt) > number)
+ goto requeue;
+
+ number -= atomic_read(&mdev->rs_pending_cnt);
+
+ sector = mdev->ov_position;
+ for (i = 0; i < number; i++) {
+ if (sector >= capacity) {
+ mdev->resync_work.cb = w_resync_inactive;
+ return 1;
+ }
+
+ size = BM_BLOCK_SIZE;
+
+ if (drbd_try_rs_begin_io(mdev, sector)) {
+ mdev->ov_position = sector;
+ goto requeue;
+ }
+
+ if (sector + (size>>9) > capacity)
+ size = (capacity-sector)<<9;
+
+ inc_rs_pending(mdev);
+ if (!drbd_send_ov_request(mdev, sector, size)) {
+ dec_rs_pending(mdev);
+ return 0;
+ }
+ sector += BM_SECT_PER_BIT;
+ }
+ mdev->ov_position = sector;
+
+ requeue:
+ mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+ return 1;
+}
+
+
+int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ kfree(w);
+ ov_oos_print(mdev);
+ drbd_resync_finished(mdev);
+
+ return 1;
+}
+
+static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ kfree(w);
+
+ drbd_resync_finished(mdev);
+
+ return 1;
+}
+
+int drbd_resync_finished(struct drbd_conf *mdev)
+{
+ unsigned long db, dt, dbdt;
+ unsigned long n_oos;
+ union drbd_state os, ns;
+ struct drbd_work *w;
+ char *khelper_cmd = NULL;
+
+ /* Remove all elements from the resync LRU. Since future actions
+ * might set bits in the (main) bitmap, then the entries in the
+ * resync LRU would be wrong. */
+ if (drbd_rs_del_all(mdev)) {
+ /* In case this is not possible now, most probably because
+ * there are P_RS_DATA_REPLY Packets lingering on the worker's
+ * queue (or even the read operations for those packets
+ * is not finished by now). Retry in 100ms. */
+
+ drbd_kick_lo(mdev);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / 10);
+ w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
+ if (w) {
+ w->cb = w_resync_finished;
+ drbd_queue_work(&mdev->data.work, w);
+ return 1;
+ }
+ dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
+ }
+
+ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+ if (dt <= 0)
+ dt = 1;
+ db = mdev->rs_total;
+ dbdt = Bit2KB(db/dt);
+ mdev->rs_paused /= HZ;
+
+ if (!get_ldev(mdev))
+ goto out;
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+
+ /* This protects us against multiple calls (that can happen in the presence
+ of application IO), and against connectivity loss just before we arrive here. */
+ if (os.conn <= C_CONNECTED)
+ goto out_unlock;
+
+ ns = os;
+ ns.conn = C_CONNECTED;
+
+ dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
+ (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ?
+ "Online verify " : "Resync",
+ dt + mdev->rs_paused, mdev->rs_paused, dbdt);
+
+ n_oos = drbd_bm_total_weight(mdev);
+
+ if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
+ if (n_oos) {
+ dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
+ n_oos, Bit2KB(1));
+ khelper_cmd = "out-of-sync";
+ }
+ } else {
+ D_ASSERT((n_oos - mdev->rs_failed) == 0);
+
+ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
+ khelper_cmd = "after-resync-target";
+
+ if (mdev->csums_tfm && mdev->rs_total) {
+ const unsigned long s = mdev->rs_same_csum;
+ const unsigned long t = mdev->rs_total;
+ const int ratio =
+ (t == 0) ? 0 :
+ (t < 100000) ? ((s*100)/t) : (s/(t/100));
+ dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
+ "transferred %luK total %luK\n",
+ ratio,
+ Bit2KB(mdev->rs_same_csum),
+ Bit2KB(mdev->rs_total - mdev->rs_same_csum),
+ Bit2KB(mdev->rs_total));
+ }
+ }
+
+ if (mdev->rs_failed) {
+ dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
+
+ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
+ ns.disk = D_INCONSISTENT;
+ ns.pdsk = D_UP_TO_DATE;
+ } else {
+ ns.disk = D_UP_TO_DATE;
+ ns.pdsk = D_INCONSISTENT;
+ }
+ } else {
+ ns.disk = D_UP_TO_DATE;
+ ns.pdsk = D_UP_TO_DATE;
+
+ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
+ if (mdev->p_uuid) {
+ int i;
+ for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
+ _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
+ drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
+ _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
+ } else {
+ dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
+ }
+ }
+
+ drbd_uuid_set_bm(mdev, 0UL);
+
+ if (mdev->p_uuid) {
+ /* Now the two UUID sets are equal, update what we
+ * know of the peer. */
+ int i;
+ for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+ mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+ }
+ }
+
+ _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+out_unlock:
+ spin_unlock_irq(&mdev->req_lock);
+ put_ldev(mdev);
+out:
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ mdev->rs_paused = 0;
+ mdev->ov_start_sector = 0;
+
+ if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
+ dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
+ drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
+ }
+
+ if (khelper_cmd)
+ drbd_khelper(mdev, khelper_cmd);
+
+ return 1;
+}
+
+/* helper */
+static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+ if (drbd_bio_has_active_page(e->private_bio)) {
+ /* This might happen if sendpage() has not finished */
+ spin_lock_irq(&mdev->req_lock);
+ list_add_tail(&e->w.list, &mdev->net_ee);
+ spin_unlock_irq(&mdev->req_lock);
+ } else
+ drbd_free_ee(mdev, e);
+}
+
+/**
+ * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways
+ */
+int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ ok = drbd_send_block(mdev, P_DATA_REPLY, e);
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
+ (unsigned long long)e->sector);
+
+ ok = drbd_send_ack(mdev, P_NEG_DREPLY, e);
+ }
+
+ dec_unacked(mdev);
+
+ move_to_net_ee_or_free(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_block() failed\n");
+ return ok;
+}
+
+/**
+ * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways
+ */
+int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ drbd_rs_complete_io(mdev, e->sector);
+ put_ldev(mdev);
+ }
+
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
+ inc_rs_pending(mdev);
+ ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Not sending RSDataReply, "
+ "partner DISKLESS!\n");
+ ok = 1;
+ }
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
+ (unsigned long long)e->sector);
+
+ ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+
+ /* update resync data with failure */
+ drbd_rs_failed_io(mdev, e->sector, e->size);
+ }
+
+ dec_unacked(mdev);
+
+ move_to_net_ee_or_free(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_block() failed\n");
+ return ok;
+}
+
+int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ struct digest_info *di;
+ int digest_size;
+ void *digest = NULL;
+ int ok, eq = 0;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ drbd_rs_complete_io(mdev, e->sector);
+
+ di = (struct digest_info *)(unsigned long)e->block_id;
+
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ /* quick hack to try to avoid a race against reconfiguration.
+ * a real fix would be much more involved,
+ * introducing more locking mechanisms */
+ if (mdev->csums_tfm) {
+ digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+ D_ASSERT(digest_size == di->digest_size);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ }
+ if (digest) {
+ drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest);
+ eq = !memcmp(digest, di->digest, digest_size);
+ kfree(digest);
+ }
+
+ if (eq) {
+ drbd_set_in_sync(mdev, e->sector, e->size);
+ mdev->rs_same_csum++;
+ ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
+ } else {
+ inc_rs_pending(mdev);
+ e->block_id = ID_SYNCER;
+ ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+ }
+ } else {
+ ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
+ }
+
+ dec_unacked(mdev);
+
+ kfree(di);
+
+ move_to_net_ee_or_free(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_block/ack() failed\n");
+ return ok;
+}
+
+int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int digest_size;
+ void *digest;
+ int ok = 1;
+
+ if (unlikely(cancel))
+ goto out;
+
+ if (unlikely(!drbd_bio_uptodate(e->private_bio)))
+ goto out;
+
+ digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+ /* FIXME if this allocation fails, online verify will not terminate! */
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (digest) {
+ drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
+ inc_rs_pending(mdev);
+ ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
+ digest, digest_size, P_OV_REPLY);
+ if (!ok)
+ dec_rs_pending(mdev);
+ kfree(digest);
+ }
+
+out:
+ drbd_free_ee(mdev, e);
+
+ dec_unacked(mdev);
+
+ return ok;
+}
+
+void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
+ mdev->ov_last_oos_size += size>>9;
+ } else {
+ mdev->ov_last_oos_start = sector;
+ mdev->ov_last_oos_size = size>>9;
+ }
+ drbd_set_out_of_sync(mdev, sector, size);
+ set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
+}
+
+int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ struct digest_info *di;
+ int digest_size;
+ void *digest;
+ int ok, eq = 0;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
+ * the resync lru has been cleaned up already */
+ drbd_rs_complete_io(mdev, e->sector);
+
+ di = (struct digest_info *)(unsigned long)e->block_id;
+
+ if (likely(drbd_bio_uptodate(e->private_bio))) {
+ digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (digest) {
+ drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest);
+
+ D_ASSERT(digest_size == di->digest_size);
+ eq = !memcmp(digest, di->digest, digest_size);
+ kfree(digest);
+ }
+ } else {
+ ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
+ }
+
+ dec_unacked(mdev);
+
+ kfree(di);
+
+ if (!eq)
+ drbd_ov_oos_found(mdev, e->sector, e->size);
+ else
+ ov_oos_print(mdev);
+
+ ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
+ eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
+
+ drbd_free_ee(mdev, e);
+
+ if (--mdev->ov_left == 0) {
+ ov_oos_print(mdev);
+ drbd_resync_finished(mdev);
+ }
+
+ return ok;
+}
+
+int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
+ complete(&b->done);
+ return 1;
+}
+
+int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
+ struct p_barrier *p = &mdev->data.sbuf.barrier;
+ int ok = 1;
+
+ /* really avoid racing with tl_clear. w.cb may have been referenced
+ * just before it was reassigned and re-queued, so double check that.
+ * actually, this race was harmless, since we only try to send the
+ * barrier packet here, and otherwise do nothing with the object.
+ * but compare with the head of w_clear_epoch */
+ spin_lock_irq(&mdev->req_lock);
+ if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
+ cancel = 1;
+ spin_unlock_irq(&mdev->req_lock);
+ if (cancel)
+ return 1;
+
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+ p->barrier = b->br_number;
+ /* inc_ap_pending was done where this was queued.
+ * dec_ap_pending will be done in got_BarrierAck
+ * or (on connection loss) in w_clear_epoch. */
+ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
+ (struct p_header *)p, sizeof(*p), 0);
+ drbd_put_data_sock(mdev);
+
+ return ok;
+}
+
+int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ if (cancel)
+ return 1;
+ return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
+}
+
+/**
+ * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways
+ */
+int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_dblock(mdev, req);
+ req_mod(req, ok ? handed_over_to_network : send_failed);
+
+ return ok;
+}
+
+/**
+ * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways
+ */
+int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size,
+ (unsigned long)req);
+
+ if (!ok) {
+ /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send();
+ * so this is probably redundant */
+ if (mdev->state.conn >= C_CONNECTED)
+ drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+ }
+ req_mod(req, ok ? handed_over_to_network : send_failed);
+
+ return ok;
+}
+
+static int _drbd_may_sync_now(struct drbd_conf *mdev)
+{
+ struct drbd_conf *odev = mdev;
+
+ while (1) {
+ if (odev->sync_conf.after == -1)
+ return 1;
+ odev = minor_to_mdev(odev->sync_conf.after);
+ ERR_IF(!odev) return 1;
+ if ((odev->state.conn >= C_SYNC_SOURCE &&
+ odev->state.conn <= C_PAUSED_SYNC_T) ||
+ odev->state.aftr_isp || odev->state.peer_isp ||
+ odev->state.user_isp)
+ return 0;
+ }
+}
+
+/**
+ * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * @mdev: DRBD device.
+ *
+ * Called from process context only (admin command and after_state_ch).
+ */
+static int _drbd_pause_after(struct drbd_conf *mdev)
+{
+ struct drbd_conf *odev;
+ int i, rv = 0;
+
+ for (i = 0; i < minor_count; i++) {
+ odev = minor_to_mdev(i);
+ if (!odev)
+ continue;
+ if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
+ continue;
+ if (!_drbd_may_sync_now(odev))
+ rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
+ != SS_NOTHING_TO_DO);
+ }
+
+ return rv;
+}
+
+/**
+ * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * @mdev: DRBD device.
+ *
+ * Called from process context only (admin command and worker).
+ */
+static int _drbd_resume_next(struct drbd_conf *mdev)
+{
+ struct drbd_conf *odev;
+ int i, rv = 0;
+
+ for (i = 0; i < minor_count; i++) {
+ odev = minor_to_mdev(i);
+ if (!odev)
+ continue;
+ if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
+ continue;
+ if (odev->state.aftr_isp) {
+ if (_drbd_may_sync_now(odev))
+ rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
+ CS_HARD, NULL)
+ != SS_NOTHING_TO_DO) ;
+ }
+ }
+ return rv;
+}
+
+void resume_next_sg(struct drbd_conf *mdev)
+{
+ write_lock_irq(&global_state_lock);
+ _drbd_resume_next(mdev);
+ write_unlock_irq(&global_state_lock);
+}
+
+void suspend_other_sg(struct drbd_conf *mdev)
+{
+ write_lock_irq(&global_state_lock);
+ _drbd_pause_after(mdev);
+ write_unlock_irq(&global_state_lock);
+}
+
+static int sync_after_error(struct drbd_conf *mdev, int o_minor)
+{
+ struct drbd_conf *odev;
+
+ if (o_minor == -1)
+ return NO_ERROR;
+ if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
+ return ERR_SYNC_AFTER;
+
+ /* check for loops */
+ odev = minor_to_mdev(o_minor);
+ while (1) {
+ if (odev == mdev)
+ return ERR_SYNC_AFTER_CYCLE;
+
+ /* dependency chain ends here, no cycles. */
+ if (odev->sync_conf.after == -1)
+ return NO_ERROR;
+
+ /* follow the dependency chain */
+ odev = minor_to_mdev(odev->sync_conf.after);
+ }
+}
+
+int drbd_alter_sa(struct drbd_conf *mdev, int na)
+{
+ int changes;
+ int retcode;
+
+ write_lock_irq(&global_state_lock);
+ retcode = sync_after_error(mdev, na);
+ if (retcode == NO_ERROR) {
+ mdev->sync_conf.after = na;
+ do {
+ changes = _drbd_pause_after(mdev);
+ changes |= _drbd_resume_next(mdev);
+ } while (changes);
+ }
+ write_unlock_irq(&global_state_lock);
+ return retcode;
+}
+
+/**
+ * drbd_start_resync() - Start the resync process
+ * @mdev: DRBD device.
+ * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
+ *
+ * This function might bring you directly into one of the
+ * C_PAUSED_SYNC_* states.
+ */
+void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
+{
+ union drbd_state ns;
+ int r;
+
+ if (mdev->state.conn >= C_SYNC_SOURCE) {
+ dev_err(DEV, "Resync already running!\n");
+ return;
+ }
+
+ /* In case a previous resync run was aborted by an IO error/detach on the peer. */
+ drbd_rs_cancel_all(mdev);
+
+ if (side == C_SYNC_TARGET) {
+ /* Since application IO was locked out during C_WF_BITMAP_T and
+ C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
+ we check that we might make the data inconsistent. */
+ r = drbd_khelper(mdev, "before-resync-target");
+ r = (r >> 8) & 0xff;
+ if (r > 0) {
+ dev_info(DEV, "before-resync-target handler returned %d, "
+ "dropping connection.\n", r);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return;
+ }
+ }
+
+ drbd_state_lock(mdev);
+
+ if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ drbd_state_unlock(mdev);
+ return;
+ }
+
+ if (side == C_SYNC_TARGET) {
+ mdev->bm_resync_fo = 0;
+ } else /* side == C_SYNC_SOURCE */ {
+ u64 uuid;
+
+ get_random_bytes(&uuid, sizeof(u64));
+ drbd_uuid_set(mdev, UI_BITMAP, uuid);
+ drbd_send_sync_uuid(mdev, uuid);
+
+ D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+ }
+
+ write_lock_irq(&global_state_lock);
+ ns = mdev->state;
+
+ ns.aftr_isp = !_drbd_may_sync_now(mdev);
+
+ ns.conn = side;
+
+ if (side == C_SYNC_TARGET)
+ ns.disk = D_INCONSISTENT;
+ else /* side == C_SYNC_SOURCE */
+ ns.pdsk = D_INCONSISTENT;
+
+ r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ ns = mdev->state;
+
+ if (ns.conn < C_CONNECTED)
+ r = SS_UNKNOWN_ERROR;
+
+ if (r == SS_SUCCESS) {
+ mdev->rs_total =
+ mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+ mdev->rs_failed = 0;
+ mdev->rs_paused = 0;
+ mdev->rs_start =
+ mdev->rs_mark_time = jiffies;
+ mdev->rs_same_csum = 0;
+ _drbd_pause_after(mdev);
+ }
+ write_unlock_irq(&global_state_lock);
+ drbd_state_unlock(mdev);
+ put_ldev(mdev);
+
+ if (r == SS_SUCCESS) {
+ dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
+ drbd_conn_str(ns.conn),
+ (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
+ (unsigned long) mdev->rs_total);
+
+ if (mdev->rs_total == 0) {
+ /* Peer still reachable? Beware of failing before-resync-target handlers! */
+ request_ping(mdev);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */
+ drbd_resync_finished(mdev);
+ return;
+ }
+
+ /* ns.conn may already be != mdev->state.conn,
+ * we may have been paused in between, or become paused until
+ * the timer triggers.
+ * No matter, that is handled in resync_timer_fn() */
+ if (ns.conn == C_SYNC_TARGET)
+ mod_timer(&mdev->resync_timer, jiffies);
+
+ drbd_md_sync(mdev);
+ }
+}
+
+int drbd_worker(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct drbd_work *w = NULL;
+ LIST_HEAD(work_list);
+ int intr = 0, i;
+
+ sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev));
+
+ while (get_t_state(thi) == Running) {
+ drbd_thread_current_set_cpu(mdev);
+
+ if (down_trylock(&mdev->data.work.s)) {
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket && !mdev->net_conf->no_cork)
+ drbd_tcp_uncork(mdev->data.socket);
+ mutex_unlock(&mdev->data.mutex);
+
+ intr = down_interruptible(&mdev->data.work.s);
+
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket && !mdev->net_conf->no_cork)
+ drbd_tcp_cork(mdev->data.socket);
+ mutex_unlock(&mdev->data.mutex);
+ }
+
+ if (intr) {
+ D_ASSERT(intr == -EINTR);
+ flush_signals(current);
+ ERR_IF (get_t_state(thi) == Running)
+ continue;
+ break;
+ }
+
+ if (get_t_state(thi) != Running)
+ break;
+ /* With this break, we have done a down() but not consumed
+ the entry from the list. The cleanup code takes care of
+ this... */
+
+ w = NULL;
+ spin_lock_irq(&mdev->data.work.q_lock);
+ ERR_IF(list_empty(&mdev->data.work.q)) {
+ /* something terribly wrong in our logic.
+ * we were able to down() the semaphore,
+ * but the list is empty... doh.
+ *
+ * what is the best thing to do now?
+ * try again from scratch, restarting the receiver,
+ * asender, whatnot? could break even more ugly,
+ * e.g. when we are primary, but no good local data.
+ *
+ * I'll try to get away just starting over this loop.
+ */
+ spin_unlock_irq(&mdev->data.work.q_lock);
+ continue;
+ }
+ w = list_entry(mdev->data.work.q.next, struct drbd_work, list);
+ list_del_init(&w->list);
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) {
+ /* dev_warn(DEV, "worker: a callback failed! \n"); */
+ if (mdev->state.conn >= C_CONNECTED)
+ drbd_force_state(mdev,
+ NS(conn, C_NETWORK_FAILURE));
+ }
+ }
+ D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags));
+ D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags));
+
+ spin_lock_irq(&mdev->data.work.q_lock);
+ i = 0;
+ while (!list_empty(&mdev->data.work.q)) {
+ list_splice_init(&mdev->data.work.q, &work_list);
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ while (!list_empty(&work_list)) {
+ w = list_entry(work_list.next, struct drbd_work, list);
+ list_del_init(&w->list);
+ w->cb(mdev, w, 1);
+ i++; /* dead debugging code */
+ }
+
+ spin_lock_irq(&mdev->data.work.q_lock);
+ }
+ sema_init(&mdev->data.work.s, 0);
+ /* DANGEROUS race: if someone did queue his work within the spinlock,
+ * but up() ed outside the spinlock, we could get an up() on the
+ * semaphore without corresponding list entry.
+ * So don't do that.
+ */
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
+ /* _drbd_set_state only uses stop_nowait.
+ * wait here for the Exiting receiver. */
+ drbd_thread_stop(&mdev->receiver);
+ drbd_mdev_cleanup(mdev);
+
+ dev_info(DEV, "worker terminated\n");
+
+ clear_bit(DEVICE_DYING, &mdev->flags);
+ clear_bit(CONFIG_PENDING, &mdev->flags);
+ wake_up(&mdev->state_wait);
+
+ return 0;
+}
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
new file mode 100644
index 000000000000..f93fa111ce50
--- /dev/null
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -0,0 +1,91 @@
+#ifndef _DRBD_WRAPPERS_H
+#define _DRBD_WRAPPERS_H
+
+#include <linux/ctype.h>
+#include <linux/mm.h>
+
+/* see get_sb_bdev and bd_claim */
+extern char *drbd_sec_holder;
+
+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
+ sector_t size)
+{
+ /* set_capacity(mdev->this_bdev->bd_disk, size); */
+ set_capacity(mdev->vdisk, size);
+ mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
+
+static inline int drbd_bio_has_active_page(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (page_count(bvec->bv_page) > 1)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_endio_read_sec(struct bio *bio, int error);
+extern void drbd_endio_write_sec(struct bio *bio, int error);
+extern void drbd_endio_pri(struct bio *bio, int error);
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_conf *mdev,
+ int fault_type, struct bio *bio)
+{
+ __release(local);
+ if (!bio->bi_bdev) {
+ printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+ "bio->bi_bdev == NULL\n",
+ mdev_to_minor(mdev));
+ dump_stack();
+ bio_endio(bio, -ENODEV);
+ return;
+ }
+
+ if (FAULT_ACTIVE(mdev, fault_type))
+ bio_endio(bio, -EIO);
+ else
+ generic_make_request(bio);
+}
+
+static inline void drbd_plug_device(struct drbd_conf *mdev)
+{
+ struct request_queue *q;
+ q = bdev_get_queue(mdev->this_bdev);
+
+ spin_lock_irq(q->queue_lock);
+
+/* XXX the check on !blk_queue_plugged is redundant,
+ * implicitly checked in blk_plug_device */
+
+ if (!blk_queue_plugged(q)) {
+ blk_plug_device(q);
+ del_timer(&q->unplug_timer);
+ /* unplugging should not happen automatically... */
+ }
+ spin_unlock_irq(q->queue_lock);
+}
+
+static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
+{
+ return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
+ == CRYPTO_ALG_TYPE_HASH;
+}
+
+#ifndef __CHECKER__
+# undef __cond_lock
+# define __cond_lock(x,c) (c)
+#endif
+
+#endif
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 3bb7c47c869f..1fb6c3135fc8 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -123,7 +123,15 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
- unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+ unsigned long timeout;
+
+ for (timeout = 20; timeout; timeout--) {
+ if (!notify[3])
+ return 0;
+ udelay(10);
+ }
+
+ timeout = jiffies + msecs_to_jiffies(timeout_ms);
do {
if (!notify[3])
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 614da5b8613a..e3749d0ba68b 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3557,67 +3557,65 @@ static ctl_table cdrom_table[] = {
.data = &cdrom_sysctl_settings.info,
.maxlen = CDROM_STR_SIZE,
.mode = 0444,
- .proc_handler = &cdrom_sysctl_info,
+ .proc_handler = cdrom_sysctl_info,
},
{
.procname = "autoclose",
.data = &cdrom_sysctl_settings.autoclose,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cdrom_sysctl_handler,
+ .proc_handler = cdrom_sysctl_handler,
},
{
.procname = "autoeject",
.data = &cdrom_sysctl_settings.autoeject,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cdrom_sysctl_handler,
+ .proc_handler = cdrom_sysctl_handler,
},
{
.procname = "debug",
.data = &cdrom_sysctl_settings.debug,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cdrom_sysctl_handler,
+ .proc_handler = cdrom_sysctl_handler,
},
{
.procname = "lock",
.data = &cdrom_sysctl_settings.lock,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cdrom_sysctl_handler,
+ .proc_handler = cdrom_sysctl_handler,
},
{
.procname = "check_media",
.data = &cdrom_sysctl_settings.check,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &cdrom_sysctl_handler
+ .proc_handler = cdrom_sysctl_handler
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table cdrom_cdrom_table[] = {
{
- .ctl_name = DEV_CDROM,
.procname = "cdrom",
.maxlen = 0,
.mode = 0555,
.child = cdrom_table,
},
- { .ctl_name = 0 }
+ { }
};
/* Make sure that /proc/sys/dev is there */
static ctl_table cdrom_root_table[] = {
{
- .ctl_name = CTL_DEV,
.procname = "dev",
.maxlen = 0,
.mode = 0555,
.child = cdrom_cdrom_table,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table_header *cdrom_sysctl_header;
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 70a770ac0138..e481c5938bad 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -675,36 +675,33 @@ static int hpet_is_known(struct hpet_data *hdp)
static ctl_table hpet_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "max-user-freq",
.data = &hpet_max_freq,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- {.ctl_name = 0}
+ {}
};
static ctl_table hpet_root[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hpet",
.maxlen = 0,
.mode = 0555,
.child = hpet_table,
},
- {.ctl_name = 0}
+ {}
};
static ctl_table dev_root[] = {
{
- .ctl_name = CTL_DEV,
.procname = "dev",
.maxlen = 0,
.mode = 0555,
.child = hpet_root,
},
- {.ctl_name = 0}
+ {}
};
static struct ctl_table_header *sysctl_header;
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index 2e66b5f773dd..0dec5da000ef 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -660,26 +660,23 @@ static struct ipmi_smi_watcher smi_watcher = {
#include <linux/sysctl.h>
static ctl_table ipmi_table[] = {
- { .ctl_name = DEV_IPMI_POWEROFF_POWERCYCLE,
- .procname = "poweroff_powercycle",
+ { .procname = "poweroff_powercycle",
.data = &poweroff_powercycle,
.maxlen = sizeof(poweroff_powercycle),
.mode = 0644,
- .proc_handler = &proc_dointvec },
+ .proc_handler = proc_dointvec },
{ }
};
static ctl_table ipmi_dir_table[] = {
- { .ctl_name = DEV_IPMI,
- .procname = "ipmi",
+ { .procname = "ipmi",
.mode = 0555,
.child = ipmi_table },
{ }
};
static ctl_table ipmi_root_table[] = {
- { .ctl_name = CTL_DEV,
- .procname = "dev",
+ { .procname = "dev",
.mode = 0555,
.child = ipmi_dir_table },
{ }
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 62f282e67638..d86c0bc05c1c 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -431,30 +431,25 @@ static struct cdev ptmx_cdev;
static struct ctl_table pty_table[] = {
{
- .ctl_name = PTY_MAX,
.procname = "max",
.maxlen = sizeof(int),
.mode = 0644,
.data = &pty_limit,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &pty_limit_min,
.extra2 = &pty_limit_max,
}, {
- .ctl_name = PTY_NR,
.procname = "nr",
.maxlen = sizeof(int),
.mode = 0444,
.data = &pty_count,
- .proc_handler = &proc_dointvec,
- }, {
- .ctl_name = 0
- }
+ .proc_handler = proc_dointvec,
+ },
+ {}
};
static struct ctl_table pty_kern_table[] = {
{
- .ctl_name = KERN_PTY,
.procname = "pty",
.mode = 0555,
.child = pty_table,
@@ -464,7 +459,6 @@ static struct ctl_table pty_kern_table[] = {
static struct ctl_table pty_root_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = pty_kern_table,
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 04b505e5a5e2..dcd08635cf1b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1257,94 +1257,54 @@ static int proc_do_uuid(ctl_table *table, int write,
return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
-static int uuid_strategy(ctl_table *table,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- unsigned char tmp_uuid[16], *uuid;
- unsigned int len;
-
- if (!oldval || !oldlenp)
- return 1;
-
- uuid = table->data;
- if (!uuid) {
- uuid = tmp_uuid;
- uuid[8] = 0;
- }
- if (uuid[8] == 0)
- generate_random_uuid(uuid);
-
- if (get_user(len, oldlenp))
- return -EFAULT;
- if (len) {
- if (len > 16)
- len = 16;
- if (copy_to_user(oldval, uuid, len) ||
- put_user(len, oldlenp))
- return -EFAULT;
- }
- return 1;
-}
-
static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
ctl_table random_table[] = {
{
- .ctl_name = RANDOM_POOLSIZE,
.procname = "poolsize",
.data = &sysctl_poolsize,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = RANDOM_ENTROPY_COUNT,
.procname = "entropy_avail",
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
.data = &input_pool.entropy_count,
},
{
- .ctl_name = RANDOM_READ_THRESH,
.procname = "read_wakeup_threshold",
.data = &random_read_wakeup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_read_thresh,
.extra2 = &max_read_thresh,
},
{
- .ctl_name = RANDOM_WRITE_THRESH,
.procname = "write_wakeup_threshold",
.data = &random_write_wakeup_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_write_thresh,
.extra2 = &max_write_thresh,
},
{
- .ctl_name = RANDOM_BOOT_ID,
.procname = "boot_id",
.data = &sysctl_bootid,
.maxlen = 16,
.mode = 0444,
- .proc_handler = &proc_do_uuid,
- .strategy = &uuid_strategy,
+ .proc_handler = proc_do_uuid,
},
{
- .ctl_name = RANDOM_UUID,
.procname = "uuid",
.maxlen = 16,
.mode = 0444,
- .proc_handler = &proc_do_uuid,
- .strategy = &uuid_strategy,
+ .proc_handler = proc_do_uuid,
},
- { .ctl_name = 0 }
+ { }
};
#endif /* CONFIG_SYSCTL */
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index bc4ab3e54550..95acb8c880f4 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -282,34 +282,31 @@ static irqreturn_t rtc_interrupt(int irq, void *dev_id)
*/
static ctl_table rtc_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "max-user-freq",
.data = &rtc_max_user_freq,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table rtc_root[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rtc",
.mode = 0555,
.child = rtc_table,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table dev_root[] = {
{
- .ctl_name = CTL_DEV,
.procname = "dev",
.mode = 0555,
.child = rtc_root,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table_header *sysctl_header;
diff --git a/drivers/dio/dio-driver.c b/drivers/dio/dio-driver.c
index 9c0c9afcd0ac..a7b174ef4c85 100644
--- a/drivers/dio/dio-driver.c
+++ b/drivers/dio/dio-driver.c
@@ -140,5 +140,4 @@ postcore_initcall(dio_driver_init);
EXPORT_SYMBOL(dio_match_device);
EXPORT_SYMBOL(dio_register_driver);
EXPORT_SYMBOL(dio_unregister_driver);
-EXPORT_SYMBOL(dio_dev_driver);
EXPORT_SYMBOL(dio_bus_type);
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index b401dadad4a8..eb140ff38c27 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -54,7 +54,7 @@ config DW_DMAC
config AT_HDMAC
tristate "Atmel AHB DMA support"
- depends on ARCH_AT91SAM9RL
+ depends on ARCH_AT91SAM9RL || ARCH_AT91SAM9G45
select DMA_ENGINE
help
Support the Atmel AHB DMA controller. This can be integrated in
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 713ed7d37247..689cc6a6214d 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -3,7 +3,6 @@
static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-static void (*orig_mce_callback)(struct mce *m);
void amd_report_gart_errors(bool v)
{
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
-static void amd_decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
+ struct mce *m = (struct mce *)data;
struct err_regs regs;
int node, ecc;
@@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m)
}
amd_decode_err_code(m->status & 0xffff);
+
+ return NOTIFY_STOP;
}
+static struct notifier_block amd_mce_dec_nb = {
+ .notifier_call = amd_decode_mce,
+};
+
static int __init mce_amd_init(void)
{
/*
* We can decode MCEs for Opteron and later CPUs:
*/
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
- (boot_cpu_data.x86 >= 0xf)) {
- /* safe the default decode mce callback */
- orig_mce_callback = x86_mce_decode_callback;
-
- x86_mce_decode_callback = amd_decode_mce;
- }
+ (boot_cpu_data.x86 >= 0xf))
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
return 0;
}
@@ -442,7 +445,7 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
- x86_mce_decode_callback = orig_mce_callback;
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
}
MODULE_DESCRIPTION("AMD MCE decoder");
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index e4864e894e4f..7083bcc1b9c7 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -38,15 +38,14 @@
#include "core.h"
-int fw_compute_block_crc(u32 *block)
+int fw_compute_block_crc(__be32 *block)
{
- __be32 be32_block[256];
- int i, length;
+ int length;
+ u16 crc;
- length = (*block >> 16) & 0xff;
- for (i = 0; i < length; i++)
- be32_block[i] = cpu_to_be32(block[i + 1]);
- *block |= crc_itu_t(0, (u8 *) be32_block, length * 4);
+ length = (be32_to_cpu(block[0]) >> 16) & 0xff;
+ crc = crc_itu_t(0, (u8 *)&block[1], length * 4);
+ *block |= cpu_to_be32(crc);
return length;
}
@@ -57,6 +56,8 @@ static LIST_HEAD(card_list);
static LIST_HEAD(descriptor_list);
static int descriptor_count;
+static __be32 tmp_config_rom[256];
+
#define BIB_CRC(v) ((v) << 0)
#define BIB_CRC_LENGTH(v) ((v) << 16)
#define BIB_INFO_LENGTH(v) ((v) << 24)
@@ -72,11 +73,10 @@ static int descriptor_count;
#define BIB_CMC ((1) << 30)
#define BIB_IMC ((1) << 31)
-static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
+static size_t generate_config_rom(struct fw_card *card, __be32 *config_rom)
{
struct fw_descriptor *desc;
- static u32 config_rom[256];
- int i, j, length;
+ int i, j, k, length;
/*
* Initialize contents of config rom buffer. On the OHCI
@@ -87,40 +87,39 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
* the version stored in the OHCI registers.
*/
- memset(config_rom, 0, sizeof(config_rom));
- config_rom[0] = BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0);
- config_rom[1] = 0x31333934;
-
- config_rom[2] =
+ config_rom[0] = cpu_to_be32(
+ BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0));
+ config_rom[1] = cpu_to_be32(0x31333934);
+ config_rom[2] = cpu_to_be32(
BIB_LINK_SPEED(card->link_speed) |
BIB_GENERATION(card->config_rom_generation++ % 14 + 2) |
BIB_MAX_ROM(2) |
BIB_MAX_RECEIVE(card->max_receive) |
- BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC;
- config_rom[3] = card->guid >> 32;
- config_rom[4] = card->guid;
+ BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC);
+ config_rom[3] = cpu_to_be32(card->guid >> 32);
+ config_rom[4] = cpu_to_be32(card->guid);
/* Generate root directory. */
- i = 5;
- config_rom[i++] = 0;
- config_rom[i++] = 0x0c0083c0; /* node capabilities */
- j = i + descriptor_count;
+ config_rom[6] = cpu_to_be32(0x0c0083c0); /* node capabilities */
+ i = 7;
+ j = 7 + descriptor_count;
/* Generate root directory entries for descriptors. */
list_for_each_entry (desc, &descriptor_list, link) {
if (desc->immediate > 0)
- config_rom[i++] = desc->immediate;
- config_rom[i] = desc->key | (j - i);
+ config_rom[i++] = cpu_to_be32(desc->immediate);
+ config_rom[i] = cpu_to_be32(desc->key | (j - i));
i++;
j += desc->length;
}
/* Update root directory length. */
- config_rom[5] = (i - 5 - 1) << 16;
+ config_rom[5] = cpu_to_be32((i - 5 - 1) << 16);
/* End of root directory, now copy in descriptors. */
list_for_each_entry (desc, &descriptor_list, link) {
- memcpy(&config_rom[i], desc->data, desc->length * 4);
+ for (k = 0; k < desc->length; k++)
+ config_rom[i + k] = cpu_to_be32(desc->data[k]);
i += desc->length;
}
@@ -131,20 +130,17 @@ static u32 *generate_config_rom(struct fw_card *card, size_t *config_rom_length)
for (i = 0; i < j; i += length + 1)
length = fw_compute_block_crc(config_rom + i);
- *config_rom_length = j;
-
- return config_rom;
+ return j;
}
static void update_config_roms(void)
{
struct fw_card *card;
- u32 *config_rom;
size_t length;
list_for_each_entry (card, &card_list, link) {
- config_rom = generate_config_rom(card, &length);
- card->driver->set_config_rom(card, config_rom, length);
+ length = generate_config_rom(card, tmp_config_rom);
+ card->driver->set_config_rom(card, tmp_config_rom, length);
}
}
@@ -211,11 +207,8 @@ static const char gap_count_table[] = {
void fw_schedule_bm_work(struct fw_card *card, unsigned long delay)
{
- int scheduled;
-
fw_card_get(card);
- scheduled = schedule_delayed_work(&card->work, delay);
- if (!scheduled)
+ if (!schedule_delayed_work(&card->work, delay))
fw_card_put(card);
}
@@ -435,7 +428,6 @@ EXPORT_SYMBOL(fw_card_initialize);
int fw_card_add(struct fw_card *card,
u32 max_receive, u32 link_speed, u64 guid)
{
- u32 *config_rom;
size_t length;
int ret;
@@ -445,8 +437,8 @@ int fw_card_add(struct fw_card *card,
mutex_lock(&card_mutex);
- config_rom = generate_config_rom(card, &length);
- ret = card->driver->enable(card, config_rom, length);
+ length = generate_config_rom(card, tmp_config_rom);
+ ret = card->driver->enable(card, tmp_config_rom, length);
if (ret == 0)
list_add_tail(&card->link, &card_list);
@@ -465,7 +457,8 @@ EXPORT_SYMBOL(fw_card_add);
* shutdown still need to be provided by the card driver.
*/
-static int dummy_enable(struct fw_card *card, u32 *config_rom, size_t length)
+static int dummy_enable(struct fw_card *card,
+ const __be32 *config_rom, size_t length)
{
BUG();
return -1;
@@ -478,7 +471,7 @@ static int dummy_update_phy_reg(struct fw_card *card, int address,
}
static int dummy_set_config_rom(struct fw_card *card,
- u32 *config_rom, size_t length)
+ const __be32 *config_rom, size_t length)
{
/*
* We take the card out of card_list before setting the dummy
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 5089331544ed..231e6ee5ba43 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -130,9 +130,22 @@ struct iso_resource {
struct iso_resource_event *e_alloc, *e_dealloc;
};
-static void schedule_iso_resource(struct iso_resource *);
static void release_iso_resource(struct client *, struct client_resource *);
+static void schedule_iso_resource(struct iso_resource *r, unsigned long delay)
+{
+ client_get(r->client);
+ if (!schedule_delayed_work(&r->work, delay))
+ client_put(r->client);
+}
+
+static void schedule_if_iso_resource(struct client_resource *resource)
+{
+ if (resource->release == release_iso_resource)
+ schedule_iso_resource(container_of(resource,
+ struct iso_resource, resource), 0);
+}
+
/*
* dequeue_event() just kfree()'s the event, so the event has to be
* the first field in a struct XYZ_event.
@@ -166,7 +179,7 @@ struct iso_interrupt_event {
struct iso_resource_event {
struct event event;
- struct fw_cdev_event_iso_resource resource;
+ struct fw_cdev_event_iso_resource iso_resource;
};
static inline void __user *u64_to_uptr(__u64 value)
@@ -314,11 +327,8 @@ static void for_each_client(struct fw_device *device,
static int schedule_reallocations(int id, void *p, void *data)
{
- struct client_resource *r = p;
+ schedule_if_iso_resource(p);
- if (r->release == release_iso_resource)
- schedule_iso_resource(container_of(r,
- struct iso_resource, resource));
return 0;
}
@@ -414,9 +424,7 @@ static int add_client_resource(struct client *client,
&resource->handle);
if (ret >= 0) {
client_get(client);
- if (resource->release == release_iso_resource)
- schedule_iso_resource(container_of(resource,
- struct iso_resource, resource));
+ schedule_if_iso_resource(resource);
}
spin_unlock_irqrestore(&client->lock, flags);
@@ -428,26 +436,26 @@ static int add_client_resource(struct client *client,
static int release_client_resource(struct client *client, u32 handle,
client_resource_release_fn_t release,
- struct client_resource **resource)
+ struct client_resource **return_resource)
{
- struct client_resource *r;
+ struct client_resource *resource;
spin_lock_irq(&client->lock);
if (client->in_shutdown)
- r = NULL;
+ resource = NULL;
else
- r = idr_find(&client->resource_idr, handle);
- if (r && r->release == release)
+ resource = idr_find(&client->resource_idr, handle);
+ if (resource && resource->release == release)
idr_remove(&client->resource_idr, handle);
spin_unlock_irq(&client->lock);
- if (!(r && r->release == release))
+ if (!(resource && resource->release == release))
return -EINVAL;
- if (resource)
- *resource = r;
+ if (return_resource)
+ *return_resource = resource;
else
- r->release(client, r);
+ resource->release(client, resource);
client_put(client);
@@ -699,6 +707,7 @@ static int ioctl_send_response(struct client *client, void *buffer)
struct fw_cdev_send_response *request = buffer;
struct client_resource *resource;
struct inbound_transaction_resource *r;
+ int ret = 0;
if (release_client_resource(client, request->handle,
release_request, &resource) < 0)
@@ -708,13 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer)
resource);
if (request->length < r->length)
r->length = request->length;
- if (copy_from_user(r->data, u64_to_uptr(request->data), r->length))
- return -EFAULT;
+
+ if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) {
+ ret = -EFAULT;
+ goto out;
+ }
fw_send_response(client->device->card, r->request, request->rcode);
+ out:
kfree(r);
- return 0;
+ return ret;
}
static int ioctl_initiate_bus_reset(struct client *client, void *buffer)
@@ -1028,8 +1041,7 @@ static void iso_resource_work(struct work_struct *work)
/* Allow 1000ms grace period for other reallocations. */
if (todo == ISO_RES_ALLOC &&
time_is_after_jiffies(client->device->card->reset_jiffies + HZ)) {
- if (schedule_delayed_work(&r->work, DIV_ROUND_UP(HZ, 3)))
- client_get(client);
+ schedule_iso_resource(r, DIV_ROUND_UP(HZ, 3));
skip = true;
} else {
/* We could be called twice within the same generation. */
@@ -1097,12 +1109,12 @@ static void iso_resource_work(struct work_struct *work)
e = r->e_dealloc;
r->e_dealloc = NULL;
}
- e->resource.handle = r->resource.handle;
- e->resource.channel = channel;
- e->resource.bandwidth = bandwidth;
+ e->iso_resource.handle = r->resource.handle;
+ e->iso_resource.channel = channel;
+ e->iso_resource.bandwidth = bandwidth;
queue_event(client, &e->event,
- &e->resource, sizeof(e->resource), NULL, 0);
+ &e->iso_resource, sizeof(e->iso_resource), NULL, 0);
if (free) {
cancel_delayed_work(&r->work);
@@ -1114,13 +1126,6 @@ static void iso_resource_work(struct work_struct *work)
client_put(client);
}
-static void schedule_iso_resource(struct iso_resource *r)
-{
- client_get(r->client);
- if (!schedule_delayed_work(&r->work, 0))
- client_put(r->client);
-}
-
static void release_iso_resource(struct client *client,
struct client_resource *resource)
{
@@ -1129,7 +1134,7 @@ static void release_iso_resource(struct client *client,
spin_lock_irq(&client->lock);
r->todo = ISO_RES_DEALLOC;
- schedule_iso_resource(r);
+ schedule_iso_resource(r, 0);
spin_unlock_irq(&client->lock);
}
@@ -1162,10 +1167,10 @@ static int init_iso_resource(struct client *client,
r->e_alloc = e1;
r->e_dealloc = e2;
- e1->resource.closure = request->closure;
- e1->resource.type = FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED;
- e2->resource.closure = request->closure;
- e2->resource.type = FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED;
+ e1->iso_resource.closure = request->closure;
+ e1->iso_resource.type = FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED;
+ e2->iso_resource.closure = request->closure;
+ e2->iso_resource.type = FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED;
if (todo == ISO_RES_ALLOC) {
r->resource.release = release_iso_resource;
@@ -1175,7 +1180,7 @@ static int init_iso_resource(struct client *client,
} else {
r->resource.release = NULL;
r->resource.handle = -1;
- schedule_iso_resource(r);
+ schedule_iso_resource(r, 0);
}
request->handle = r->resource.handle;
@@ -1295,7 +1300,23 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
static int dispatch_ioctl(struct client *client,
unsigned int cmd, void __user *arg)
{
- char buffer[256];
+ char buffer[sizeof(union {
+ struct fw_cdev_get_info _00;
+ struct fw_cdev_send_request _01;
+ struct fw_cdev_allocate _02;
+ struct fw_cdev_deallocate _03;
+ struct fw_cdev_send_response _04;
+ struct fw_cdev_initiate_bus_reset _05;
+ struct fw_cdev_add_descriptor _06;
+ struct fw_cdev_remove_descriptor _07;
+ struct fw_cdev_create_iso_context _08;
+ struct fw_cdev_queue_iso _09;
+ struct fw_cdev_start_iso _0a;
+ struct fw_cdev_stop_iso _0b;
+ struct fw_cdev_get_cycle_timer _0c;
+ struct fw_cdev_allocate_iso_resource _0d;
+ struct fw_cdev_send_stream_packet _13;
+ })];
int ret;
if (_IOC_TYPE(cmd) != '#' ||
@@ -1390,10 +1411,10 @@ static int fw_device_op_mmap(struct file *file, struct vm_area_struct *vma)
static int shutdown_resource(int id, void *p, void *data)
{
- struct client_resource *r = p;
+ struct client_resource *resource = p;
struct client *client = data;
- r->release(client, r);
+ resource->release(client, resource);
client_put(client);
return 0;
@@ -1402,7 +1423,7 @@ static int shutdown_resource(int id, void *p, void *data)
static int fw_device_op_release(struct inode *inode, struct file *file)
{
struct client *client = file->private_data;
- struct event *e, *next_e;
+ struct event *event, *next_event;
mutex_lock(&client->device->client_list_mutex);
list_del(&client->link);
@@ -1423,8 +1444,8 @@ static int fw_device_op_release(struct inode *inode, struct file *file)
idr_remove_all(&client->resource_idr);
idr_destroy(&client->resource_idr);
- list_for_each_entry_safe(e, next_e, &client->event_list, link)
- kfree(e);
+ list_for_each_entry_safe(event, next_event, &client->event_list, link)
+ kfree(event);
client_put(client);
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index fddf2b358936..9a5f38c80b0e 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -28,9 +28,9 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/string.h>
#include <asm/atomic.h>
+#include <asm/byteorder.h>
#include <asm/system.h>
#include "core.h"
@@ -510,13 +510,16 @@ static void update_tree(struct fw_card *card, struct fw_node *root)
static void update_topology_map(struct fw_card *card,
u32 *self_ids, int self_id_count)
{
- int node_count;
+ int node_count = (card->root_node->node_id & 0x3f) + 1;
+ __be32 *map = card->topology_map;
+
+ *map++ = cpu_to_be32((self_id_count + 2) << 16);
+ *map++ = cpu_to_be32(be32_to_cpu(card->topology_map[1]) + 1);
+ *map++ = cpu_to_be32((node_count << 16) | self_id_count);
+
+ while (self_id_count--)
+ *map++ = cpu_to_be32p(self_ids++);
- card->topology_map[1]++;
- node_count = (card->root_node->node_id & 0x3f) + 1;
- card->topology_map[2] = (node_count << 16) | self_id_count;
- card->topology_map[0] = (self_id_count + 2) << 16;
- memcpy(&card->topology_map[3], self_ids, self_id_count * 4);
fw_compute_block_crc(card->topology_map);
}
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index da628c72a462..842739df23e2 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -218,12 +218,15 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
packet->header_length = 16;
packet->payload_length = 0;
break;
+
+ default:
+ WARN(1, KERN_ERR "wrong tcode %d", tcode);
}
common:
packet->speed = speed;
packet->generation = generation;
packet->ack = 0;
- packet->payload_bus = 0;
+ packet->payload_mapped = false;
}
/**
@@ -595,11 +598,10 @@ void fw_fill_response(struct fw_packet *response, u32 *request_header,
break;
default:
- BUG();
- return;
+ WARN(1, KERN_ERR "wrong tcode %d", tcode);
}
- response->payload_bus = 0;
+ response->payload_mapped = false;
}
EXPORT_SYMBOL(fw_fill_response);
@@ -810,8 +812,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request
int speed, unsigned long long offset,
void *payload, size_t length, void *callback_data)
{
- int i, start, end;
- __be32 *map;
+ int start;
if (!TCODE_IS_READ_REQUEST(tcode)) {
fw_send_response(card, request, RCODE_TYPE_ERROR);
@@ -824,11 +825,7 @@ static void handle_topology_map(struct fw_card *card, struct fw_request *request
}
start = (offset - topology_map_region.start) / 4;
- end = start + length / 4;
- map = payload;
-
- for (i = 0; i < length / 4; i++)
- map[i] = cpu_to_be32(card->topology_map[start + i]);
+ memcpy(payload, &card->topology_map[start], length);
fw_send_response(card, request, RCODE_COMPLETE);
}
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 7ff6e7585152..ed3b1a765c00 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -40,7 +40,8 @@ struct fw_card_driver {
* enable the PHY or set the link_on bit and initiate a bus
* reset.
*/
- int (*enable)(struct fw_card *card, u32 *config_rom, size_t length);
+ int (*enable)(struct fw_card *card,
+ const __be32 *config_rom, size_t length);
int (*update_phy_reg)(struct fw_card *card, int address,
int clear_bits, int set_bits);
@@ -48,10 +49,10 @@ struct fw_card_driver {
/*
* Update the config rom for an enabled card. This function
* should change the config rom that is presented on the bus
- * an initiate a bus reset.
+ * and initiate a bus reset.
*/
int (*set_config_rom)(struct fw_card *card,
- u32 *config_rom, size_t length);
+ const __be32 *config_rom, size_t length);
void (*send_request)(struct fw_card *card, struct fw_packet *packet);
void (*send_response)(struct fw_card *card, struct fw_packet *packet);
@@ -93,7 +94,7 @@ int fw_card_add(struct fw_card *card,
u32 max_receive, u32 link_speed, u64 guid);
void fw_core_remove_card(struct fw_card *card);
int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
-int fw_compute_block_crc(u32 *block);
+int fw_compute_block_crc(__be32 *block);
void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
static inline struct fw_card *fw_card_get(struct fw_card *card)
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 94260aa76aa3..ae4556f0c0c1 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -205,7 +205,7 @@ struct fw_ohci {
dma_addr_t config_rom_bus;
__be32 *next_config_rom;
dma_addr_t next_config_rom_bus;
- u32 next_header;
+ __be32 next_header;
struct ar_context ar_request_ctx;
struct ar_context ar_response_ctx;
@@ -997,7 +997,8 @@ static int at_context_queue_packet(struct context *ctx,
packet->ack = RCODE_SEND_ERROR;
return -1;
}
- packet->payload_bus = payload_bus;
+ packet->payload_bus = payload_bus;
+ packet->payload_mapped = true;
d[2].req_count = cpu_to_le16(packet->payload_length);
d[2].data_address = cpu_to_le32(payload_bus);
@@ -1025,7 +1026,7 @@ static int at_context_queue_packet(struct context *ctx,
*/
if (ohci->generation != packet->generation ||
reg_read(ohci, OHCI1394_IntEventSet) & OHCI1394_busReset) {
- if (packet->payload_length > 0)
+ if (packet->payload_mapped)
dma_unmap_single(ohci->card.device, payload_bus,
packet->payload_length, DMA_TO_DEVICE);
packet->ack = RCODE_GENERATION;
@@ -1061,7 +1062,7 @@ static int handle_at_packet(struct context *context,
/* This packet was cancelled, just continue. */
return 1;
- if (packet->payload_bus)
+ if (packet->payload_mapped)
dma_unmap_single(ohci->card.device, packet->payload_bus,
packet->payload_length, DMA_TO_DEVICE);
@@ -1357,8 +1358,9 @@ static void bus_reset_tasklet(unsigned long data)
*/
reg_write(ohci, OHCI1394_BusOptions,
be32_to_cpu(ohci->config_rom[2]));
- ohci->config_rom[0] = cpu_to_be32(ohci->next_header);
- reg_write(ohci, OHCI1394_ConfigROMhdr, ohci->next_header);
+ ohci->config_rom[0] = ohci->next_header;
+ reg_write(ohci, OHCI1394_ConfigROMhdr,
+ be32_to_cpu(ohci->next_header));
}
#ifdef CONFIG_FIREWIRE_OHCI_REMOTE_DMA
@@ -1477,7 +1479,17 @@ static int software_reset(struct fw_ohci *ohci)
return -EBUSY;
}
-static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
+static void copy_config_rom(__be32 *dest, const __be32 *src, size_t length)
+{
+ size_t size = length * 4;
+
+ memcpy(dest, src, size);
+ if (size < CONFIG_ROM_SIZE)
+ memset(&dest[length], 0, CONFIG_ROM_SIZE - size);
+}
+
+static int ohci_enable(struct fw_card *card,
+ const __be32 *config_rom, size_t length)
{
struct fw_ohci *ohci = fw_ohci(card);
struct pci_dev *dev = to_pci_dev(card->device);
@@ -1579,8 +1591,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
if (ohci->next_config_rom == NULL)
return -ENOMEM;
- memset(ohci->next_config_rom, 0, CONFIG_ROM_SIZE);
- fw_memcpy_to_be32(ohci->next_config_rom, config_rom, length * 4);
+ copy_config_rom(ohci->next_config_rom, config_rom, length);
} else {
/*
* In the suspend case, config_rom is NULL, which
@@ -1590,7 +1601,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
ohci->next_config_rom_bus = ohci->config_rom_bus;
}
- ohci->next_header = be32_to_cpu(ohci->next_config_rom[0]);
+ ohci->next_header = ohci->next_config_rom[0];
ohci->next_config_rom[0] = 0;
reg_write(ohci, OHCI1394_ConfigROMhdr, 0);
reg_write(ohci, OHCI1394_BusOptions,
@@ -1624,7 +1635,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
}
static int ohci_set_config_rom(struct fw_card *card,
- u32 *config_rom, size_t length)
+ const __be32 *config_rom, size_t length)
{
struct fw_ohci *ohci;
unsigned long flags;
@@ -1673,9 +1684,7 @@ static int ohci_set_config_rom(struct fw_card *card,
ohci->next_config_rom = next_config_rom;
ohci->next_config_rom_bus = next_config_rom_bus;
- memset(ohci->next_config_rom, 0, CONFIG_ROM_SIZE);
- fw_memcpy_to_be32(ohci->next_config_rom, config_rom,
- length * 4);
+ copy_config_rom(ohci->next_config_rom, config_rom, length);
ohci->next_header = config_rom[0];
ohci->next_config_rom[0] = 0;
@@ -1729,7 +1738,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet)
if (packet->ack != 0)
goto out;
- if (packet->payload_bus)
+ if (packet->payload_mapped)
dma_unmap_single(ohci->card.device, packet->payload_bus,
packet->payload_length, DMA_TO_DEVICE);
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 98dbbda3ad41..d485cdd8cbac 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -820,20 +820,25 @@ static void sbp2_release_target(struct kref *kref)
fw_device_put(device);
}
-static struct workqueue_struct *sbp2_wq;
+static void sbp2_target_get(struct sbp2_target *tgt)
+{
+ kref_get(&tgt->kref);
+}
static void sbp2_target_put(struct sbp2_target *tgt)
{
kref_put(&tgt->kref, sbp2_release_target);
}
+static struct workqueue_struct *sbp2_wq;
+
/*
* Always get the target's kref when scheduling work on one its units.
* Each workqueue job is responsible to call sbp2_target_put() upon return.
*/
static void sbp2_queue_work(struct sbp2_logical_unit *lu, unsigned long delay)
{
- kref_get(&lu->tgt->kref);
+ sbp2_target_get(lu->tgt);
if (!queue_delayed_work(sbp2_wq, &lu->work, delay))
sbp2_target_put(lu->tgt);
}
diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index d7ece131b4f4..8d8a00e5a30e 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -5,6 +5,7 @@
menuconfig I2C
tristate "I2C support"
depends on HAS_IOMEM
+ select RT_MUTEXES
---help---
I2C (pronounce: I-square-C) is a slow serial bus protocol used in
many micro controller applications and developed by Philips. SMBus,
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e8fe7f169e25..5f318ce29770 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -640,22 +640,6 @@ config I2C_TINY_USB
This driver can also be built as a module. If so, the module
will be called i2c-tiny-usb.
-comment "Graphics adapter I2C/DDC channel drivers"
- depends on PCI
-
-config I2C_VOODOO3
- tristate "Voodoo 3 (DEPRECATED)"
- depends on PCI
- select I2C_ALGOBIT
- help
- If you say yes to this option, support will be included for the
- Voodoo 3 I2C interface. This driver is deprecated and you should
- use the tdfxfb driver instead, which additionally provides
- framebuffer support.
-
- This driver can also be built as a module. If so, the module
- will be called i2c-voodoo3.
-
comment "Other I2C/SMBus bus drivers"
config I2C_ACORN
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index ff937ac69f5b..302c551977bb 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -61,9 +61,6 @@ obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o
obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o
obj-$(CONFIG_I2C_TINY_USB) += i2c-tiny-usb.o
-# Graphics adapter I2C/DDC channel drivers
-obj-$(CONFIG_I2C_VOODOO3) += i2c-voodoo3.o
-
# Other I2C/SMBus bus drivers
obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o
obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o
diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c
index d108450df064..8de7d7b87bb0 100644
--- a/drivers/i2c/busses/i2c-ali1535.c
+++ b/drivers/i2c/busses/i2c-ali1535.c
@@ -138,7 +138,7 @@ static unsigned short ali1535_smba;
Note the differences between kernels with the old PCI BIOS interface and
newer kernels with the real PCI interface. In compat.h some things are
defined to make the transition easier. */
-static int ali1535_setup(struct pci_dev *dev)
+static int __devinit ali1535_setup(struct pci_dev *dev)
{
int retval = -ENODEV;
unsigned char temp;
diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c
index d627fceb790b..e7e3205f1286 100644
--- a/drivers/i2c/busses/i2c-ali15x3.c
+++ b/drivers/i2c/busses/i2c-ali15x3.c
@@ -131,7 +131,7 @@ MODULE_PARM_DESC(force_addr,
static struct pci_driver ali15x3_driver;
static unsigned short ali15x3_smba;
-static int ali15x3_setup(struct pci_dev *ALI15X3_dev)
+static int __devinit ali15x3_setup(struct pci_dev *ALI15X3_dev)
{
u16 a;
unsigned char temp;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 55edcfe5b851..df6ab553f975 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -767,6 +767,9 @@ static int __devinit i801_probe(struct pci_dev *dev, const struct pci_device_id
/* set up the sysfs linkage to our parent device */
i801_adapter.dev.parent = &dev->dev;
+ /* Retry up to 3 times on lost arbitration */
+ i801_adapter.retries = 3;
+
snprintf(i801_adapter.name, sizeof(i801_adapter.name),
"SMBus I801 adapter at %04lx", i801_smba);
err = i2c_add_adapter(&i801_adapter);
diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c
index a75c75e77b92..5901707fc66a 100644
--- a/drivers/i2c/busses/i2c-iop3xx.c
+++ b/drivers/i2c/busses/i2c-iop3xx.c
@@ -56,12 +56,6 @@ iic_cook_addr(struct i2c_msg *msg)
if (msg->flags & I2C_M_RD)
addr |= 1;
- /*
- * Read or Write?
- */
- if (msg->flags & I2C_M_REV_DIR_ADDR)
- addr ^= 1;
-
return addr;
}
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index bbab0e166630..ed387ffa4730 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -338,9 +338,6 @@ mv64xxx_i2c_prepare_for_io(struct mv64xxx_i2c_data *drv_data,
if (msg->flags & I2C_M_RD)
dir = 1;
- if (msg->flags & I2C_M_REV_DIR_ADDR)
- dir ^= 1;
-
if (msg->flags & I2C_M_TEN) {
drv_data->addr1 = 0xf0 | (((u32)msg->addr & 0x300) >> 7) | dir;
drv_data->addr2 = (u32)msg->addr & 0xff;
diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c
index 3c9d71f60187..1c440a70ec61 100644
--- a/drivers/i2c/busses/i2c-powermac.c
+++ b/drivers/i2c/busses/i2c-powermac.c
@@ -49,48 +49,38 @@ static s32 i2c_powermac_smbus_xfer( struct i2c_adapter* adap,
int rc = 0;
int read = (read_write == I2C_SMBUS_READ);
int addrdir = (addr << 1) | read;
+ int mode, subsize, len;
+ u32 subaddr;
+ u8 *buf;
u8 local[2];
- rc = pmac_i2c_open(bus, 0);
- if (rc)
- return rc;
+ if (size == I2C_SMBUS_QUICK || size == I2C_SMBUS_BYTE) {
+ mode = pmac_i2c_mode_std;
+ subsize = 0;
+ subaddr = 0;
+ } else {
+ mode = read ? pmac_i2c_mode_combined : pmac_i2c_mode_stdsub;
+ subsize = 1;
+ subaddr = command;
+ }
switch (size) {
case I2C_SMBUS_QUICK:
- rc = pmac_i2c_setmode(bus, pmac_i2c_mode_std);
- if (rc)
- goto bail;
- rc = pmac_i2c_xfer(bus, addrdir, 0, 0, NULL, 0);
+ buf = NULL;
+ len = 0;
break;
case I2C_SMBUS_BYTE:
- rc = pmac_i2c_setmode(bus, pmac_i2c_mode_std);
- if (rc)
- goto bail;
- rc = pmac_i2c_xfer(bus, addrdir, 0, 0, &data->byte, 1);
- break;
case I2C_SMBUS_BYTE_DATA:
- rc = pmac_i2c_setmode(bus, read ?
- pmac_i2c_mode_combined :
- pmac_i2c_mode_stdsub);
- if (rc)
- goto bail;
- rc = pmac_i2c_xfer(bus, addrdir, 1, command, &data->byte, 1);
+ buf = &data->byte;
+ len = 1;
break;
case I2C_SMBUS_WORD_DATA:
- rc = pmac_i2c_setmode(bus, read ?
- pmac_i2c_mode_combined :
- pmac_i2c_mode_stdsub);
- if (rc)
- goto bail;
if (!read) {
local[0] = data->word & 0xff;
local[1] = (data->word >> 8) & 0xff;
}
- rc = pmac_i2c_xfer(bus, addrdir, 1, command, local, 2);
- if (rc == 0 && read) {
- data->word = ((u16)local[1]) << 8;
- data->word |= local[0];
- }
+ buf = local;
+ len = 2;
break;
/* Note that these are broken vs. the expected smbus API where
@@ -105,28 +95,44 @@ static s32 i2c_powermac_smbus_xfer( struct i2c_adapter* adap,
* a repeat start/addr phase (but not stop in between)
*/
case I2C_SMBUS_BLOCK_DATA:
- rc = pmac_i2c_setmode(bus, read ?
- pmac_i2c_mode_combined :
- pmac_i2c_mode_stdsub);
- if (rc)
- goto bail;
- rc = pmac_i2c_xfer(bus, addrdir, 1, command, data->block,
- data->block[0] + 1);
-
+ buf = data->block;
+ len = data->block[0] + 1;
break;
case I2C_SMBUS_I2C_BLOCK_DATA:
- rc = pmac_i2c_setmode(bus, read ?
- pmac_i2c_mode_combined :
- pmac_i2c_mode_stdsub);
- if (rc)
- goto bail;
- rc = pmac_i2c_xfer(bus, addrdir, 1, command,
- &data->block[1], data->block[0]);
+ buf = &data->block[1];
+ len = data->block[0];
break;
default:
- rc = -EINVAL;
+ return -EINVAL;
+ }
+
+ rc = pmac_i2c_open(bus, 0);
+ if (rc) {
+ dev_err(&adap->dev, "Failed to open I2C, err %d\n", rc);
+ return rc;
+ }
+
+ rc = pmac_i2c_setmode(bus, mode);
+ if (rc) {
+ dev_err(&adap->dev, "Failed to set I2C mode %d, err %d\n",
+ mode, rc);
+ goto bail;
}
+
+ rc = pmac_i2c_xfer(bus, addrdir, subsize, subaddr, buf, len);
+ if (rc) {
+ dev_err(&adap->dev,
+ "I2C transfer at 0x%02x failed, size %d, err %d\n",
+ addrdir >> 1, size, rc);
+ goto bail;
+ }
+
+ if (size == I2C_SMBUS_WORD_DATA && read) {
+ data->word = ((u16)local[1]) << 8;
+ data->word |= local[0];
+ }
+
bail:
pmac_i2c_close(bus);
return rc;
@@ -146,20 +152,33 @@ static int i2c_powermac_master_xfer( struct i2c_adapter *adap,
int read;
int addrdir;
+ if (num != 1) {
+ dev_err(&adap->dev,
+ "Multi-message I2C transactions not supported\n");
+ return -EOPNOTSUPP;
+ }
+
if (msgs->flags & I2C_M_TEN)
return -EINVAL;
read = (msgs->flags & I2C_M_RD) != 0;
addrdir = (msgs->addr << 1) | read;
- if (msgs->flags & I2C_M_REV_DIR_ADDR)
- addrdir ^= 1;
rc = pmac_i2c_open(bus, 0);
- if (rc)
+ if (rc) {
+ dev_err(&adap->dev, "Failed to open I2C, err %d\n", rc);
return rc;
+ }
rc = pmac_i2c_setmode(bus, pmac_i2c_mode_std);
- if (rc)
+ if (rc) {
+ dev_err(&adap->dev, "Failed to set I2C mode %d, err %d\n",
+ pmac_i2c_mode_std, rc);
goto bail;
+ }
rc = pmac_i2c_xfer(bus, addrdir, 0, 0, msgs->buf, msgs->len);
+ if (rc < 0)
+ dev_err(&adap->dev, "I2C %s 0x%02x failed, err %d\n",
+ addrdir & 1 ? "read from" : "write to", addrdir >> 1,
+ rc);
bail:
pmac_i2c_close(bus);
return rc < 0 ? rc : 1;
@@ -183,19 +202,16 @@ static const struct i2c_algorithm i2c_powermac_algorithm = {
static int __devexit i2c_powermac_remove(struct platform_device *dev)
{
struct i2c_adapter *adapter = platform_get_drvdata(dev);
- struct pmac_i2c_bus *bus = i2c_get_adapdata(adapter);
int rc;
rc = i2c_del_adapter(adapter);
- pmac_i2c_detach_adapter(bus, adapter);
- i2c_set_adapdata(adapter, NULL);
/* We aren't that prepared to deal with this... */
if (rc)
printk(KERN_WARNING
"i2c-powermac.c: Failed to remove bus %s !\n",
adapter->name);
platform_set_drvdata(dev, NULL);
- kfree(adapter);
+ memset(adapter, 0, sizeof(*adapter));
return 0;
}
@@ -206,12 +222,12 @@ static int __devinit i2c_powermac_probe(struct platform_device *dev)
struct pmac_i2c_bus *bus = dev->dev.platform_data;
struct device_node *parent = NULL;
struct i2c_adapter *adapter;
- char name[32];
const char *basename;
int rc;
if (bus == NULL)
return -EINVAL;
+ adapter = pmac_i2c_get_adapter(bus);
/* Ok, now we need to make up a name for the interface that will
* match what we used to do in the past, that is basically the
@@ -237,29 +253,22 @@ static int __devinit i2c_powermac_probe(struct platform_device *dev)
default:
return -EINVAL;
}
- snprintf(name, 32, "%s %d", basename, pmac_i2c_get_channel(bus));
+ snprintf(adapter->name, sizeof(adapter->name), "%s %d", basename,
+ pmac_i2c_get_channel(bus));
of_node_put(parent);
- adapter = kzalloc(sizeof(struct i2c_adapter), GFP_KERNEL);
- if (adapter == NULL) {
- printk(KERN_ERR "i2c-powermac: can't allocate inteface !\n");
- return -ENOMEM;
- }
platform_set_drvdata(dev, adapter);
- strcpy(adapter->name, name);
adapter->algo = &i2c_powermac_algorithm;
i2c_set_adapdata(adapter, bus);
adapter->dev.parent = &dev->dev;
- pmac_i2c_attach_adapter(bus, adapter);
rc = i2c_add_adapter(adapter);
if (rc) {
printk(KERN_ERR "i2c-powermac: Adapter %s registration "
- "failed\n", name);
- i2c_set_adapdata(adapter, NULL);
- pmac_i2c_detach_adapter(bus, adapter);
+ "failed\n", adapter->name);
+ memset(adapter, 0, sizeof(*adapter));
}
- printk(KERN_INFO "PowerMac i2c bus %s registered\n", name);
+ printk(KERN_INFO "PowerMac i2c bus %s registered\n", adapter->name);
if (!strncmp(basename, "uni-n", 5)) {
struct device_node *np;
diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c
index 139f0c7f12a4..844569f7d8b7 100644
--- a/drivers/i2c/busses/i2c-sis5595.c
+++ b/drivers/i2c/busses/i2c-sis5595.c
@@ -142,7 +142,7 @@ static void sis5595_write(u8 reg, u8 data)
outb(data, sis5595_base + SMB_DAT);
}
-static int sis5595_setup(struct pci_dev *SIS5595_dev)
+static int __devinit sis5595_setup(struct pci_dev *SIS5595_dev)
{
u16 a;
u8 val;
diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c
index 70ca41e90e58..68cff7af7013 100644
--- a/drivers/i2c/busses/i2c-sis630.c
+++ b/drivers/i2c/busses/i2c-sis630.c
@@ -389,7 +389,7 @@ static u32 sis630_func(struct i2c_adapter *adapter)
I2C_FUNC_SMBUS_BLOCK_DATA;
}
-static int sis630_setup(struct pci_dev *sis630_dev)
+static int __devinit sis630_setup(struct pci_dev *sis630_dev)
{
unsigned char b;
struct pci_dev *dummy = NULL;
diff --git a/drivers/i2c/busses/i2c-stub.c b/drivers/i2c/busses/i2c-stub.c
index 1b7b2af94036..0c770eabe85e 100644
--- a/drivers/i2c/busses/i2c-stub.c
+++ b/drivers/i2c/busses/i2c-stub.c
@@ -35,6 +35,10 @@ module_param_array(chip_addr, ushort, NULL, S_IRUGO);
MODULE_PARM_DESC(chip_addr,
"Chip addresses (up to 10, between 0x03 and 0x77)");
+static unsigned long functionality = ~0UL;
+module_param(functionality, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(functionality, "Override functionality bitfield");
+
struct stub_chip {
u8 pointer;
u16 words[256]; /* Byte operations use the LSB as per SMBus
@@ -48,7 +52,7 @@ static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
char read_write, u8 command, int size, union i2c_smbus_data * data)
{
s32 ret;
- int i;
+ int i, len;
struct stub_chip *chip = NULL;
/* Search for the right chip */
@@ -118,6 +122,29 @@ static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
ret = 0;
break;
+ case I2C_SMBUS_I2C_BLOCK_DATA:
+ len = data->block[0];
+ if (read_write == I2C_SMBUS_WRITE) {
+ for (i = 0; i < len; i++) {
+ chip->words[command + i] &= 0xff00;
+ chip->words[command + i] |= data->block[1 + i];
+ }
+ dev_dbg(&adap->dev, "i2c block data - addr 0x%02x, "
+ "wrote %d bytes at 0x%02x.\n",
+ addr, len, command);
+ } else {
+ for (i = 0; i < len; i++) {
+ data->block[1 + i] =
+ chip->words[command + i] & 0xff;
+ }
+ dev_dbg(&adap->dev, "i2c block data - addr 0x%02x, "
+ "read %d bytes at 0x%02x.\n",
+ addr, len, command);
+ }
+
+ ret = 0;
+ break;
+
default:
dev_dbg(&adap->dev, "Unsupported I2C/SMBus command\n");
ret = -EOPNOTSUPP;
@@ -129,8 +156,9 @@ static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
static u32 stub_func(struct i2c_adapter *adapter)
{
- return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
- I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA;
+ return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+ I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+ I2C_FUNC_SMBUS_I2C_BLOCK) & functionality;
}
static const struct i2c_algorithm smbus_algorithm = {
diff --git a/drivers/i2c/busses/i2c-voodoo3.c b/drivers/i2c/busses/i2c-voodoo3.c
deleted file mode 100644
index 7663d57833a0..000000000000
--- a/drivers/i2c/busses/i2c-voodoo3.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>,
- Philip Edelbrock <phil@netroedge.com>,
- Ralph Metzler <rjkm@thp.uni-koeln.de>, and
- Mark D. Studebaker <mdsxyz123@yahoo.com>
-
- Based on code written by Ralph Metzler <rjkm@thp.uni-koeln.de> and
- Simon Vogl
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/* This interfaces to the I2C bus of the Voodoo3 to gain access to
- the BT869 and possibly other I2C devices. */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-#include <asm/io.h>
-
-/* the only registers we use */
-#define REG 0x78
-#define REG2 0x70
-
-/* bit locations in the register */
-#define DDC_ENAB 0x00040000
-#define DDC_SCL_OUT 0x00080000
-#define DDC_SDA_OUT 0x00100000
-#define DDC_SCL_IN 0x00200000
-#define DDC_SDA_IN 0x00400000
-#define I2C_ENAB 0x00800000
-#define I2C_SCL_OUT 0x01000000
-#define I2C_SDA_OUT 0x02000000
-#define I2C_SCL_IN 0x04000000
-#define I2C_SDA_IN 0x08000000
-
-/* initialization states */
-#define INIT2 0x2
-#define INIT3 0x4
-
-/* delays */
-#define CYCLE_DELAY 10
-#define TIMEOUT (HZ / 2)
-
-
-static void __iomem *ioaddr;
-
-/* The voo GPIO registers don't have individual masks for each bit
- so we always have to read before writing. */
-
-static void bit_vooi2c_setscl(void *data, int val)
-{
- unsigned int r;
- r = readl(ioaddr + REG);
- if (val)
- r |= I2C_SCL_OUT;
- else
- r &= ~I2C_SCL_OUT;
- writel(r, ioaddr + REG);
- readl(ioaddr + REG); /* flush posted write */
-}
-
-static void bit_vooi2c_setsda(void *data, int val)
-{
- unsigned int r;
- r = readl(ioaddr + REG);
- if (val)
- r |= I2C_SDA_OUT;
- else
- r &= ~I2C_SDA_OUT;
- writel(r, ioaddr + REG);
- readl(ioaddr + REG); /* flush posted write */
-}
-
-/* The GPIO pins are open drain, so the pins always remain outputs.
- We rely on the i2c-algo-bit routines to set the pins high before
- reading the input from other chips. */
-
-static int bit_vooi2c_getscl(void *data)
-{
- return (0 != (readl(ioaddr + REG) & I2C_SCL_IN));
-}
-
-static int bit_vooi2c_getsda(void *data)
-{
- return (0 != (readl(ioaddr + REG) & I2C_SDA_IN));
-}
-
-static void bit_vooddc_setscl(void *data, int val)
-{
- unsigned int r;
- r = readl(ioaddr + REG);
- if (val)
- r |= DDC_SCL_OUT;
- else
- r &= ~DDC_SCL_OUT;
- writel(r, ioaddr + REG);
- readl(ioaddr + REG); /* flush posted write */
-}
-
-static void bit_vooddc_setsda(void *data, int val)
-{
- unsigned int r;
- r = readl(ioaddr + REG);
- if (val)
- r |= DDC_SDA_OUT;
- else
- r &= ~DDC_SDA_OUT;
- writel(r, ioaddr + REG);
- readl(ioaddr + REG); /* flush posted write */
-}
-
-static int bit_vooddc_getscl(void *data)
-{
- return (0 != (readl(ioaddr + REG) & DDC_SCL_IN));
-}
-
-static int bit_vooddc_getsda(void *data)
-{
- return (0 != (readl(ioaddr + REG) & DDC_SDA_IN));
-}
-
-static int config_v3(struct pci_dev *dev)
-{
- unsigned long cadr;
-
- /* map Voodoo3 memory */
- cadr = dev->resource[0].start;
- cadr &= PCI_BASE_ADDRESS_MEM_MASK;
- ioaddr = ioremap_nocache(cadr, 0x1000);
- if (ioaddr) {
- writel(0x8160, ioaddr + REG2);
- writel(0xcffc0020, ioaddr + REG);
- dev_info(&dev->dev, "Using Banshee/Voodoo3 I2C device at %p\n", ioaddr);
- return 0;
- }
- return -ENODEV;
-}
-
-static struct i2c_algo_bit_data voo_i2c_bit_data = {
- .setsda = bit_vooi2c_setsda,
- .setscl = bit_vooi2c_setscl,
- .getsda = bit_vooi2c_getsda,
- .getscl = bit_vooi2c_getscl,
- .udelay = CYCLE_DELAY,
- .timeout = TIMEOUT
-};
-
-static struct i2c_adapter voodoo3_i2c_adapter = {
- .owner = THIS_MODULE,
- .name = "I2C Voodoo3/Banshee adapter",
- .algo_data = &voo_i2c_bit_data,
-};
-
-static struct i2c_algo_bit_data voo_ddc_bit_data = {
- .setsda = bit_vooddc_setsda,
- .setscl = bit_vooddc_setscl,
- .getsda = bit_vooddc_getsda,
- .getscl = bit_vooddc_getscl,
- .udelay = CYCLE_DELAY,
- .timeout = TIMEOUT
-};
-
-static struct i2c_adapter voodoo3_ddc_adapter = {
- .owner = THIS_MODULE,
- .class = I2C_CLASS_DDC,
- .name = "DDC Voodoo3/Banshee adapter",
- .algo_data = &voo_ddc_bit_data,
-};
-
-static struct pci_device_id voodoo3_ids[] __devinitdata = {
- { PCI_DEVICE(PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_VOODOO3) },
- { PCI_DEVICE(PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_BANSHEE) },
- { 0, }
-};
-
-MODULE_DEVICE_TABLE (pci, voodoo3_ids);
-
-static int __devinit voodoo3_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
- int retval;
-
- retval = config_v3(dev);
- if (retval)
- return retval;
-
- /* set up the sysfs linkage to our parent device */
- voodoo3_i2c_adapter.dev.parent = &dev->dev;
- voodoo3_ddc_adapter.dev.parent = &dev->dev;
-
- retval = i2c_bit_add_bus(&voodoo3_i2c_adapter);
- if (retval)
- return retval;
- retval = i2c_bit_add_bus(&voodoo3_ddc_adapter);
- if (retval)
- i2c_del_adapter(&voodoo3_i2c_adapter);
- return retval;
-}
-
-static void __devexit voodoo3_remove(struct pci_dev *dev)
-{
- i2c_del_adapter(&voodoo3_i2c_adapter);
- i2c_del_adapter(&voodoo3_ddc_adapter);
- iounmap(ioaddr);
-}
-
-static struct pci_driver voodoo3_driver = {
- .name = "voodoo3_smbus",
- .id_table = voodoo3_ids,
- .probe = voodoo3_probe,
- .remove = __devexit_p(voodoo3_remove),
-};
-
-static int __init i2c_voodoo3_init(void)
-{
- return pci_register_driver(&voodoo3_driver);
-}
-
-static void __exit i2c_voodoo3_exit(void)
-{
- pci_unregister_driver(&voodoo3_driver);
-}
-
-
-MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>, "
- "Philip Edelbrock <phil@netroedge.com>, "
- "Ralph Metzler <rjkm@thp.uni-koeln.de>, "
- "and Mark D. Studebaker <mdsxyz123@yahoo.com>");
-MODULE_DESCRIPTION("Voodoo3 I2C/SMBus driver");
-MODULE_LICENSE("GPL");
-
-module_init(i2c_voodoo3_init);
-module_exit(i2c_voodoo3_exit);
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index f9618f4d4e47..ae4539d99bef 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -6,16 +6,6 @@
menu "Miscellaneous I2C Chip support"
-config DS1682
- tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
- depends on EXPERIMENTAL
- help
- If you say yes here you get support for Dallas Semiconductor
- DS1682 Total Elapsed Time Recorder.
-
- This driver can also be built as a module. If so, the module
- will be called ds1682.
-
config SENSORS_TSL2550
tristate "Taos TSL2550 ambient light sensor"
depends on EXPERIMENTAL
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index 749cf3606294..fe0af0f81f2d 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -10,7 +10,6 @@
# * I/O expander drivers go to drivers/gpio
#
-obj-$(CONFIG_DS1682) += ds1682.o
obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o
ifeq ($(CONFIG_I2C_DEBUG_CHIP),y)
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 296504355142..4f34823e86b1 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -558,11 +558,9 @@ static void i2c_scan_static_board_info(struct i2c_adapter *adapter)
up_read(&__i2c_board_lock);
}
-static int i2c_do_add_adapter(struct device_driver *d, void *data)
+static int i2c_do_add_adapter(struct i2c_driver *driver,
+ struct i2c_adapter *adap)
{
- struct i2c_driver *driver = to_i2c_driver(d);
- struct i2c_adapter *adap = data;
-
/* Detect supported devices on that bus, and instantiate them */
i2c_detect(adap, driver);
@@ -574,6 +572,11 @@ static int i2c_do_add_adapter(struct device_driver *d, void *data)
return 0;
}
+static int __process_new_adapter(struct device_driver *d, void *data)
+{
+ return i2c_do_add_adapter(to_i2c_driver(d), data);
+}
+
static int i2c_register_adapter(struct i2c_adapter *adap)
{
int res = 0, dummy;
@@ -584,7 +587,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
goto out_list;
}
- mutex_init(&adap->bus_lock);
+ rt_mutex_init(&adap->bus_lock);
/* Set default timeout to 1 second if not already set */
if (adap->timeout == 0)
@@ -614,7 +617,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
/* Notify drivers */
mutex_lock(&core_lock);
dummy = bus_for_each_drv(&i2c_bus_type, NULL, adap,
- i2c_do_add_adapter);
+ __process_new_adapter);
mutex_unlock(&core_lock);
return 0;
@@ -715,10 +718,9 @@ retry:
}
EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter);
-static int i2c_do_del_adapter(struct device_driver *d, void *data)
+static int i2c_do_del_adapter(struct i2c_driver *driver,
+ struct i2c_adapter *adapter)
{
- struct i2c_driver *driver = to_i2c_driver(d);
- struct i2c_adapter *adapter = data;
struct i2c_client *client, *_n;
int res;
@@ -750,6 +752,11 @@ static int __unregister_client(struct device *dev, void *dummy)
return 0;
}
+static int __process_removed_adapter(struct device_driver *d, void *data)
+{
+ return i2c_do_del_adapter(to_i2c_driver(d), data);
+}
+
/**
* i2c_del_adapter - unregister I2C adapter
* @adap: the adapter being unregistered
@@ -777,7 +784,7 @@ int i2c_del_adapter(struct i2c_adapter *adap)
/* Tell drivers about this removal */
mutex_lock(&core_lock);
res = bus_for_each_drv(&i2c_bus_type, NULL, adap,
- i2c_do_del_adapter);
+ __process_removed_adapter);
mutex_unlock(&core_lock);
if (res)
return res;
@@ -826,22 +833,11 @@ EXPORT_SYMBOL(i2c_del_adapter);
/* ------------------------------------------------------------------------- */
-static int __attach_adapter(struct device *dev, void *data)
+static int __process_new_driver(struct device *dev, void *data)
{
- struct i2c_adapter *adapter;
- struct i2c_driver *driver = data;
-
if (dev->type != &i2c_adapter_type)
return 0;
- adapter = to_i2c_adapter(dev);
-
- i2c_detect(adapter, driver);
-
- /* Legacy drivers scan i2c busses directly */
- if (driver->attach_adapter)
- driver->attach_adapter(adapter);
-
- return 0;
+ return i2c_do_add_adapter(data, to_i2c_adapter(dev));
}
/*
@@ -873,40 +869,18 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
INIT_LIST_HEAD(&driver->clients);
/* Walk the adapters that are already present */
mutex_lock(&core_lock);
- bus_for_each_dev(&i2c_bus_type, NULL, driver, __attach_adapter);
+ bus_for_each_dev(&i2c_bus_type, NULL, driver, __process_new_driver);
mutex_unlock(&core_lock);
return 0;
}
EXPORT_SYMBOL(i2c_register_driver);
-static int __detach_adapter(struct device *dev, void *data)
+static int __process_removed_driver(struct device *dev, void *data)
{
- struct i2c_adapter *adapter;
- struct i2c_driver *driver = data;
- struct i2c_client *client, *_n;
-
if (dev->type != &i2c_adapter_type)
return 0;
- adapter = to_i2c_adapter(dev);
-
- /* Remove the devices we created ourselves as the result of hardware
- * probing (using a driver's detect method) */
- list_for_each_entry_safe(client, _n, &driver->clients, detected) {
- dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
- client->name, client->addr);
- list_del(&client->detected);
- i2c_unregister_device(client);
- }
-
- if (driver->detach_adapter) {
- if (driver->detach_adapter(adapter))
- dev_err(&adapter->dev,
- "detach_adapter failed for driver [%s]\n",
- driver->driver.name);
- }
-
- return 0;
+ return i2c_do_del_adapter(data, to_i2c_adapter(dev));
}
/**
@@ -917,7 +891,7 @@ static int __detach_adapter(struct device *dev, void *data)
void i2c_del_driver(struct i2c_driver *driver)
{
mutex_lock(&core_lock);
- bus_for_each_dev(&i2c_bus_type, NULL, driver, __detach_adapter);
+ bus_for_each_dev(&i2c_bus_type, NULL, driver, __process_removed_driver);
mutex_unlock(&core_lock);
driver_unregister(&driver->driver);
@@ -1092,12 +1066,12 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
#endif
if (in_atomic() || irqs_disabled()) {
- ret = mutex_trylock(&adap->bus_lock);
+ ret = rt_mutex_trylock(&adap->bus_lock);
if (!ret)
/* I2C activity is ongoing. */
return -EAGAIN;
} else {
- mutex_lock_nested(&adap->bus_lock, adap->level);
+ rt_mutex_lock(&adap->bus_lock);
}
/* Retry automatically on arbitration loss */
@@ -1109,7 +1083,7 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
if (time_after(jiffies, orig_jiffies + adap->timeout))
break;
}
- mutex_unlock(&adap->bus_lock);
+ rt_mutex_unlock(&adap->bus_lock);
return ret;
} else {
@@ -1180,7 +1154,7 @@ EXPORT_SYMBOL(i2c_master_recv);
* ----------------------------------------------------
*/
-static int i2c_detect_address(struct i2c_client *temp_client, int kind,
+static int i2c_detect_address(struct i2c_client *temp_client,
struct i2c_driver *driver)
{
struct i2c_board_info info;
@@ -1199,22 +1173,18 @@ static int i2c_detect_address(struct i2c_client *temp_client, int kind,
if (i2c_check_addr(adapter, addr))
return 0;
- /* Make sure there is something at this address, unless forced */
- if (kind < 0) {
- if (i2c_smbus_xfer(adapter, addr, 0, 0, 0,
- I2C_SMBUS_QUICK, NULL) < 0)
- return 0;
+ /* Make sure there is something at this address */
+ if (i2c_smbus_xfer(adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL) < 0)
+ return 0;
- /* prevent 24RF08 corruption */
- if ((addr & ~0x0f) == 0x50)
- i2c_smbus_xfer(adapter, addr, 0, 0, 0,
- I2C_SMBUS_QUICK, NULL);
- }
+ /* Prevent 24RF08 corruption */
+ if ((addr & ~0x0f) == 0x50)
+ i2c_smbus_xfer(adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL);
/* Finally call the custom detection function */
memset(&info, 0, sizeof(struct i2c_board_info));
info.addr = addr;
- err = driver->detect(temp_client, kind, &info);
+ err = driver->detect(temp_client, -1, &info);
if (err) {
/* -ENODEV is returned if the detection fails. We catch it
here as this isn't an error. */
@@ -1259,40 +1229,13 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
return -ENOMEM;
temp_client->adapter = adapter;
- /* Force entries are done first, and are not affected by ignore
- entries */
- if (address_data->forces) {
- const unsigned short * const *forces = address_data->forces;
- int kind;
-
- for (kind = 0; forces[kind]; kind++) {
- for (i = 0; forces[kind][i] != I2C_CLIENT_END;
- i += 2) {
- if (forces[kind][i] == adap_id
- || forces[kind][i] == ANY_I2C_BUS) {
- dev_dbg(&adapter->dev, "found force "
- "parameter for adapter %d, "
- "addr 0x%02x, kind %d\n",
- adap_id, forces[kind][i + 1],
- kind);
- temp_client->addr = forces[kind][i + 1];
- err = i2c_detect_address(temp_client,
- kind, driver);
- if (err)
- goto exit_free;
- }
- }
- }
- }
-
/* Stop here if the classes do not match */
if (!(adapter->class & driver->class))
goto exit_free;
/* Stop here if we can't use SMBUS_QUICK */
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) {
- if (address_data->probe[0] == I2C_CLIENT_END
- && address_data->normal_i2c[0] == I2C_CLIENT_END)
+ if (address_data->normal_i2c[0] == I2C_CLIENT_END)
goto exit_free;
dev_warn(&adapter->dev, "SMBus Quick command not supported, "
@@ -1301,48 +1244,12 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
goto exit_free;
}
- /* Probe entries are done second, and are not affected by ignore
- entries either */
- for (i = 0; address_data->probe[i] != I2C_CLIENT_END; i += 2) {
- if (address_data->probe[i] == adap_id
- || address_data->probe[i] == ANY_I2C_BUS) {
- dev_dbg(&adapter->dev, "found probe parameter for "
- "adapter %d, addr 0x%02x\n", adap_id,
- address_data->probe[i + 1]);
- temp_client->addr = address_data->probe[i + 1];
- err = i2c_detect_address(temp_client, -1, driver);
- if (err)
- goto exit_free;
- }
- }
-
- /* Normal entries are done last, unless shadowed by an ignore entry */
for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) {
- int j, ignore;
-
- ignore = 0;
- for (j = 0; address_data->ignore[j] != I2C_CLIENT_END;
- j += 2) {
- if ((address_data->ignore[j] == adap_id ||
- address_data->ignore[j] == ANY_I2C_BUS)
- && address_data->ignore[j + 1]
- == address_data->normal_i2c[i]) {
- dev_dbg(&adapter->dev, "found ignore "
- "parameter for adapter %d, "
- "addr 0x%02x\n", adap_id,
- address_data->ignore[j + 1]);
- ignore = 1;
- break;
- }
- }
- if (ignore)
- continue;
-
dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
"addr 0x%02x\n", adap_id,
address_data->normal_i2c[i]);
temp_client->addr = address_data->normal_i2c[i];
- err = i2c_detect_address(temp_client, -1, driver);
+ err = i2c_detect_address(temp_client, driver);
if (err)
goto exit_free;
}
@@ -1913,7 +1820,7 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
flags &= I2C_M_TEN | I2C_CLIENT_PEC;
if (adapter->algo->smbus_xfer) {
- mutex_lock(&adapter->bus_lock);
+ rt_mutex_lock(&adapter->bus_lock);
/* Retry automatically on arbitration loss */
orig_jiffies = jiffies;
@@ -1927,7 +1834,7 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
orig_jiffies + adapter->timeout))
break;
}
- mutex_unlock(&adapter->bus_lock);
+ rt_mutex_unlock(&adapter->bus_lock);
} else
res = i2c_smbus_xfer_emulated(adapter,addr,flags,read_write,
command, protocol, data);
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 7e13d2df9af3..f4110aa49600 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -34,7 +34,6 @@
#include <linux/list.h>
#include <linux/i2c.h>
#include <linux/i2c-dev.h>
-#include <linux/smp_lock.h>
#include <linux/jiffies.h>
#include <asm/uaccess.h>
@@ -445,20 +444,14 @@ static int i2cdev_open(struct inode *inode, struct file *file)
struct i2c_client *client;
struct i2c_adapter *adap;
struct i2c_dev *i2c_dev;
- int ret = 0;
- lock_kernel();
i2c_dev = i2c_dev_get_by_minor(minor);
- if (!i2c_dev) {
- ret = -ENODEV;
- goto out;
- }
+ if (!i2c_dev)
+ return -ENODEV;
adap = i2c_get_adapter(i2c_dev->adap->nr);
- if (!adap) {
- ret = -ENODEV;
- goto out;
- }
+ if (!adap)
+ return -ENODEV;
/* This creates an anonymous i2c_client, which may later be
* pointed to some address using I2C_SLAVE or I2C_SLAVE_FORCE.
@@ -470,8 +463,7 @@ static int i2cdev_open(struct inode *inode, struct file *file)
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client) {
i2c_put_adapter(adap);
- ret = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
snprintf(client->name, I2C_NAME_SIZE, "i2c-dev %d", adap->nr);
client->driver = &i2cdev_driver;
@@ -479,9 +471,7 @@ static int i2cdev_open(struct inode *inode, struct file *file)
client->adapter = adap;
file->private_data = client;
-out:
- unlock_kernel();
- return ret;
+ return 0;
}
static int i2cdev_release(struct inode *inode, struct file *file)
diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
index 39d4e01f5c9c..a743e68a8903 100644
--- a/drivers/ide/ide-pci-generic.c
+++ b/drivers/ide/ide-pci-generic.c
@@ -162,9 +162,10 @@ static const struct pci_device_id generic_pci_tbl[] = {
#ifdef CONFIG_BLK_DEV_IDE_SATA
{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_8237_SATA), 5 },
#endif
- { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO), 4 },
{ PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), 4 },
{ PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), 4 },
+ { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), 4 },
+ { PCI_VDEVICE(TOSHIBA, PCI_DEVICE_ID_TOSHIBA_PICCOLO_5), 4 },
{ PCI_VDEVICE(NETCELL, PCI_DEVICE_ID_REVOLUTION), 6 },
/*
* Must come last. If you add entries adjust
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index 65c1429e4129..d0dc1db80b29 100644
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -82,6 +82,7 @@
*
*/
+#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -434,7 +435,6 @@ static void initialize_dma_trm_ctx(struct dma_trm_ctx *d)
/* Count the number of available iso contexts */
static int get_nb_iso_ctx(struct ti_ohci *ohci, int reg)
{
- int i,ctx=0;
u32 tmp;
reg_write(ohci, reg, 0xffffffff);
@@ -443,11 +443,7 @@ static int get_nb_iso_ctx(struct ti_ohci *ohci, int reg)
DBGMSG("Iso contexts reg: %08x implemented: %08x", reg, tmp);
/* Count the number of contexts */
- for (i=0; i<32; i++) {
- if (tmp & 1) ctx++;
- tmp >>= 1;
- }
- return ctx;
+ return hweight32(tmp);
}
/* Global initialization */
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c
index bba85add35a3..1a494d505431 100644
--- a/drivers/input/keyboard/omap-keypad.c
+++ b/drivers/input/keyboard/omap-keypad.c
@@ -35,12 +35,12 @@
#include <linux/mutex.h>
#include <linux/errno.h>
#include <mach/gpio.h>
-#include <mach/keypad.h>
-#include <mach/menelaus.h>
+#include <plat/keypad.h>
+#include <plat/menelaus.h>
#include <asm/irq.h>
#include <mach/hardware.h>
#include <asm/io.h>
-#include <mach/mux.h>
+#include <plat/mux.h>
#undef NEW_BOARD_LEARNING_MODE
diff --git a/drivers/leds/leds-ams-delta.c b/drivers/leds/leds-ams-delta.c
index 446050759b4d..b9826032450b 100644
--- a/drivers/leds/leds-ams-delta.c
+++ b/drivers/leds/leds-ams-delta.c
@@ -12,7 +12,7 @@
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/leds.h>
-#include <mach/board-ams-delta.h>
+#include <plat/board-ams-delta.h>
/*
* Our context
diff --git a/drivers/leds/leds-locomo.c b/drivers/leds/leds-locomo.c
index 5d91362e3066..1f7c10f6b7f2 100644
--- a/drivers/leds/leds-locomo.c
+++ b/drivers/leds/leds-locomo.c
@@ -44,7 +44,7 @@ static void locomoled_brightness_set1(struct led_classdev *led_cdev,
static struct led_classdev locomo_led0 = {
.name = "locomo:amber:charge",
- .default_trigger = "sharpsl-charge",
+ .default_trigger = "main-battery-charging",
.brightness_set = locomoled_brightness_set0,
};
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index cc9f27514aef..7b4ef5bb556b 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -27,54 +27,49 @@ static int mouse_last_keycode;
/* file(s) in /proc/sys/dev/mac_hid */
static ctl_table mac_hid_files[] = {
{
- .ctl_name = DEV_MAC_HID_MOUSE_BUTTON_EMULATION,
.procname = "mouse_button_emulation",
.data = &mouse_emulate_buttons,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE,
.procname = "mouse_button2_keycode",
.data = &mouse_button2_keycode,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE,
.procname = "mouse_button3_keycode",
.data = &mouse_button3_keycode,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
/* dir in /proc/sys/dev */
static ctl_table mac_hid_dir[] = {
{
- .ctl_name = DEV_MAC_HID,
.procname = "mac_hid",
.maxlen = 0,
.mode = 0555,
.child = mac_hid_files,
},
- { .ctl_name = 0 }
+ { }
};
/* /proc/sys/dev itself, in case that is not there yet */
static ctl_table mac_hid_root_dir[] = {
{
- .ctl_name = CTL_DEV,
.procname = "dev",
.maxlen = 0,
.mode = 0555,
.child = mac_hid_dir,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table_header *mac_hid_sysctl_header;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b182f86a19dd..5f154ef1e4be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -98,44 +98,40 @@ static struct ctl_table_header *raid_table_header;
static ctl_table raid_table[] = {
{
- .ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
.procname = "speed_limit_min",
.data = &sysctl_speed_limit_min,
.maxlen = sizeof(int),
.mode = S_IRUGO|S_IWUSR,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
.procname = "speed_limit_max",
.data = &sysctl_speed_limit_max,
.maxlen = sizeof(int),
.mode = S_IRUGO|S_IWUSR,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table raid_dir_table[] = {
{
- .ctl_name = DEV_RAID,
.procname = "raid",
.maxlen = 0,
.mode = S_IRUGO|S_IXUGO,
.child = raid_table,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table raid_root_table[] = {
{
- .ctl_name = CTL_DEV,
.procname = "dev",
.maxlen = 0,
.mode = 0555,
.child = raid_dir_table,
},
- { .ctl_name = 0 }
+ { }
};
static const struct block_device_operations md_fops;
diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig
index a87a477c87f2..b134553eb3b5 100644
--- a/drivers/media/radio/Kconfig
+++ b/drivers/media/radio/Kconfig
@@ -195,6 +195,24 @@ config RADIO_MAESTRO
To compile this driver as a module, choose M here: the
module will be called radio-maestro.
+config RADIO_MIROPCM20
+ tristate "miroSOUND PCM20 radio"
+ depends on ISA && VIDEO_V4L2
+ select SND_MIRO
+ ---help---
+ Choose Y here if you have this FM radio card. You also need to enable
+ the ALSA sound system. This choice automatically selects the ALSA
+ sound card driver "Miro miroSOUND PCM1pro/PCM12/PCM20radio" as this
+ is required for the radio-miropcm20.
+
+ In order to control your radio card, you will need to use programs
+ that are compatible with the Video For Linux API. Information on
+ this API and pointers to "v4l" programs may be found at
+ <file:Documentation/video4linux/API.html>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called radio-miropcm20.
+
config RADIO_SF16FMI
tristate "SF16FMI Radio"
depends on ISA && VIDEO_V4L2
diff --git a/drivers/media/radio/Makefile b/drivers/media/radio/Makefile
index 2a1be3bf4f7c..8a63d543ae41 100644
--- a/drivers/media/radio/Makefile
+++ b/drivers/media/radio/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_RADIO_TRUST) += radio-trust.o
obj-$(CONFIG_I2C_SI4713) += si4713-i2c.o
obj-$(CONFIG_RADIO_SI4713) += radio-si4713.o
obj-$(CONFIG_RADIO_MAESTRO) += radio-maestro.o
+obj-$(CONFIG_RADIO_MIROPCM20) += radio-miropcm20.o
obj-$(CONFIG_USB_DSBR) += dsbr100.o
obj-$(CONFIG_RADIO_SI470X) += si470x/
obj-$(CONFIG_USB_MR800) += radio-mr800.o
diff --git a/drivers/media/radio/radio-miropcm20.c b/drivers/media/radio/radio-miropcm20.c
new file mode 100644
index 000000000000..4ff885445fd4
--- /dev/null
+++ b/drivers/media/radio/radio-miropcm20.c
@@ -0,0 +1,270 @@
+/* Miro PCM20 radio driver for Linux radio support
+ * (c) 1998 Ruurd Reitsma <R.A.Reitsma@wbmt.tudelft.nl>
+ * Thanks to Norberto Pellici for the ACI device interface specification
+ * The API part is based on the radiotrack driver by M. Kirkwood
+ * This driver relies on the aci mixer provided by the snd-miro
+ * ALSA driver.
+ * Look there for further info...
+ */
+
+/* What ever you think about the ACI, version 0x07 is not very well!
+ * I can't get frequency, 'tuner status', 'tuner flags' or mute/mono
+ * conditions... Robert
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <sound/aci.h>
+
+static int radio_nr = -1;
+module_param(radio_nr, int, 0);
+MODULE_PARM_DESC(radio_nr, "Set radio device number (/dev/radioX). Default: -1 (autodetect)");
+
+static int mono;
+module_param(mono, bool, 0);
+MODULE_PARM_DESC(mono, "Force tuner into mono mode.");
+
+struct pcm20 {
+ struct v4l2_device v4l2_dev;
+ struct video_device vdev;
+ unsigned long freq;
+ int muted;
+ struct snd_miro_aci *aci;
+};
+
+static struct pcm20 pcm20_card = {
+ .freq = 87*16000,
+ .muted = 1,
+};
+
+static int pcm20_mute(struct pcm20 *dev, unsigned char mute)
+{
+ dev->muted = mute;
+ return snd_aci_cmd(dev->aci, ACI_SET_TUNERMUTE, mute, -1);
+}
+
+static int pcm20_stereo(struct pcm20 *dev, unsigned char stereo)
+{
+ return snd_aci_cmd(dev->aci, ACI_SET_TUNERMONO, !stereo, -1);
+}
+
+static int pcm20_setfreq(struct pcm20 *dev, unsigned long freq)
+{
+ unsigned char freql;
+ unsigned char freqh;
+ struct snd_miro_aci *aci = dev->aci;
+
+ dev->freq = freq;
+
+ freq /= 160;
+ if (!(aci->aci_version == 0x07 || aci->aci_version >= 0xb0))
+ freq /= 10; /* I don't know exactly which version
+ * needs this hack */
+ freql = freq & 0xff;
+ freqh = freq >> 8;
+
+ pcm20_stereo(dev, !mono);
+ return snd_aci_cmd(aci, ACI_WRITE_TUNE, freql, freqh);
+}
+
+static const struct v4l2_file_operations pcm20_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = video_ioctl2,
+};
+
+static int vidioc_querycap(struct file *file, void *priv,
+ struct v4l2_capability *v)
+{
+ strlcpy(v->driver, "Miro PCM20", sizeof(v->driver));
+ strlcpy(v->card, "Miro PCM20", sizeof(v->card));
+ strlcpy(v->bus_info, "ISA", sizeof(v->bus_info));
+ v->version = 0x1;
+ v->capabilities = V4L2_CAP_TUNER | V4L2_CAP_RADIO;
+ return 0;
+}
+
+static int vidioc_g_tuner(struct file *file, void *priv,
+ struct v4l2_tuner *v)
+{
+ if (v->index) /* Only 1 tuner */
+ return -EINVAL;
+ strlcpy(v->name, "FM", sizeof(v->name));
+ v->type = V4L2_TUNER_RADIO;
+ v->rangelow = 87*16000;
+ v->rangehigh = 108*16000;
+ v->signal = 0xffff;
+ v->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_STEREO;
+ v->capability = V4L2_TUNER_CAP_LOW;
+ v->audmode = V4L2_TUNER_MODE_MONO;
+ return 0;
+}
+
+static int vidioc_s_tuner(struct file *file, void *priv,
+ struct v4l2_tuner *v)
+{
+ return v->index ? -EINVAL : 0;
+}
+
+static int vidioc_g_frequency(struct file *file, void *priv,
+ struct v4l2_frequency *f)
+{
+ struct pcm20 *dev = video_drvdata(file);
+
+ if (f->tuner != 0)
+ return -EINVAL;
+
+ f->type = V4L2_TUNER_RADIO;
+ f->frequency = dev->freq;
+ return 0;
+}
+
+
+static int vidioc_s_frequency(struct file *file, void *priv,
+ struct v4l2_frequency *f)
+{
+ struct pcm20 *dev = video_drvdata(file);
+
+ if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO)
+ return -EINVAL;
+
+ dev->freq = f->frequency;
+ pcm20_setfreq(dev, f->frequency);
+ return 0;
+}
+
+static int vidioc_queryctrl(struct file *file, void *priv,
+ struct v4l2_queryctrl *qc)
+{
+ switch (qc->id) {
+ case V4L2_CID_AUDIO_MUTE:
+ return v4l2_ctrl_query_fill(qc, 0, 1, 1, 1);
+ }
+ return -EINVAL;
+}
+
+static int vidioc_g_ctrl(struct file *file, void *priv,
+ struct v4l2_control *ctrl)
+{
+ struct pcm20 *dev = video_drvdata(file);
+
+ switch (ctrl->id) {
+ case V4L2_CID_AUDIO_MUTE:
+ ctrl->value = dev->muted;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int vidioc_s_ctrl(struct file *file, void *priv,
+ struct v4l2_control *ctrl)
+{
+ struct pcm20 *dev = video_drvdata(file);
+
+ switch (ctrl->id) {
+ case V4L2_CID_AUDIO_MUTE:
+ pcm20_mute(dev, ctrl->value);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int vidioc_g_input(struct file *filp, void *priv, unsigned int *i)
+{
+ *i = 0;
+ return 0;
+}
+
+static int vidioc_s_input(struct file *filp, void *priv, unsigned int i)
+{
+ return i ? -EINVAL : 0;
+}
+
+static int vidioc_g_audio(struct file *file, void *priv,
+ struct v4l2_audio *a)
+{
+ a->index = 0;
+ strlcpy(a->name, "Radio", sizeof(a->name));
+ a->capability = V4L2_AUDCAP_STEREO;
+ return 0;
+}
+
+static int vidioc_s_audio(struct file *file, void *priv,
+ struct v4l2_audio *a)
+{
+ return a->index ? -EINVAL : 0;
+}
+
+static const struct v4l2_ioctl_ops pcm20_ioctl_ops = {
+ .vidioc_querycap = vidioc_querycap,
+ .vidioc_g_tuner = vidioc_g_tuner,
+ .vidioc_s_tuner = vidioc_s_tuner,
+ .vidioc_g_frequency = vidioc_g_frequency,
+ .vidioc_s_frequency = vidioc_s_frequency,
+ .vidioc_queryctrl = vidioc_queryctrl,
+ .vidioc_g_ctrl = vidioc_g_ctrl,
+ .vidioc_s_ctrl = vidioc_s_ctrl,
+ .vidioc_g_audio = vidioc_g_audio,
+ .vidioc_s_audio = vidioc_s_audio,
+ .vidioc_g_input = vidioc_g_input,
+ .vidioc_s_input = vidioc_s_input,
+};
+
+static int __init pcm20_init(void)
+{
+ struct pcm20 *dev = &pcm20_card;
+ struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
+ int res;
+
+ dev->aci = snd_aci_get_aci();
+ if (dev->aci == NULL) {
+ v4l2_err(v4l2_dev,
+ "you must load the snd-miro driver first!\n");
+ return -ENODEV;
+ }
+ strlcpy(v4l2_dev->name, "miropcm20", sizeof(v4l2_dev->name));
+
+
+ res = v4l2_device_register(NULL, v4l2_dev);
+ if (res < 0) {
+ v4l2_err(v4l2_dev, "could not register v4l2_device\n");
+ return -EINVAL;
+ }
+
+ strlcpy(dev->vdev.name, v4l2_dev->name, sizeof(dev->vdev.name));
+ dev->vdev.v4l2_dev = v4l2_dev;
+ dev->vdev.fops = &pcm20_fops;
+ dev->vdev.ioctl_ops = &pcm20_ioctl_ops;
+ dev->vdev.release = video_device_release_empty;
+ video_set_drvdata(&dev->vdev, dev);
+
+ if (video_register_device(&dev->vdev, VFL_TYPE_RADIO, radio_nr) < 0)
+ goto fail;
+
+ v4l2_info(v4l2_dev, "Mirosound PCM20 Radio tuner\n");
+ return 0;
+fail:
+ v4l2_device_unregister(v4l2_dev);
+ return -EINVAL;
+}
+
+MODULE_AUTHOR("Ruurd Reitsma, Krzysztof Helt");
+MODULE_DESCRIPTION("A driver for the Miro PCM20 radio card.");
+MODULE_LICENSE("GPL");
+
+static void __exit pcm20_cleanup(void)
+{
+ struct pcm20 *dev = &pcm20_card;
+
+ video_unregister_device(&dev->vdev);
+ v4l2_device_unregister(&dev->v4l2_dev);
+}
+
+module_init(pcm20_init);
+module_exit(pcm20_cleanup);
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 570be139f9df..08f2d07bf56a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -121,6 +121,12 @@ config TWL4030_POWER
and load scripts controling which resources are switched off/on
or reset when a sleep, wakeup or warm reset event occurs.
+config TWL4030_CODEC
+ bool
+ depends on TWL4030_CORE
+ select MFD_CORE
+ default n
+
config MFD_TMIO
bool
default n
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f3b277b90d40..af0fc903cec8 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_MENELAUS) += menelaus.o
obj-$(CONFIG_TWL4030_CORE) += twl4030-core.o twl4030-irq.o
obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o
+obj-$(CONFIG_TWL4030_CODEC) += twl4030-codec.o
obj-$(CONFIG_MFD_MC13783) += mc13783-core.o
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 57271cb3b316..84815f9ef636 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -17,11 +17,11 @@
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/mfd/mcp.h>
#include <mach/dma.h>
#include <asm/system.h>
-#include "mcp.h"
#define to_mcp(d) container_of(d, struct mcp, attached_device)
#define to_mcp_driver(d) container_of(d, struct mcp_driver, drv)
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 62b32dabf629..258427232728 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -19,6 +19,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/platform_device.h>
+#include <linux/mfd/mcp.h>
#include <mach/dma.h>
#include <mach/hardware.h>
@@ -28,7 +29,6 @@
#include <mach/assabet.h>
-#include "mcp.h"
struct mcp_sa11x0 {
u32 mccr0;
@@ -163,6 +163,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
mcp->dma_audio_wr = DMA_Ser4MCP0Wr;
mcp->dma_telco_rd = DMA_Ser4MCP1Rd;
mcp->dma_telco_wr = DMA_Ser4MCP1Wr;
+ mcp->gpio_base = data->gpio_base;
platform_set_drvdata(pdev, mcp);
diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h
deleted file mode 100644
index c093a93b8808..000000000000
--- a/drivers/mfd/mcp.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * linux/drivers/mfd/mcp.h
- *
- * Copyright (C) 2001 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- */
-#ifndef MCP_H
-#define MCP_H
-
-struct mcp_ops;
-
-struct mcp {
- struct module *owner;
- struct mcp_ops *ops;
- spinlock_t lock;
- int use_count;
- unsigned int sclk_rate;
- unsigned int rw_timeout;
- dma_device_t dma_audio_rd;
- dma_device_t dma_audio_wr;
- dma_device_t dma_telco_rd;
- dma_device_t dma_telco_wr;
- struct device attached_device;
-};
-
-struct mcp_ops {
- void (*set_telecom_divisor)(struct mcp *, unsigned int);
- void (*set_audio_divisor)(struct mcp *, unsigned int);
- void (*reg_write)(struct mcp *, unsigned int, unsigned int);
- unsigned int (*reg_read)(struct mcp *, unsigned int);
- void (*enable)(struct mcp *);
- void (*disable)(struct mcp *);
-};
-
-void mcp_set_telecom_divisor(struct mcp *, unsigned int);
-void mcp_set_audio_divisor(struct mcp *, unsigned int);
-void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
-unsigned int mcp_reg_read(struct mcp *, unsigned int);
-void mcp_enable(struct mcp *);
-void mcp_disable(struct mcp *);
-#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate)
-
-struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *);
-void mcp_host_unregister(struct mcp *);
-
-struct mcp_driver {
- struct device_driver drv;
- int (*probe)(struct mcp *);
- void (*remove)(struct mcp *);
- int (*suspend)(struct mcp *, pm_message_t);
- int (*resume)(struct mcp *);
-};
-
-int mcp_driver_register(struct mcp_driver *);
-void mcp_driver_unregister(struct mcp_driver *);
-
-#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device)
-#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d)
-
-#define mcp_priv(mcp) ((void *)((mcp)+1))
-
-#endif
diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c
index 4b364bae6b3e..970afa103261 100644
--- a/drivers/mfd/menelaus.c
+++ b/drivers/mfd/menelaus.c
@@ -44,7 +44,7 @@
#include <asm/mach/irq.h>
#include <mach/gpio.h>
-#include <mach/menelaus.h>
+#include <plat/menelaus.h>
#define DRIVER_NAME "menelaus"
diff --git a/drivers/mfd/twl4030-codec.c b/drivers/mfd/twl4030-codec.c
new file mode 100644
index 000000000000..77b914907d7c
--- /dev/null
+++ b/drivers/mfd/twl4030-codec.c
@@ -0,0 +1,276 @@
+/*
+ * MFD driver for twl4030 codec submodule
+ *
+ * Author: Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * Copyright: (C) 2009 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/platform_device.h>
+#include <linux/i2c/twl4030.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/twl4030-codec.h>
+
+#define TWL4030_CODEC_CELLS 2
+
+static struct platform_device *twl4030_codec_dev;
+
+struct twl4030_codec_resource {
+ int request_count;
+ u8 reg;
+ u8 mask;
+};
+
+struct twl4030_codec {
+ unsigned int audio_mclk;
+ struct mutex mutex;
+ struct twl4030_codec_resource resource[TWL4030_CODEC_RES_MAX];
+ struct mfd_cell cells[TWL4030_CODEC_CELLS];
+};
+
+/*
+ * Modify the resource, the function returns the content of the register
+ * after the modification.
+ */
+static int twl4030_codec_set_resource(enum twl4030_codec_res id, int enable)
+{
+ struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+ u8 val;
+
+ twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val,
+ codec->resource[id].reg);
+
+ if (enable)
+ val |= codec->resource[id].mask;
+ else
+ val &= ~codec->resource[id].mask;
+
+ twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+ val, codec->resource[id].reg);
+
+ return val;
+}
+
+static inline int twl4030_codec_get_resource(enum twl4030_codec_res id)
+{
+ struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+ u8 val;
+
+ twl4030_i2c_read_u8(TWL4030_MODULE_AUDIO_VOICE, &val,
+ codec->resource[id].reg);
+
+ return val;
+}
+
+/*
+ * Enable the resource.
+ * The function returns with error or the content of the register
+ */
+int twl4030_codec_enable_resource(enum twl4030_codec_res id)
+{
+ struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+ int val;
+
+ if (id >= TWL4030_CODEC_RES_MAX) {
+ dev_err(&twl4030_codec_dev->dev,
+ "Invalid resource ID (%u)\n", id);
+ return -EINVAL;
+ }
+
+ mutex_lock(&codec->mutex);
+ if (!codec->resource[id].request_count)
+ /* Resource was disabled, enable it */
+ val = twl4030_codec_set_resource(id, 1);
+ else
+ val = twl4030_codec_get_resource(id);
+
+ codec->resource[id].request_count++;
+ mutex_unlock(&codec->mutex);
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_enable_resource);
+
+/*
+ * Disable the resource.
+ * The function returns with error or the content of the register
+ */
+int twl4030_codec_disable_resource(unsigned id)
+{
+ struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+ int val;
+
+ if (id >= TWL4030_CODEC_RES_MAX) {
+ dev_err(&twl4030_codec_dev->dev,
+ "Invalid resource ID (%u)\n", id);
+ return -EINVAL;
+ }
+
+ mutex_lock(&codec->mutex);
+ if (!codec->resource[id].request_count) {
+ dev_err(&twl4030_codec_dev->dev,
+ "Resource has been disabled already (%u)\n", id);
+ mutex_unlock(&codec->mutex);
+ return -EPERM;
+ }
+ codec->resource[id].request_count--;
+
+ if (!codec->resource[id].request_count)
+ /* Resource can be disabled now */
+ val = twl4030_codec_set_resource(id, 0);
+ else
+ val = twl4030_codec_get_resource(id);
+
+ mutex_unlock(&codec->mutex);
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_disable_resource);
+
+unsigned int twl4030_codec_get_mclk(void)
+{
+ struct twl4030_codec *codec = platform_get_drvdata(twl4030_codec_dev);
+
+ return codec->audio_mclk;
+}
+EXPORT_SYMBOL_GPL(twl4030_codec_get_mclk);
+
+static int __devinit twl4030_codec_probe(struct platform_device *pdev)
+{
+ struct twl4030_codec *codec;
+ struct twl4030_codec_data *pdata = pdev->dev.platform_data;
+ struct mfd_cell *cell = NULL;
+ int ret, childs = 0;
+ u8 val;
+
+ if (!pdata) {
+ dev_err(&pdev->dev, "Platform data is missing\n");
+ return -EINVAL;
+ }
+
+ /* Configure APLL_INFREQ and disable APLL if enabled */
+ val = 0;
+ switch (pdata->audio_mclk) {
+ case 19200000:
+ val |= TWL4030_APLL_INFREQ_19200KHZ;
+ break;
+ case 26000000:
+ val |= TWL4030_APLL_INFREQ_26000KHZ;
+ break;
+ case 38400000:
+ val |= TWL4030_APLL_INFREQ_38400KHZ;
+ break;
+ default:
+ dev_err(&pdev->dev, "Invalid audio_mclk\n");
+ return -EINVAL;
+ }
+ twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
+ val, TWL4030_REG_APLL_CTL);
+
+ codec = kzalloc(sizeof(struct twl4030_codec), GFP_KERNEL);
+ if (!codec)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, codec);
+
+ twl4030_codec_dev = pdev;
+ mutex_init(&codec->mutex);
+ codec->audio_mclk = pdata->audio_mclk;
+
+ /* Codec power */
+ codec->resource[TWL4030_CODEC_RES_POWER].reg = TWL4030_REG_CODEC_MODE;
+ codec->resource[TWL4030_CODEC_RES_POWER].mask = TWL4030_CODECPDZ;
+
+ /* PLL */
+ codec->resource[TWL4030_CODEC_RES_APLL].reg = TWL4030_REG_APLL_CTL;
+ codec->resource[TWL4030_CODEC_RES_APLL].mask = TWL4030_APLL_EN;
+
+ if (pdata->audio) {
+ cell = &codec->cells[childs];
+ cell->name = "twl4030_codec_audio";
+ cell->platform_data = pdata->audio;
+ cell->data_size = sizeof(*pdata->audio);
+ childs++;
+ }
+ if (pdata->vibra) {
+ cell = &codec->cells[childs];
+ cell->name = "twl4030_codec_vibra";
+ cell->platform_data = pdata->vibra;
+ cell->data_size = sizeof(*pdata->vibra);
+ childs++;
+ }
+
+ if (childs)
+ ret = mfd_add_devices(&pdev->dev, pdev->id, codec->cells,
+ childs, NULL, 0);
+ else {
+ dev_err(&pdev->dev, "No platform data found for childs\n");
+ ret = -ENODEV;
+ }
+
+ if (!ret)
+ return 0;
+
+ platform_set_drvdata(pdev, NULL);
+ kfree(codec);
+ twl4030_codec_dev = NULL;
+ return ret;
+}
+
+static int __devexit twl4030_codec_remove(struct platform_device *pdev)
+{
+ struct twl4030_codec *codec = platform_get_drvdata(pdev);
+
+ mfd_remove_devices(&pdev->dev);
+ platform_set_drvdata(pdev, NULL);
+ kfree(codec);
+ twl4030_codec_dev = NULL;
+
+ return 0;
+}
+
+MODULE_ALIAS("platform:twl4030_codec");
+
+static struct platform_driver twl4030_codec_driver = {
+ .probe = twl4030_codec_probe,
+ .remove = __devexit_p(twl4030_codec_remove),
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "twl4030_codec",
+ },
+};
+
+static int __devinit twl4030_codec_init(void)
+{
+ return platform_driver_register(&twl4030_codec_driver);
+}
+module_init(twl4030_codec_init);
+
+static void __devexit twl4030_codec_exit(void)
+{
+ platform_driver_unregister(&twl4030_codec_driver);
+}
+module_exit(twl4030_codec_exit);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@nokia.com>");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index a1c47ee95c0e..40449cdf09db 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -39,7 +39,7 @@
#include <linux/i2c/twl4030.h>
#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
-#include <mach/cpu.h>
+#include <plat/cpu.h>
#endif
/*
@@ -114,6 +114,12 @@
#define twl_has_watchdog() false
#endif
+#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE)
+#define twl_has_codec() true
+#else
+#define twl_has_codec() false
+#endif
+
/* Triton Core internal information (BEGIN) */
/* Last - for index max*/
@@ -601,6 +607,14 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
return PTR_ERR(child);
}
+ if (twl_has_codec() && pdata->codec) {
+ child = add_child(1, "twl4030_codec",
+ pdata->codec, sizeof(*pdata->codec),
+ false, 0, 0);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+ }
+
if (twl_has_regulator()) {
/*
child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
@@ -763,7 +777,7 @@ static int twl4030_remove(struct i2c_client *client)
}
/* NOTE: this driver only handles a single twl4030/tps659x0 chip */
-static int
+static int __init
twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
{
int status;
diff --git a/drivers/mfd/ucb1x00-assabet.c b/drivers/mfd/ucb1x00-assabet.c
index 86fed4870f93..cea9da60850d 100644
--- a/drivers/mfd/ucb1x00-assabet.c
+++ b/drivers/mfd/ucb1x00-assabet.c
@@ -14,10 +14,10 @@
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/device.h>
+#include <linux/mfd/ucb1x00.h>
#include <mach/dma.h>
-#include "ucb1x00.h"
#define UCB1X00_ATTR(name,input)\
static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 60c3988f3cf3..252b74188ec2 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -25,12 +25,12 @@
#include <linux/interrupt.h>
#include <linux/device.h>
#include <linux/mutex.h>
+#include <linux/mfd/ucb1x00.h>
+#include <linux/gpio.h>
#include <mach/dma.h>
#include <mach/hardware.h>
-#include "ucb1x00.h"
-
static DEFINE_MUTEX(ucb1x00_mutex);
static LIST_HEAD(ucb1x00_drivers);
static LIST_HEAD(ucb1x00_devices);
@@ -108,6 +108,60 @@ unsigned int ucb1x00_io_read(struct ucb1x00 *ucb)
return ucb1x00_reg_read(ucb, UCB_IO_DATA);
}
+static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+ struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ucb->io_lock, flags);
+ if (value)
+ ucb->io_out |= 1 << offset;
+ else
+ ucb->io_out &= ~(1 << offset);
+
+ ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out);
+ spin_unlock_irqrestore(&ucb->io_lock, flags);
+}
+
+static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+ struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ return ucb1x00_reg_read(ucb, UCB_IO_DATA) & (1 << offset);
+}
+
+static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+ struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ucb->io_lock, flags);
+ ucb->io_dir &= ~(1 << offset);
+ ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
+ spin_unlock_irqrestore(&ucb->io_lock, flags);
+
+ return 0;
+}
+
+static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset
+ , int value)
+{
+ struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ucb->io_lock, flags);
+ ucb->io_dir |= (1 << offset);
+ ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
+
+ if (value)
+ ucb->io_out |= 1 << offset;
+ else
+ ucb->io_out &= ~(1 << offset);
+ ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out);
+ spin_unlock_irqrestore(&ucb->io_lock, flags);
+
+ return 0;
+}
+
/*
* UCB1300 data sheet says we must:
* 1. enable ADC => 5us (including reference startup time)
@@ -476,6 +530,7 @@ static int ucb1x00_probe(struct mcp *mcp)
struct ucb1x00_driver *drv;
unsigned int id;
int ret = -ENODEV;
+ int temp;
mcp_enable(mcp);
id = mcp_reg_read(mcp, UCB_ID);
@@ -508,12 +563,27 @@ static int ucb1x00_probe(struct mcp *mcp)
goto err_free;
}
+ ucb->gpio.base = -1;
+ if (mcp->gpio_base != 0) {
+ ucb->gpio.label = dev_name(&ucb->dev);
+ ucb->gpio.base = mcp->gpio_base;
+ ucb->gpio.ngpio = 10;
+ ucb->gpio.set = ucb1x00_gpio_set;
+ ucb->gpio.get = ucb1x00_gpio_get;
+ ucb->gpio.direction_input = ucb1x00_gpio_direction_input;
+ ucb->gpio.direction_output = ucb1x00_gpio_direction_output;
+ ret = gpiochip_add(&ucb->gpio);
+ if (ret)
+ goto err_free;
+ } else
+ dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
+
ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
"UCB1x00", ucb);
if (ret) {
printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
ucb->irq, ret);
- goto err_free;
+ goto err_gpio;
}
mcp_set_drvdata(mcp, ucb);
@@ -522,6 +592,7 @@ static int ucb1x00_probe(struct mcp *mcp)
if (ret)
goto err_irq;
+
INIT_LIST_HEAD(&ucb->devs);
mutex_lock(&ucb1x00_mutex);
list_add(&ucb->node, &ucb1x00_devices);
@@ -529,10 +600,14 @@ static int ucb1x00_probe(struct mcp *mcp)
ucb1x00_add_dev(ucb, drv);
}
mutex_unlock(&ucb1x00_mutex);
+
goto out;
err_irq:
free_irq(ucb->irq, ucb);
+ err_gpio:
+ if (ucb->gpio.base != -1)
+ temp = gpiochip_remove(&ucb->gpio);
err_free:
kfree(ucb);
err_disable:
@@ -545,6 +620,7 @@ static void ucb1x00_remove(struct mcp *mcp)
{
struct ucb1x00 *ucb = mcp_get_drvdata(mcp);
struct list_head *l, *n;
+ int ret;
mutex_lock(&ucb1x00_mutex);
list_del(&ucb->node);
@@ -554,6 +630,12 @@ static void ucb1x00_remove(struct mcp *mcp)
}
mutex_unlock(&ucb1x00_mutex);
+ if (ucb->gpio.base != -1) {
+ ret = gpiochip_remove(&ucb->gpio);
+ if (ret)
+ dev_err(&ucb->dev, "Can't remove gpio chip: %d\n", ret);
+ }
+
free_irq(ucb->irq, ucb);
device_unregister(&ucb->dev);
}
@@ -604,6 +686,7 @@ static int ucb1x00_resume(struct mcp *mcp)
struct ucb1x00 *ucb = mcp_get_drvdata(mcp);
struct ucb1x00_dev *dev;
+ ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir);
mutex_lock(&ucb1x00_mutex);
list_for_each_entry(dev, &ucb->devs, dev_node) {
if (dev->drv->resume)
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 61b7d3eb9a2f..000cb414a78a 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -30,12 +30,12 @@
#include <linux/freezer.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/mfd/ucb1x00.h>
#include <mach/dma.h>
#include <mach/collie.h>
#include <asm/mach-types.h>
-#include "ucb1x00.h"
struct ucb1x00_ts {
diff --git a/drivers/mfd/ucb1x00.h b/drivers/mfd/ucb1x00.h
deleted file mode 100644
index a8ad8a0ed5db..000000000000
--- a/drivers/mfd/ucb1x00.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * linux/drivers/mfd/ucb1x00.h
- *
- * Copyright (C) 2001 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- */
-#ifndef UCB1200_H
-#define UCB1200_H
-
-#define UCB_IO_DATA 0x00
-#define UCB_IO_DIR 0x01
-
-#define UCB_IO_0 (1 << 0)
-#define UCB_IO_1 (1 << 1)
-#define UCB_IO_2 (1 << 2)
-#define UCB_IO_3 (1 << 3)
-#define UCB_IO_4 (1 << 4)
-#define UCB_IO_5 (1 << 5)
-#define UCB_IO_6 (1 << 6)
-#define UCB_IO_7 (1 << 7)
-#define UCB_IO_8 (1 << 8)
-#define UCB_IO_9 (1 << 9)
-
-#define UCB_IE_RIS 0x02
-#define UCB_IE_FAL 0x03
-#define UCB_IE_STATUS 0x04
-#define UCB_IE_CLEAR 0x04
-#define UCB_IE_ADC (1 << 11)
-#define UCB_IE_TSPX (1 << 12)
-#define UCB_IE_TSMX (1 << 13)
-#define UCB_IE_TCLIP (1 << 14)
-#define UCB_IE_ACLIP (1 << 15)
-
-#define UCB_IRQ_TSPX 12
-
-#define UCB_TC_A 0x05
-#define UCB_TC_A_LOOP (1 << 7) /* UCB1200 */
-#define UCB_TC_A_AMPL (1 << 7) /* UCB1300 */
-
-#define UCB_TC_B 0x06
-#define UCB_TC_B_VOICE_ENA (1 << 3)
-#define UCB_TC_B_CLIP (1 << 4)
-#define UCB_TC_B_ATT (1 << 6)
-#define UCB_TC_B_SIDE_ENA (1 << 11)
-#define UCB_TC_B_MUTE (1 << 13)
-#define UCB_TC_B_IN_ENA (1 << 14)
-#define UCB_TC_B_OUT_ENA (1 << 15)
-
-#define UCB_AC_A 0x07
-#define UCB_AC_B 0x08
-#define UCB_AC_B_LOOP (1 << 8)
-#define UCB_AC_B_MUTE (1 << 13)
-#define UCB_AC_B_IN_ENA (1 << 14)
-#define UCB_AC_B_OUT_ENA (1 << 15)
-
-#define UCB_TS_CR 0x09
-#define UCB_TS_CR_TSMX_POW (1 << 0)
-#define UCB_TS_CR_TSPX_POW (1 << 1)
-#define UCB_TS_CR_TSMY_POW (1 << 2)
-#define UCB_TS_CR_TSPY_POW (1 << 3)
-#define UCB_TS_CR_TSMX_GND (1 << 4)
-#define UCB_TS_CR_TSPX_GND (1 << 5)
-#define UCB_TS_CR_TSMY_GND (1 << 6)
-#define UCB_TS_CR_TSPY_GND (1 << 7)
-#define UCB_TS_CR_MODE_INT (0 << 8)
-#define UCB_TS_CR_MODE_PRES (1 << 8)
-#define UCB_TS_CR_MODE_POS (2 << 8)
-#define UCB_TS_CR_BIAS_ENA (1 << 11)
-#define UCB_TS_CR_TSPX_LOW (1 << 12)
-#define UCB_TS_CR_TSMX_LOW (1 << 13)
-
-#define UCB_ADC_CR 0x0a
-#define UCB_ADC_SYNC_ENA (1 << 0)
-#define UCB_ADC_VREFBYP_CON (1 << 1)
-#define UCB_ADC_INP_TSPX (0 << 2)
-#define UCB_ADC_INP_TSMX (1 << 2)
-#define UCB_ADC_INP_TSPY (2 << 2)
-#define UCB_ADC_INP_TSMY (3 << 2)
-#define UCB_ADC_INP_AD0 (4 << 2)
-#define UCB_ADC_INP_AD1 (5 << 2)
-#define UCB_ADC_INP_AD2 (6 << 2)
-#define UCB_ADC_INP_AD3 (7 << 2)
-#define UCB_ADC_EXT_REF (1 << 5)
-#define UCB_ADC_START (1 << 7)
-#define UCB_ADC_ENA (1 << 15)
-
-#define UCB_ADC_DATA 0x0b
-#define UCB_ADC_DAT_VAL (1 << 15)
-#define UCB_ADC_DAT(x) (((x) & 0x7fe0) >> 5)
-
-#define UCB_ID 0x0c
-#define UCB_ID_1200 0x1004
-#define UCB_ID_1300 0x1005
-#define UCB_ID_TC35143 0x9712
-
-#define UCB_MODE 0x0d
-#define UCB_MODE_DYN_VFLAG_ENA (1 << 12)
-#define UCB_MODE_AUD_OFF_CAN (1 << 13)
-
-#include "mcp.h"
-
-struct ucb1x00_irq {
- void *devid;
- void (*fn)(int, void *);
-};
-
-struct ucb1x00 {
- spinlock_t lock;
- struct mcp *mcp;
- unsigned int irq;
- struct semaphore adc_sem;
- spinlock_t io_lock;
- u16 id;
- u16 io_dir;
- u16 io_out;
- u16 adc_cr;
- u16 irq_fal_enbl;
- u16 irq_ris_enbl;
- struct ucb1x00_irq irq_handler[16];
- struct device dev;
- struct list_head node;
- struct list_head devs;
-};
-
-struct ucb1x00_driver;
-
-struct ucb1x00_dev {
- struct list_head dev_node;
- struct list_head drv_node;
- struct ucb1x00 *ucb;
- struct ucb1x00_driver *drv;
- void *priv;
-};
-
-struct ucb1x00_driver {
- struct list_head node;
- struct list_head devs;
- int (*add)(struct ucb1x00_dev *dev);
- void (*remove)(struct ucb1x00_dev *dev);
- int (*suspend)(struct ucb1x00_dev *dev, pm_message_t state);
- int (*resume)(struct ucb1x00_dev *dev);
-};
-
-#define classdev_to_ucb1x00(cd) container_of(cd, struct ucb1x00, dev)
-
-int ucb1x00_register_driver(struct ucb1x00_driver *);
-void ucb1x00_unregister_driver(struct ucb1x00_driver *);
-
-/**
- * ucb1x00_clkrate - return the UCB1x00 SIB clock rate
- * @ucb: UCB1x00 structure describing chip
- *
- * Return the SIB clock rate in Hz.
- */
-static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb)
-{
- return mcp_get_sclk_rate(ucb->mcp);
-}
-
-/**
- * ucb1x00_enable - enable the UCB1x00 SIB clock
- * @ucb: UCB1x00 structure describing chip
- *
- * Enable the SIB clock. This can be called multiple times.
- */
-static inline void ucb1x00_enable(struct ucb1x00 *ucb)
-{
- mcp_enable(ucb->mcp);
-}
-
-/**
- * ucb1x00_disable - disable the UCB1x00 SIB clock
- * @ucb: UCB1x00 structure describing chip
- *
- * Disable the SIB clock. The SIB clock will only be disabled
- * when the number of ucb1x00_enable calls match the number of
- * ucb1x00_disable calls.
- */
-static inline void ucb1x00_disable(struct ucb1x00 *ucb)
-{
- mcp_disable(ucb->mcp);
-}
-
-/**
- * ucb1x00_reg_write - write a UCB1x00 register
- * @ucb: UCB1x00 structure describing chip
- * @reg: UCB1x00 4-bit register index to write
- * @val: UCB1x00 16-bit value to write
- *
- * Write the UCB1x00 register @reg with value @val. The SIB
- * clock must be running for this function to return.
- */
-static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val)
-{
- mcp_reg_write(ucb->mcp, reg, val);
-}
-
-/**
- * ucb1x00_reg_read - read a UCB1x00 register
- * @ucb: UCB1x00 structure describing chip
- * @reg: UCB1x00 4-bit register index to write
- *
- * Read the UCB1x00 register @reg and return its value. The SIB
- * clock must be running for this function to return.
- */
-static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg)
-{
- return mcp_reg_read(ucb->mcp, reg);
-}
-/**
- * ucb1x00_set_audio_divisor -
- * @ucb: UCB1x00 structure describing chip
- * @div: SIB clock divisor
- */
-static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div)
-{
- mcp_set_audio_divisor(ucb->mcp, div);
-}
-
-/**
- * ucb1x00_set_telecom_divisor -
- * @ucb: UCB1x00 structure describing chip
- * @div: SIB clock divisor
- */
-static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div)
-{
- mcp_set_telecom_divisor(ucb->mcp, div);
-}
-
-void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int);
-void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int);
-unsigned int ucb1x00_io_read(struct ucb1x00 *ucb);
-
-#define UCB_NOSYNC (0)
-#define UCB_SYNC (1)
-
-unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync);
-void ucb1x00_adc_enable(struct ucb1x00 *ucb);
-void ucb1x00_adc_disable(struct ucb1x00 *ucb);
-
-/*
- * Which edges of the IRQ do you want to control today?
- */
-#define UCB_RISING (1 << 0)
-#define UCB_FALLING (1 << 1)
-
-int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid);
-void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
-void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges);
-int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid);
-
-#endif
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index a2ea383105a6..2c16ca6501d5 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -246,6 +246,16 @@ config EP93XX_PWM
To compile this driver as a module, choose M here: the module will
be called ep93xx_pwm.
+config DS1682
+ tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
+ depends on I2C && EXPERIMENTAL
+ help
+ If you say yes here you get support for Dallas Semiconductor
+ DS1682 Total Elapsed Time Recorder.
+
+ This driver can also be built as a module. If so, the module
+ will be called ds1682.
+
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index e311267a355f..906a0edcea40 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SGI_GRU) += sgi-gru/
obj-$(CONFIG_HP_ILO) += hpilo.o
obj-$(CONFIG_ISL29003) += isl29003.o
obj-$(CONFIG_EP93XX_PWM) += ep93xx_pwm.o
+obj-$(CONFIG_DS1682) += ds1682.o
obj-$(CONFIG_C2PORT) += c2port/
obj-$(CONFIG_IWMC3200TOP) += iwmc3200top/
obj-y += eeprom/
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/misc/ds1682.c
index f3ee4a1abb77..f3ee4a1abb77 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/misc/ds1682.c
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 6e43ab4231ae..4bb7a3af9ad9 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -417,32 +417,25 @@ static int ics932s401_detect(struct i2c_client *client, int kind,
struct i2c_board_info *info)
{
struct i2c_adapter *adapter = client->adapter;
+ int vendor, device, revision;
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
return -ENODEV;
- if (kind <= 0) {
- int vendor, device, revision;
-
- vendor = i2c_smbus_read_word_data(client,
- ICS932S401_REG_VENDOR_REV);
- vendor >>= 8;
- revision = vendor >> ICS932S401_REV_SHIFT;
- vendor &= ICS932S401_VENDOR_MASK;
- if (vendor != ICS932S401_VENDOR)
- return -ENODEV;
-
- device = i2c_smbus_read_word_data(client,
- ICS932S401_REG_DEVICE);
- device >>= 8;
- if (device != ICS932S401_DEVICE)
- return -ENODEV;
-
- if (revision != ICS932S401_REV)
- dev_info(&adapter->dev, "Unknown revision %d\n",
- revision);
- } else
- dev_dbg(&adapter->dev, "detection forced\n");
+ vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV);
+ vendor >>= 8;
+ revision = vendor >> ICS932S401_REV_SHIFT;
+ vendor &= ICS932S401_VENDOR_MASK;
+ if (vendor != ICS932S401_VENDOR)
+ return -ENODEV;
+
+ device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE);
+ device >>= 8;
+ if (device != ICS932S401_DEVICE)
+ return -ENODEV;
+
+ if (revision != ICS932S401_REV)
+ dev_info(&adapter->dev, "Unknown revision %d\n", revision);
strlcpy(info->type, "ics932s401", I2C_NAME_SIZE);
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index fd3688a3e23f..832ed4c88cf7 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -89,48 +89,40 @@ static int xpc_disengage_max_timelimit = 120;
static ctl_table xpc_sys_xpc_hb_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hb_interval",
.data = &xpc_hb_interval,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xpc_hb_min_interval,
.extra2 = &xpc_hb_max_interval},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hb_check_interval",
.data = &xpc_hb_check_interval,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xpc_hb_check_min_interval,
.extra2 = &xpc_hb_check_max_interval},
{}
};
static ctl_table xpc_sys_xpc_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "hb",
.mode = 0555,
.child = xpc_sys_xpc_hb_dir},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "disengage_timelimit",
.data = &xpc_disengage_timelimit,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &xpc_disengage_min_timelimit,
.extra2 = &xpc_disengage_max_timelimit},
{}
};
static ctl_table xpc_sys_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "xpc",
.mode = 0555,
.child = xpc_sys_xpc_dir},
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index c76677afda1b..b5bbe59f9c57 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
#if defined CONFIG_X86_64
- mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+ mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+ UV_AFFINITY_CPU);
if (mq->irq < 0) {
dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
-mq->irq);
@@ -136,7 +137,7 @@ static void
xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
{
#if defined CONFIG_X86_64
- uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+ uv_teardown_irq(mq->irq);
#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
int mmr_pnode;
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 705a5894a6bb..90d168ad03b6 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -56,7 +56,7 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
clk = 255;
host->cclk = host->mclk / (2 * (clk + 1));
}
- if (host->hw_designer == 0x80)
+ if (host->hw_designer == AMBA_VENDOR_ST)
clk |= MCI_FCEN; /* Bug fix in ST IP block */
clk |= MCI_CLK_ENABLE;
/* This hasn't proven to be worthwhile */
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index b8fd7af1ceeb..5f970e253e50 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -30,12 +30,12 @@
#include <asm/io.h>
#include <asm/irq.h>
-#include <mach/board.h>
-#include <mach/mmc.h>
+#include <plat/board.h>
+#include <plat/mmc.h>
#include <mach/gpio.h>
-#include <mach/dma.h>
-#include <mach/mux.h>
-#include <mach/fpga.h>
+#include <plat/dma.h>
+#include <plat/mux.h>
+#include <plat/fpga.h>
#define OMAP_MMC_REG_CMD 0x00
#define OMAP_MMC_REG_ARGL 0x04
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 0aecaaebef3d..4b2322518909 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -30,11 +30,11 @@
#include <linux/mmc/core.h>
#include <linux/io.h>
#include <linux/semaphore.h>
-#include <mach/dma.h>
+#include <plat/dma.h>
#include <mach/hardware.h>
-#include <mach/board.h>
-#include <mach/mmc.h>
-#include <mach/cpu.h>
+#include <plat/board.h>
+#include <plat/mmc.h>
+#include <plat/cpu.h>
/* OMAP HSMMC Host Controller Registers */
#define OMAP_HSMMC_SYSCONFIG 0x0010
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 9fb480bb0e0a..bb47ff465c04 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -43,6 +43,9 @@
#define NR_SG 1
#define CLKRT_OFF (~0)
+#define mmc_has_26MHz() (cpu_is_pxa300() || cpu_is_pxa310() \
+ || cpu_is_pxa935())
+
struct pxamci_host {
struct mmc_host *mmc;
spinlock_t lock;
@@ -457,7 +460,7 @@ static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
clk_enable(host->clk);
if (ios->clock == 26000000) {
- /* to support 26MHz on pxa300/pxa310 */
+ /* to support 26MHz */
host->clkrt = 7;
} else {
/* to handle (19.5MHz, 26MHz) */
@@ -608,8 +611,7 @@ static int pxamci_probe(struct platform_device *pdev)
* Calculate minimum clock rate, rounding up.
*/
mmc->f_min = (host->clkrate + 63) / 64;
- mmc->f_max = (cpu_is_pxa300() || cpu_is_pxa310()) ? 26000000
- : host->clkrate;
+ mmc->f_max = (mmc_has_26MHz()) ? 26000000 : host->clkrate;
pxamci_init_ocr(host);
@@ -618,7 +620,7 @@ static int pxamci_probe(struct platform_device *pdev)
if (!cpu_is_pxa25x()) {
mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ;
host->cmdat |= CMDAT_SDIO_INT_EN;
- if (cpu_is_pxa300() || cpu_is_pxa310())
+ if (mmc_has_26MHz())
mmc->caps |= MMC_CAP_MMC_HIGHSPEED |
MMC_CAP_SD_HIGHSPEED;
}
diff --git a/drivers/mtd/maps/omap_nor.c b/drivers/mtd/maps/omap_nor.c
index a24478102b11..ead0b2fab670 100644
--- a/drivers/mtd/maps/omap_nor.c
+++ b/drivers/mtd/maps/omap_nor.c
@@ -45,7 +45,7 @@
#include <asm/io.h>
#include <mach/hardware.h>
#include <asm/mach/flash.h>
-#include <mach/tc.h>
+#include <plat/tc.h>
#ifdef CONFIG_MTD_PARTITIONS
static const char *part_probes[] = { /* "RedBoot", */ "cmdlinepart", NULL };
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 8ca17a3e96ea..64e2b379a350 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -59,12 +59,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
return -EIO;
+ rq_flush_dcache_pages(req);
return 0;
case WRITE:
if (!tr->writesect)
return -EIO;
+ rq_flush_dcache_pages(req);
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
return -EIO;
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 2fda0b615246..8f8e87b7ed64 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -358,7 +358,7 @@ endchoice
config MTD_NAND_PXA3xx
tristate "Support for NAND flash devices on PXA3xx"
- depends on MTD_NAND && PXA3xx
+ depends on MTD_NAND && (PXA3xx || ARCH_MMP)
help
This enables the driver for the NAND flash device found on
PXA3xx processors
diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c
index 005b91f096f2..2548e1065bf8 100644
--- a/drivers/mtd/nand/ams-delta.c
+++ b/drivers/mtd/nand/ams-delta.c
@@ -25,7 +25,7 @@
#include <mach/hardware.h>
#include <asm/sizes.h>
#include <mach/gpio.h>
-#include <mach/board-ams-delta.h>
+#include <plat/board-ams-delta.h>
/*
* MTD structure for E3 (Delta)
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 090ab87086b5..1bb799f0125c 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -18,9 +18,9 @@
#include <linux/mtd/partitions.h>
#include <linux/io.h>
-#include <mach/dma.h>
-#include <mach/gpmc.h>
-#include <mach/nand.h>
+#include <plat/dma.h>
+#include <plat/gpmc.h>
+#include <plat/nand.h>
#define GPMC_IRQ_STATUS 0x18
#define GPMC_ECC_CONFIG 0x1F4
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 6ea520ae2410..1a5a0365c983 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -9,6 +9,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
@@ -22,7 +23,7 @@
#include <linux/irq.h>
#include <mach/dma.h>
-#include <mach/pxa3xx_nand.h>
+#include <plat/pxa3xx_nand.h>
#define CHIP_DELAY_TIMEOUT (2 * HZ/10)
@@ -84,10 +85,6 @@
#define NDCB0_CMD1_MASK (0xff)
#define NDCB0_ADDR_CYC_SHIFT (16)
-/* dma-able I/O address for the NAND data and commands */
-#define NDCB0_DMA_ADDR (0x43100048)
-#define NDDB_DMA_ADDR (0x43100040)
-
/* macros for registers read/write */
#define nand_writel(info, off, val) \
__raw_writel((val), (info)->mmio_base + (off))
@@ -123,6 +120,7 @@ struct pxa3xx_nand_info {
struct clk *clk;
void __iomem *mmio_base;
+ unsigned long mmio_phys;
unsigned int buf_start;
unsigned int buf_count;
@@ -228,13 +226,35 @@ static struct pxa3xx_nand_flash samsung512MbX16 = {
.chip_id = 0x46ec,
};
+static struct pxa3xx_nand_flash samsung2GbX8 = {
+ .timing = &samsung512MbX16_timing,
+ .cmdset = &smallpage_cmdset,
+ .page_per_block = 64,
+ .page_size = 2048,
+ .flash_width = 8,
+ .dfc_width = 8,
+ .num_blocks = 2048,
+ .chip_id = 0xdaec,
+};
+
+static struct pxa3xx_nand_flash samsung32GbX8 = {
+ .timing = &samsung512MbX16_timing,
+ .cmdset = &smallpage_cmdset,
+ .page_per_block = 128,
+ .page_size = 4096,
+ .flash_width = 8,
+ .dfc_width = 8,
+ .num_blocks = 8192,
+ .chip_id = 0xd7ec,
+};
+
static struct pxa3xx_nand_timing micron_timing = {
.tCH = 10,
.tCS = 25,
.tWH = 15,
.tWP = 25,
.tRH = 15,
- .tRP = 25,
+ .tRP = 30,
.tR = 25000,
.tWHR = 60,
.tAR = 10,
@@ -262,6 +282,28 @@ static struct pxa3xx_nand_flash micron1GbX16 = {
.chip_id = 0xb12c,
};
+static struct pxa3xx_nand_flash micron4GbX8 = {
+ .timing = &micron_timing,
+ .cmdset = &largepage_cmdset,
+ .page_per_block = 64,
+ .page_size = 2048,
+ .flash_width = 8,
+ .dfc_width = 8,
+ .num_blocks = 4096,
+ .chip_id = 0xdc2c,
+};
+
+static struct pxa3xx_nand_flash micron4GbX16 = {
+ .timing = &micron_timing,
+ .cmdset = &largepage_cmdset,
+ .page_per_block = 64,
+ .page_size = 2048,
+ .flash_width = 16,
+ .dfc_width = 16,
+ .num_blocks = 4096,
+ .chip_id = 0xcc2c,
+};
+
static struct pxa3xx_nand_timing stm2GbX16_timing = {
.tCH = 10,
.tCS = 35,
@@ -287,8 +329,12 @@ static struct pxa3xx_nand_flash stm2GbX16 = {
static struct pxa3xx_nand_flash *builtin_flash_types[] = {
&samsung512MbX16,
+ &samsung2GbX8,
+ &samsung32GbX8,
&micron1GbX8,
&micron1GbX16,
+ &micron4GbX8,
+ &micron4GbX16,
&stm2GbX16,
};
#endif /* CONFIG_MTD_NAND_PXA3xx_BUILTIN */
@@ -489,7 +535,7 @@ static int handle_data_pio(struct pxa3xx_nand_info *info)
switch (info->state) {
case STATE_PIO_WRITING:
__raw_writesl(info->mmio_base + NDDB, info->data_buff,
- info->data_size << 2);
+ DIV_ROUND_UP(info->data_size, 4));
enable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
@@ -501,7 +547,7 @@ static int handle_data_pio(struct pxa3xx_nand_info *info)
break;
case STATE_PIO_READING:
__raw_readsl(info->mmio_base + NDDB, info->data_buff,
- info->data_size << 2);
+ DIV_ROUND_UP(info->data_size, 4));
break;
default:
printk(KERN_ERR "%s: invalid state %d\n", __func__,
@@ -523,11 +569,11 @@ static void start_data_dma(struct pxa3xx_nand_info *info, int dir_out)
if (dir_out) {
desc->dsadr = info->data_buff_phys;
- desc->dtadr = NDDB_DMA_ADDR;
+ desc->dtadr = info->mmio_phys + NDDB;
desc->dcmd |= DCMD_INCSRCADDR | DCMD_FLOWTRG;
} else {
desc->dtadr = info->data_buff_phys;
- desc->dsadr = NDDB_DMA_ADDR;
+ desc->dsadr = info->mmio_phys + NDDB;
desc->dcmd |= DCMD_INCTRGADDR | DCMD_FLOWSRC;
}
@@ -669,6 +715,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
/* disable HW ECC to get all the OOB data */
info->buf_count = mtd->writesize + mtd->oobsize;
info->buf_start = mtd->writesize + column;
+ memset(info->data_buff, 0xFF, info->buf_count);
if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr))
break;
@@ -1239,13 +1286,17 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
ret = -ENODEV;
goto fail_free_res;
}
+ info->mmio_phys = r->start;
ret = pxa3xx_nand_init_buff(info);
if (ret)
goto fail_free_io;
- ret = request_irq(IRQ_NAND, pxa3xx_nand_irq, IRQF_DISABLED,
- pdev->name, info);
+ /* initialize all interrupts to be disabled */
+ disable_int(info, NDSR_MASK);
+
+ ret = request_irq(irq, pxa3xx_nand_irq, IRQF_DISABLED,
+ pdev->name, info);
if (ret < 0) {
dev_err(&pdev->dev, "failed to request IRQ\n");
goto fail_free_buf;
@@ -1271,7 +1322,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
return add_mtd_partitions(mtd, pdata->parts, pdata->nr_parts);
fail_free_irq:
- free_irq(IRQ_NAND, info);
+ free_irq(irq, info);
fail_free_buf:
if (use_dma) {
pxa_free_dma(info->data_dma_ch);
@@ -1296,12 +1347,15 @@ static int pxa3xx_nand_remove(struct platform_device *pdev)
struct mtd_info *mtd = platform_get_drvdata(pdev);
struct pxa3xx_nand_info *info = mtd->priv;
struct resource *r;
+ int irq;
platform_set_drvdata(pdev, NULL);
del_mtd_device(mtd);
del_mtd_partitions(mtd);
- free_irq(IRQ_NAND, info);
+ irq = platform_get_irq(pdev, 0);
+ if (irq >= 0)
+ free_irq(irq, info);
if (use_dma) {
pxa_free_dma(info->data_dma_ch);
dma_free_writecombine(&pdev->dev, info->data_buff_size,
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 0108ed42e877..86c4f6dcdc65 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -36,13 +36,13 @@
#include <linux/io.h>
#include <asm/mach/flash.h>
-#include <mach/gpmc.h>
-#include <mach/onenand.h>
+#include <plat/gpmc.h>
+#include <plat/onenand.h>
#include <mach/gpio.h>
-#include <mach/dma.h>
+#include <plat/dma.h>
-#include <mach/board.h>
+#include <plat/board.h>
#define DRIVER_NAME "omap2-onenand"
diff --git a/drivers/of/base.c b/drivers/of/base.c
index ddf224d456b2..e6627b2320f1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -9,7 +9,8 @@
*
* Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net
*
- * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell.
+ * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and
+ * Grant Likely.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -82,6 +83,29 @@ struct property *of_find_property(const struct device_node *np,
}
EXPORT_SYMBOL(of_find_property);
+/**
+ * of_find_all_nodes - Get next node in global list
+ * @prev: Previous node or NULL to start iteration
+ * of_node_put() will be called on it
+ *
+ * Returns a node pointer with refcount incremented, use
+ * of_node_put() on it when done.
+ */
+struct device_node *of_find_all_nodes(struct device_node *prev)
+{
+ struct device_node *np;
+
+ read_lock(&devtree_lock);
+ np = prev ? prev->allnext : allnodes;
+ for (; np != NULL; np = np->allnext)
+ if (of_node_get(np))
+ break;
+ of_node_put(prev);
+ read_unlock(&devtree_lock);
+ return np;
+}
+EXPORT_SYMBOL(of_find_all_nodes);
+
/*
* Find a property with a given name for a given node
* and return the value.
diff --git a/drivers/parport/parport_mfc3.c b/drivers/parport/parport_mfc3.c
index 6dec9ba5ed28..362db31d8ca6 100644
--- a/drivers/parport/parport_mfc3.c
+++ b/drivers/parport/parport_mfc3.c
@@ -386,7 +386,7 @@ static void __exit parport_mfc3_exit(void)
if (!this_port[i])
continue;
parport_remove_port(this_port[i]);
- if (!this_port[i]->irq != PARPORT_IRQ_NONE) {
+ if (this_port[i]->irq != PARPORT_IRQ_NONE) {
if (--use_cnt == 0)
free_irq(IRQ_AMIGA_PORTS, &pp_mfc3_ops);
}
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 8eefe56f1cbe..3f56bc086cb5 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -233,10 +233,10 @@ static int do_hardware_modes (ctl_table *table, int write,
return copy_to_user(result, buffer, len) ? -EFAULT : 0;
}
-#define PARPORT_PORT_DIR(CHILD) { .ctl_name = 0, .procname = NULL, .mode = 0555, .child = CHILD }
-#define PARPORT_PARPORT_DIR(CHILD) { .ctl_name = DEV_PARPORT, .procname = "parport", \
+#define PARPORT_PORT_DIR(CHILD) { .procname = NULL, .mode = 0555, .child = CHILD }
+#define PARPORT_PARPORT_DIR(CHILD) { .procname = "parport", \
.mode = 0555, .child = CHILD }
-#define PARPORT_DEV_DIR(CHILD) { .ctl_name = CTL_DEV, .procname = "dev", .mode = 0555, .child = CHILD }
+#define PARPORT_DEV_DIR(CHILD) { .procname = "dev", .mode = 0555, .child = CHILD }
#define PARPORT_DEVICES_ROOT_DIR { .procname = "devices", \
.mode = 0555, .child = NULL }
@@ -270,7 +270,7 @@ static const struct parport_sysctl_table parport_sysctl_template = {
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = (void*) &parport_min_spintime_value,
.extra2 = (void*) &parport_max_spintime_value
},
@@ -279,28 +279,28 @@ static const struct parport_sysctl_table parport_sysctl_template = {
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_hardware_base_addr
+ .proc_handler = do_hardware_base_addr
},
{
.procname = "irq",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_hardware_irq
+ .proc_handler = do_hardware_irq
},
{
.procname = "dma",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_hardware_dma
+ .proc_handler = do_hardware_dma
},
{
.procname = "modes",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_hardware_modes
+ .proc_handler = do_hardware_modes
},
PARPORT_DEVICES_ROOT_DIR,
#ifdef CONFIG_PARPORT_1284
@@ -309,35 +309,35 @@ static const struct parport_sysctl_table parport_sysctl_template = {
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_autoprobe
+ .proc_handler = do_autoprobe
},
{
.procname = "autoprobe0",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_autoprobe
+ .proc_handler = do_autoprobe
},
{
.procname = "autoprobe1",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_autoprobe
+ .proc_handler = do_autoprobe
},
{
.procname = "autoprobe2",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_autoprobe
+ .proc_handler = do_autoprobe
},
{
.procname = "autoprobe3",
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_autoprobe
+ .proc_handler = do_autoprobe
},
#endif /* IEEE 1284 support */
{}
@@ -348,7 +348,7 @@ static const struct parport_sysctl_table parport_sysctl_template = {
.data = NULL,
.maxlen = 0,
.mode = 0444,
- .proc_handler = &do_active_device
+ .proc_handler = do_active_device
},
{}
},
@@ -386,14 +386,13 @@ parport_device_sysctl_template = {
.data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
.extra1 = (void*) &parport_min_timeslice_value,
.extra2 = (void*) &parport_max_timeslice_value
},
},
{
{
- .ctl_name = 0,
.procname = NULL,
.data = NULL,
.maxlen = 0,
@@ -438,7 +437,7 @@ parport_default_sysctl_table = {
.data = &parport_default_timeslice,
.maxlen = sizeof(parport_default_timeslice),
.mode = 0644,
- .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
.extra1 = (void*) &parport_min_timeslice_value,
.extra2 = (void*) &parport_max_timeslice_value
},
@@ -447,7 +446,7 @@ parport_default_sysctl_table = {
.data = &parport_default_spintime,
.maxlen = sizeof(parport_default_spintime),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = (void*) &parport_min_spintime_value,
.extra2 = (void*) &parport_max_spintime_value
},
@@ -455,7 +454,6 @@ parport_default_sysctl_table = {
},
{
{
- .ctl_name = DEV_PARPORT_DEFAULT,
.procname = "default",
.mode = 0555,
.child = parport_default_sysctl_table.vars
@@ -495,7 +493,6 @@ int parport_proc_register(struct parport *port)
t->vars[6 + i].extra2 = &port->probe_info[i];
t->port_dir[0].procname = port->name;
- t->port_dir[0].ctl_name = 0;
t->port_dir[0].child = t->vars;
t->parport_dir[0].child = t->port_dir;
@@ -534,11 +531,9 @@ int parport_device_proc_register(struct pardevice *device)
t->dev_dir[0].child = t->parport_dir;
t->parport_dir[0].child = t->port_dir;
t->port_dir[0].procname = port->name;
- t->port_dir[0].ctl_name = 0;
t->port_dir[0].child = t->devices_root_dir;
t->devices_root_dir[0].child = t->device_dir;
- t->device_dir[0].ctl_name = 0;
t->device_dir[0].procname = device->name;
t->device_dir[0].child = t->vars;
t->vars[0].data = &device->timeslice;
diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
index f3ccbccf5f21..cd5082d3ca19 100644
--- a/drivers/pcmcia/Kconfig
+++ b/drivers/pcmcia/Kconfig
@@ -179,7 +179,7 @@ config PCMCIA_BCM63XX
depends on BCM63XX && PCMCIA
config PCMCIA_SOC_COMMON
- bool
+ tristate
config PCMCIA_SA1100
tristate "SA1100 support"
diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c
index 68570bc3ac86..663781d20129 100644
--- a/drivers/pcmcia/omap_cf.c
+++ b/drivers/pcmcia/omap_cf.c
@@ -23,8 +23,8 @@
#include <asm/io.h>
#include <asm/sizes.h>
-#include <mach/mux.h>
-#include <mach/tc.h>
+#include <plat/mux.h>
+#include <plat/tc.h>
/* NOTE: don't expect this to support many I/O cards. The 16xx chips have
diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
index 11cc3ba1260a..8db86b90c200 100644
--- a/drivers/pcmcia/sa1100_generic.c
+++ b/drivers/pcmcia/sa1100_generic.c
@@ -51,7 +51,7 @@ static int (*sa11x0_pcmcia_hw_init[])(struct device *dev) = {
#ifdef CONFIG_SA1100_CERF
pcmcia_cerf_init,
#endif
-#ifdef CONFIG_SA1100_H3600
+#if defined(CONFIG_SA1100_H3100) || defined(CONFIG_SA1100_H3600)
pcmcia_h3600_init,
#endif
#ifdef CONFIG_SA1100_SHANNON
diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c
index 3a121ac697d6..56329ad575a9 100644
--- a/drivers/pcmcia/sa1100_h3600.c
+++ b/drivers/pcmcia/sa1100_h3600.c
@@ -10,47 +10,139 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/gpio.h>
#include <mach/hardware.h>
#include <asm/irq.h>
#include <asm/mach-types.h>
-#include <mach/h3600.h>
+#include <mach/h3xxx.h>
#include "sa1100_generic.h"
static struct pcmcia_irqs irqs[] = {
- { 0, IRQ_GPIO_H3600_PCMCIA_CD0, "PCMCIA CD0" },
- { 1, IRQ_GPIO_H3600_PCMCIA_CD1, "PCMCIA CD1" }
+ { .sock = 0, .str = "PCMCIA CD0" }, /* .irq will be filled later */
+ { .sock = 1, .str = "PCMCIA CD1" }
};
static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
{
- skt->socket.pci_irq = skt->nr ? IRQ_GPIO_H3600_PCMCIA_IRQ1
- : IRQ_GPIO_H3600_PCMCIA_IRQ0;
+ int err;
+ switch (skt->nr) {
+ case 0:
+ err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ0, "PCMCIA IRQ0");
+ if (err)
+ goto err00;
+ err = gpio_direction_input(H3XXX_GPIO_PCMCIA_IRQ0);
+ if (err)
+ goto err01;
+ skt->socket.pci_irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_IRQ0);
+
+ err = gpio_request(H3XXX_GPIO_PCMCIA_CD0, "PCMCIA CD0");
+ if (err)
+ goto err01;
+ err = gpio_direction_input(H3XXX_GPIO_PCMCIA_CD0);
+ if (err)
+ goto err02;
+ irqs[0].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD0);
+
+ err = gpio_request(H3XXX_EGPIO_OPT_NVRAM_ON, "OPT NVRAM ON");
+ if (err)
+ goto err02;
+ err = gpio_direction_output(H3XXX_EGPIO_OPT_NVRAM_ON, 0);
+ if (err)
+ goto err03;
+ err = gpio_request(H3XXX_EGPIO_OPT_ON, "OPT ON");
+ if (err)
+ goto err03;
+ err = gpio_direction_output(H3XXX_EGPIO_OPT_ON, 0);
+ if (err)
+ goto err04;
+ err = gpio_request(H3XXX_EGPIO_OPT_RESET, "OPT RESET");
+ if (err)
+ goto err04;
+ err = gpio_direction_output(H3XXX_EGPIO_OPT_RESET, 0);
+ if (err)
+ goto err05;
+ err = gpio_request(H3XXX_EGPIO_CARD_RESET, "PCMCIA CARD RESET");
+ if (err)
+ goto err05;
+ err = gpio_direction_output(H3XXX_EGPIO_CARD_RESET, 0);
+ if (err)
+ goto err06;
+ err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
+ if (err)
+ goto err06;
+ break;
+ case 1:
+ err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ1, "PCMCIA IRQ1");
+ if (err)
+ goto err10;
+ err = gpio_direction_input(H3XXX_GPIO_PCMCIA_IRQ1);
+ if (err)
+ goto err11;
+ skt->socket.pci_irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_IRQ1);
+
+ err = gpio_request(H3XXX_GPIO_PCMCIA_CD1, "PCMCIA CD1");
+ if (err)
+ goto err11;
+ err = gpio_direction_input(H3XXX_GPIO_PCMCIA_CD1);
+ if (err)
+ goto err12;
+ irqs[1].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD1);
+
+ err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
+ if (err)
+ goto err12;
+ break;
+ }
+ return 0;
- return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
+err06: gpio_free(H3XXX_EGPIO_CARD_RESET);
+err05: gpio_free(H3XXX_EGPIO_OPT_RESET);
+err04: gpio_free(H3XXX_EGPIO_OPT_ON);
+err03: gpio_free(H3XXX_EGPIO_OPT_NVRAM_ON);
+err02: gpio_free(H3XXX_GPIO_PCMCIA_CD0);
+err01: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0);
+err00: return err;
+
+err12: gpio_free(H3XXX_GPIO_PCMCIA_CD0);
+err11: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0);
+err10: return err;
}
static void h3600_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
{
soc_pcmcia_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
- /* Disable CF bus: */
- assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0);
- assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0);
- assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1);
+ switch (skt->nr) {
+ case 0:
+ /* Disable CF bus: */
+ gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 0);
+ gpio_set_value(H3XXX_EGPIO_OPT_ON, 0);
+ gpio_set_value(H3XXX_EGPIO_OPT_RESET, 1);
+
+ gpio_free(H3XXX_EGPIO_CARD_RESET);
+ gpio_free(H3XXX_EGPIO_OPT_RESET);
+ gpio_free(H3XXX_EGPIO_OPT_ON);
+ gpio_free(H3XXX_EGPIO_OPT_NVRAM_ON);
+ gpio_free(H3XXX_GPIO_PCMCIA_CD0);
+ gpio_free(H3XXX_GPIO_PCMCIA_IRQ0);
+ break;
+ case 1:
+ gpio_free(H3XXX_GPIO_PCMCIA_CD1);
+ gpio_free(H3XXX_GPIO_PCMCIA_IRQ1);
+ break;
+ }
}
static void
h3600_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state)
{
- unsigned long levels = GPLR;
-
switch (skt->nr) {
case 0:
- state->detect = levels & GPIO_H3600_PCMCIA_CD0 ? 0 : 1;
- state->ready = levels & GPIO_H3600_PCMCIA_IRQ0 ? 1 : 0;
+ state->detect = !gpio_get_value(H3XXX_GPIO_PCMCIA_CD0);
+ state->ready = !!gpio_get_value(H3XXX_GPIO_PCMCIA_IRQ0);
state->bvd1 = 0;
state->bvd2 = 0;
state->wrprot = 0; /* Not available on H3600. */
@@ -59,8 +151,8 @@ h3600_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *st
break;
case 1:
- state->detect = levels & GPIO_H3600_PCMCIA_CD1 ? 0 : 1;
- state->ready = levels & GPIO_H3600_PCMCIA_IRQ1 ? 1 : 0;
+ state->detect = !gpio_get_value(H3XXX_GPIO_PCMCIA_CD1);
+ state->ready = !!gpio_get_value(H3XXX_GPIO_PCMCIA_IRQ1);
state->bvd1 = 0;
state->bvd2 = 0;
state->wrprot = 0; /* Not available on H3600. */
@@ -79,7 +171,7 @@ h3600_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_
return -1;
}
- assign_h3600_egpio(IPAQ_EGPIO_CARD_RESET, !!(state->flags & SS_RESET));
+ gpio_set_value(H3XXX_EGPIO_CARD_RESET, !!(state->flags & SS_RESET));
/* Silently ignore Vpp, output enable, speaker enable. */
@@ -89,9 +181,9 @@ h3600_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_
static void h3600_pcmcia_socket_init(struct soc_pcmcia_socket *skt)
{
/* Enable CF bus: */
- assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 1);
- assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 1);
- assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 0);
+ gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 1);
+ gpio_set_value(H3XXX_EGPIO_OPT_ON, 1);
+ gpio_set_value(H3XXX_EGPIO_OPT_RESET, 0);
msleep(10);
@@ -109,10 +201,10 @@ static void h3600_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt)
* socket 0 then socket 1.
*/
if (skt->nr == 1) {
- assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0);
- assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0);
+ gpio_set_value(H3XXX_EGPIO_OPT_ON, 0);
+ gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 0);
/* hmm, does this suck power? */
- assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1);
+ gpio_set_value(H3XXX_EGPIO_OPT_RESET, 1);
}
}
@@ -131,7 +223,7 @@ int __init pcmcia_h3600_init(struct device *dev)
{
int ret = -ENODEV;
- if (machine_is_h3600())
+ if (machine_is_h3600() || machine_is_h3100())
ret = sa11xx_drv_pcmcia_probe(dev, &h3600_pcmcia_ops, 0, 2);
return ret;
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index cea6cef27e89..118674925516 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -77,6 +77,13 @@ config BATTERY_TOSA
Say Y to enable support for the battery on the Sharp Zaurus
SL-6000 (tosa) models.
+config BATTERY_COLLIE
+ tristate "Sharp SL-5500 (collie) battery"
+ depends on SA1100_COLLIE && MCP_UCB1200
+ help
+ Say Y to enable support for the battery on the Sharp Zaurus
+ SL-5500 (collie) models.
+
config BATTERY_WM97XX
bool "WM97xx generic battery driver"
depends on TOUCHSCREEN_WM97XX=y
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index b96f29d91c28..356cdfd3c8b2 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o
obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o
obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o
obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o
+obj-$(CONFIG_BATTERY_COLLIE) += collie_battery.o
obj-$(CONFIG_BATTERY_WM97XX) += wm97xx_battery.o
obj-$(CONFIG_BATTERY_BQ27x00) += bq27x00_battery.o
obj-$(CONFIG_BATTERY_DA9030) += da9030_battery.o
diff --git a/drivers/power/collie_battery.c b/drivers/power/collie_battery.c
new file mode 100644
index 000000000000..039f41ae217d
--- /dev/null
+++ b/drivers/power/collie_battery.c
@@ -0,0 +1,418 @@
+/*
+ * Battery and Power Management code for the Sharp SL-5x00
+ *
+ * Copyright (C) 2009 Thomas Kunze
+ *
+ * based on tosa_battery.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/power_supply.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/mfd/ucb1x00.h>
+
+#include <asm/mach/sharpsl_param.h>
+#include <asm/mach-types.h>
+#include <mach/collie.h>
+
+static DEFINE_MUTEX(bat_lock); /* protects gpio pins */
+static struct work_struct bat_work;
+static struct ucb1x00 *ucb;
+
+struct collie_bat {
+ int status;
+ struct power_supply psy;
+ int full_chrg;
+
+ struct mutex work_lock; /* protects data */
+
+ bool (*is_present)(struct collie_bat *bat);
+ int gpio_full;
+ int gpio_charge_on;
+
+ int technology;
+
+ int gpio_bat;
+ int adc_bat;
+ int adc_bat_divider;
+ int bat_max;
+ int bat_min;
+
+ int gpio_temp;
+ int adc_temp;
+ int adc_temp_divider;
+};
+
+static struct collie_bat collie_bat_main;
+
+static unsigned long collie_read_bat(struct collie_bat *bat)
+{
+ unsigned long value = 0;
+
+ if (bat->gpio_bat < 0 || bat->adc_bat < 0)
+ return 0;
+ mutex_lock(&bat_lock);
+ gpio_set_value(bat->gpio_bat, 1);
+ msleep(5);
+ ucb1x00_adc_enable(ucb);
+ value = ucb1x00_adc_read(ucb, bat->adc_bat, UCB_SYNC);
+ ucb1x00_adc_disable(ucb);
+ gpio_set_value(bat->gpio_bat, 0);
+ mutex_unlock(&bat_lock);
+ value = value * 1000000 / bat->adc_bat_divider;
+
+ return value;
+}
+
+static unsigned long collie_read_temp(struct collie_bat *bat)
+{
+ unsigned long value = 0;
+ if (bat->gpio_temp < 0 || bat->adc_temp < 0)
+ return 0;
+
+ mutex_lock(&bat_lock);
+ gpio_set_value(bat->gpio_temp, 1);
+ msleep(5);
+ ucb1x00_adc_enable(ucb);
+ value = ucb1x00_adc_read(ucb, bat->adc_temp, UCB_SYNC);
+ ucb1x00_adc_disable(ucb);
+ gpio_set_value(bat->gpio_temp, 0);
+ mutex_unlock(&bat_lock);
+
+ value = value * 10000 / bat->adc_temp_divider;
+
+ return value;
+}
+
+static int collie_bat_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ int ret = 0;
+ struct collie_bat *bat = container_of(psy, struct collie_bat, psy);
+
+ if (bat->is_present && !bat->is_present(bat)
+ && psp != POWER_SUPPLY_PROP_PRESENT) {
+ return -ENODEV;
+ }
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+ val->intval = bat->status;
+ break;
+ case POWER_SUPPLY_PROP_TECHNOLOGY:
+ val->intval = bat->technology;
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ val->intval = collie_read_bat(bat);
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_MAX:
+ if (bat->full_chrg == -1)
+ val->intval = bat->bat_max;
+ else
+ val->intval = bat->full_chrg;
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+ val->intval = bat->bat_max;
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+ val->intval = bat->bat_min;
+ break;
+ case POWER_SUPPLY_PROP_TEMP:
+ val->intval = collie_read_temp(bat);
+ break;
+ case POWER_SUPPLY_PROP_PRESENT:
+ val->intval = bat->is_present ? bat->is_present(bat) : 1;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static void collie_bat_external_power_changed(struct power_supply *psy)
+{
+ schedule_work(&bat_work);
+}
+
+static irqreturn_t collie_bat_gpio_isr(int irq, void *data)
+{
+ pr_info("collie_bat_gpio irq: %d\n", gpio_get_value(irq_to_gpio(irq)));
+ schedule_work(&bat_work);
+ return IRQ_HANDLED;
+}
+
+static void collie_bat_update(struct collie_bat *bat)
+{
+ int old;
+ struct power_supply *psy = &bat->psy;
+
+ mutex_lock(&bat->work_lock);
+
+ old = bat->status;
+
+ if (bat->is_present && !bat->is_present(bat)) {
+ printk(KERN_NOTICE "%s not present\n", psy->name);
+ bat->status = POWER_SUPPLY_STATUS_UNKNOWN;
+ bat->full_chrg = -1;
+ } else if (power_supply_am_i_supplied(psy)) {
+ if (bat->status == POWER_SUPPLY_STATUS_DISCHARGING) {
+ gpio_set_value(bat->gpio_charge_on, 1);
+ mdelay(15);
+ }
+
+ if (gpio_get_value(bat->gpio_full)) {
+ if (old == POWER_SUPPLY_STATUS_CHARGING ||
+ bat->full_chrg == -1)
+ bat->full_chrg = collie_read_bat(bat);
+
+ gpio_set_value(bat->gpio_charge_on, 0);
+ bat->status = POWER_SUPPLY_STATUS_FULL;
+ } else {
+ gpio_set_value(bat->gpio_charge_on, 1);
+ bat->status = POWER_SUPPLY_STATUS_CHARGING;
+ }
+ } else {
+ gpio_set_value(bat->gpio_charge_on, 0);
+ bat->status = POWER_SUPPLY_STATUS_DISCHARGING;
+ }
+
+ if (old != bat->status)
+ power_supply_changed(psy);
+
+ mutex_unlock(&bat->work_lock);
+}
+
+static void collie_bat_work(struct work_struct *work)
+{
+ collie_bat_update(&collie_bat_main);
+}
+
+
+static enum power_supply_property collie_bat_main_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX,
+ POWER_SUPPLY_PROP_PRESENT,
+ POWER_SUPPLY_PROP_TEMP,
+};
+
+static enum power_supply_property collie_bat_bu_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX,
+ POWER_SUPPLY_PROP_PRESENT,
+};
+
+static struct collie_bat collie_bat_main = {
+ .status = POWER_SUPPLY_STATUS_DISCHARGING,
+ .full_chrg = -1,
+ .psy = {
+ .name = "main-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = collie_bat_main_props,
+ .num_properties = ARRAY_SIZE(collie_bat_main_props),
+ .get_property = collie_bat_get_property,
+ .external_power_changed = collie_bat_external_power_changed,
+ .use_for_apm = 1,
+ },
+
+ .gpio_full = COLLIE_GPIO_CO,
+ .gpio_charge_on = COLLIE_GPIO_CHARGE_ON,
+
+ .technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
+
+ .gpio_bat = COLLIE_GPIO_MBAT_ON,
+ .adc_bat = UCB_ADC_INP_AD1,
+ .adc_bat_divider = 155,
+ .bat_max = 4310000,
+ .bat_min = 1551 * 1000000 / 414,
+
+ .gpio_temp = COLLIE_GPIO_TMP_ON,
+ .adc_temp = UCB_ADC_INP_AD0,
+ .adc_temp_divider = 10000,
+};
+
+static struct collie_bat collie_bat_bu = {
+ .status = POWER_SUPPLY_STATUS_UNKNOWN,
+ .full_chrg = -1,
+
+ .psy = {
+ .name = "backup-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = collie_bat_bu_props,
+ .num_properties = ARRAY_SIZE(collie_bat_bu_props),
+ .get_property = collie_bat_get_property,
+ .external_power_changed = collie_bat_external_power_changed,
+ },
+
+ .gpio_full = -1,
+ .gpio_charge_on = -1,
+
+ .technology = POWER_SUPPLY_TECHNOLOGY_LiMn,
+
+ .gpio_bat = COLLIE_GPIO_BBAT_ON,
+ .adc_bat = UCB_ADC_INP_AD1,
+ .adc_bat_divider = 155,
+ .bat_max = 3000000,
+ .bat_min = 1900000,
+
+ .gpio_temp = -1,
+ .adc_temp = -1,
+ .adc_temp_divider = -1,
+};
+
+static struct {
+ int gpio;
+ char *name;
+ bool output;
+ int value;
+} gpios[] = {
+ { COLLIE_GPIO_CO, "main battery full", 0, 0 },
+ { COLLIE_GPIO_MAIN_BAT_LOW, "main battery low", 0, 0 },
+ { COLLIE_GPIO_CHARGE_ON, "main charge on", 1, 0 },
+ { COLLIE_GPIO_MBAT_ON, "main battery", 1, 0 },
+ { COLLIE_GPIO_TMP_ON, "main battery temp", 1, 0 },
+ { COLLIE_GPIO_BBAT_ON, "backup battery", 1, 0 },
+};
+
+#ifdef CONFIG_PM
+static int collie_bat_suspend(struct ucb1x00_dev *dev, pm_message_t state)
+{
+ /* flush all pending status updates */
+ flush_scheduled_work();
+ return 0;
+}
+
+static int collie_bat_resume(struct ucb1x00_dev *dev)
+{
+ /* things may have changed while we were away */
+ schedule_work(&bat_work);
+ return 0;
+}
+#else
+#define collie_bat_suspend NULL
+#define collie_bat_resume NULL
+#endif
+
+static int __devinit collie_bat_probe(struct ucb1x00_dev *dev)
+{
+ int ret;
+ int i;
+
+ if (!machine_is_collie())
+ return -ENODEV;
+
+ ucb = dev->ucb;
+
+ for (i = 0; i < ARRAY_SIZE(gpios); i++) {
+ ret = gpio_request(gpios[i].gpio, gpios[i].name);
+ if (ret) {
+ i--;
+ goto err_gpio;
+ }
+
+ if (gpios[i].output)
+ ret = gpio_direction_output(gpios[i].gpio,
+ gpios[i].value);
+ else
+ ret = gpio_direction_input(gpios[i].gpio);
+
+ if (ret)
+ goto err_gpio;
+ }
+
+ mutex_init(&collie_bat_main.work_lock);
+
+ INIT_WORK(&bat_work, collie_bat_work);
+
+ ret = power_supply_register(&dev->ucb->dev, &collie_bat_main.psy);
+ if (ret)
+ goto err_psy_reg_main;
+ ret = power_supply_register(&dev->ucb->dev, &collie_bat_bu.psy);
+ if (ret)
+ goto err_psy_reg_bu;
+
+ ret = request_irq(gpio_to_irq(COLLIE_GPIO_CO),
+ collie_bat_gpio_isr,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+ "main full", &collie_bat_main);
+ if (!ret) {
+ schedule_work(&bat_work);
+ return 0;
+ }
+ power_supply_unregister(&collie_bat_bu.psy);
+err_psy_reg_bu:
+ power_supply_unregister(&collie_bat_main.psy);
+err_psy_reg_main:
+
+ /* see comment in collie_bat_remove */
+ flush_scheduled_work();
+
+ i--;
+err_gpio:
+ for (; i >= 0; i--)
+ gpio_free(gpios[i].gpio);
+
+ return ret;
+}
+
+static void __devexit collie_bat_remove(struct ucb1x00_dev *dev)
+{
+ int i;
+
+ free_irq(gpio_to_irq(COLLIE_GPIO_CO), &collie_bat_main);
+
+ power_supply_unregister(&collie_bat_bu.psy);
+ power_supply_unregister(&collie_bat_main.psy);
+
+ /*
+ * now flush all pending work.
+ * we won't get any more schedules, since all
+ * sources (isr and external_power_changed)
+ * are unregistered now.
+ */
+ flush_scheduled_work();
+
+ for (i = ARRAY_SIZE(gpios) - 1; i >= 0; i--)
+ gpio_free(gpios[i].gpio);
+}
+
+static struct ucb1x00_driver collie_bat_driver = {
+ .add = collie_bat_probe,
+ .remove = __devexit_p(collie_bat_remove),
+ .suspend = collie_bat_suspend,
+ .resume = collie_bat_resume,
+};
+
+static int __init collie_bat_init(void)
+{
+ return ucb1x00_register_driver(&collie_bat_driver);
+}
+
+static void __exit collie_bat_exit(void)
+{
+ ucb1x00_unregister_driver(&collie_bat_driver);
+}
+
+module_init(collie_bat_init);
+module_exit(collie_bat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Kunze");
+MODULE_DESCRIPTION("Collie battery driver");
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 3c20dae43ce2..f2e1004d12c7 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -509,6 +509,15 @@ config RTC_DRV_M48T59
This driver can also be built as a module, if so, the module
will be called "rtc-m48t59".
+config RTC_DRV_MSM6242
+ tristate "Oki MSM6242"
+ help
+ If you say yes here you get support for the Oki MSM6242
+ timekeeping chip. It is used in some Amiga models (e.g. A2000).
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-msm6242.
+
config RTC_MXC
tristate "Freescale MXC Real Time Clock"
depends on ARCH_MXC
@@ -529,6 +538,16 @@ config RTC_DRV_BQ4802
This driver can also be built as a module. If so, the module
will be called rtc-bq4802.
+config RTC_DRV_RP5C01
+ tristate "Ricoh RP5C01"
+ help
+ If you say yes here you get support for the Ricoh RP5C01
+ timekeeping chip. It is used in some Amiga models (e.g. A3000
+ and A4000).
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-rp5c01.
+
config RTC_DRV_V3020
tristate "EM Microelectronic V3020"
help
@@ -780,7 +799,7 @@ config RTC_DRV_TX4939
config RTC_DRV_MV
tristate "Marvell SoC RTC"
- depends on ARCH_KIRKWOOD
+ depends on ARCH_KIRKWOOD || ARCH_DOVE
help
If you say yes here you will get support for the in-chip RTC
that can be found in some of Marvell's SoC devices, such as
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index aa3fbd5517a1..af1ba7ae2857 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
obj-$(CONFIG_RTC_MXC) += rtc-mxc.o
obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
+obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o
obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o
obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o
obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o
@@ -64,6 +65,7 @@ obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
obj-$(CONFIG_RTC_DRV_PS3) += rtc-ps3.o
obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o
obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o
+obj-$(CONFIG_RTC_DRV_RP5C01) += rtc-rp5c01.o
obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o
diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c
new file mode 100644
index 000000000000..5f5968a48925
--- /dev/null
+++ b/drivers/rtc/rtc-msm6242.c
@@ -0,0 +1,269 @@
+/*
+ * Oki MSM6242 RTC Driver
+ *
+ * Copyright 2009 Geert Uytterhoeven
+ *
+ * Based on the A2000 TOD code in arch/m68k/amiga/config.c
+ * Copyright (C) 1993 Hamish Macdonald
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+
+enum {
+ MSM6242_SECOND1 = 0x0, /* 1-second digit register */
+ MSM6242_SECOND10 = 0x1, /* 10-second digit register */
+ MSM6242_MINUTE1 = 0x2, /* 1-minute digit register */
+ MSM6242_MINUTE10 = 0x3, /* 10-minute digit register */
+ MSM6242_HOUR1 = 0x4, /* 1-hour digit register */
+ MSM6242_HOUR10 = 0x5, /* PM/AM, 10-hour digit register */
+ MSM6242_DAY1 = 0x6, /* 1-day digit register */
+ MSM6242_DAY10 = 0x7, /* 10-day digit register */
+ MSM6242_MONTH1 = 0x8, /* 1-month digit register */
+ MSM6242_MONTH10 = 0x9, /* 10-month digit register */
+ MSM6242_YEAR1 = 0xa, /* 1-year digit register */
+ MSM6242_YEAR10 = 0xb, /* 10-year digit register */
+ MSM6242_WEEK = 0xc, /* Week register */
+ MSM6242_CD = 0xd, /* Control Register D */
+ MSM6242_CE = 0xe, /* Control Register E */
+ MSM6242_CF = 0xf, /* Control Register F */
+};
+
+#define MSM6242_HOUR10_AM (0 << 2)
+#define MSM6242_HOUR10_PM (1 << 2)
+#define MSM6242_HOUR10_HR_MASK (3 << 0)
+
+#define MSM6242_WEEK_SUNDAY 0
+#define MSM6242_WEEK_MONDAY 1
+#define MSM6242_WEEK_TUESDAY 2
+#define MSM6242_WEEK_WEDNESDAY 3
+#define MSM6242_WEEK_THURSDAY 4
+#define MSM6242_WEEK_FRIDAY 5
+#define MSM6242_WEEK_SATURDAY 6
+
+#define MSM6242_CD_30_S_ADJ (1 << 3) /* 30-second adjustment */
+#define MSM6242_CD_IRQ_FLAG (1 << 2)
+#define MSM6242_CD_BUSY (1 << 1)
+#define MSM6242_CD_HOLD (1 << 0)
+
+#define MSM6242_CE_T_MASK (3 << 2)
+#define MSM6242_CE_T_64HZ (0 << 2) /* period 1/64 second */
+#define MSM6242_CE_T_1HZ (1 << 2) /* period 1 second */
+#define MSM6242_CE_T_1MINUTE (2 << 2) /* period 1 minute */
+#define MSM6242_CE_T_1HOUR (3 << 2) /* period 1 hour */
+
+#define MSM6242_CE_ITRPT_STND (1 << 1)
+#define MSM6242_CE_MASK (1 << 0) /* STD.P output control */
+
+#define MSM6242_CF_TEST (1 << 3)
+#define MSM6242_CF_12H (0 << 2)
+#define MSM6242_CF_24H (1 << 2)
+#define MSM6242_CF_STOP (1 << 1)
+#define MSM6242_CF_REST (1 << 0) /* reset */
+
+
+struct msm6242_priv {
+ u32 __iomem *regs;
+ struct rtc_device *rtc;
+};
+
+static inline unsigned int msm6242_read(struct msm6242_priv *priv,
+ unsigned int reg)
+{
+ return __raw_readl(&priv->regs[reg]) & 0xf;
+}
+
+static inline void msm6242_write(struct msm6242_priv *priv, unsigned int val,
+ unsigned int reg)
+{
+ return __raw_writel(val, &priv->regs[reg]);
+}
+
+static inline void msm6242_set(struct msm6242_priv *priv, unsigned int val,
+ unsigned int reg)
+{
+ msm6242_write(priv, msm6242_read(priv, reg) | val, reg);
+}
+
+static inline void msm6242_clear(struct msm6242_priv *priv, unsigned int val,
+ unsigned int reg)
+{
+ msm6242_write(priv, msm6242_read(priv, reg) & ~val, reg);
+}
+
+static void msm6242_lock(struct msm6242_priv *priv)
+{
+ int cnt = 5;
+
+ msm6242_set(priv, MSM6242_CD_HOLD, MSM6242_CD);
+
+ while ((msm6242_read(priv, MSM6242_CD) & MSM6242_CD_BUSY) && cnt) {
+ msm6242_clear(priv, MSM6242_CD_HOLD, MSM6242_CD);
+ udelay(70);
+ msm6242_set(priv, MSM6242_CD_HOLD, MSM6242_CD);
+ cnt--;
+ }
+
+ if (!cnt)
+ pr_warning("msm6242: timed out waiting for RTC (0x%x)\n",
+ msm6242_read(priv, MSM6242_CD));
+}
+
+static void msm6242_unlock(struct msm6242_priv *priv)
+{
+ msm6242_clear(priv, MSM6242_CD_HOLD, MSM6242_CD);
+}
+
+static int msm6242_read_time(struct device *dev, struct rtc_time *tm)
+{
+ struct msm6242_priv *priv = dev_get_drvdata(dev);
+
+ msm6242_lock(priv);
+
+ tm->tm_sec = msm6242_read(priv, MSM6242_SECOND10) * 10 +
+ msm6242_read(priv, MSM6242_SECOND1);
+ tm->tm_min = msm6242_read(priv, MSM6242_MINUTE10) * 10 +
+ msm6242_read(priv, MSM6242_MINUTE1);
+ tm->tm_hour = (msm6242_read(priv, MSM6242_HOUR10 & 3)) * 10 +
+ msm6242_read(priv, MSM6242_HOUR1);
+ tm->tm_mday = msm6242_read(priv, MSM6242_DAY10) * 10 +
+ msm6242_read(priv, MSM6242_DAY1);
+ tm->tm_wday = msm6242_read(priv, MSM6242_WEEK);
+ tm->tm_mon = msm6242_read(priv, MSM6242_MONTH10) * 10 +
+ msm6242_read(priv, MSM6242_MONTH1) - 1;
+ tm->tm_year = msm6242_read(priv, MSM6242_YEAR10) * 10 +
+ msm6242_read(priv, MSM6242_YEAR1);
+ if (tm->tm_year <= 69)
+ tm->tm_year += 100;
+
+ if (!(msm6242_read(priv, MSM6242_CF) & MSM6242_CF_24H)) {
+ unsigned int pm = msm6242_read(priv, MSM6242_HOUR10) &
+ MSM6242_HOUR10_PM;
+ if (!pm && tm->tm_hour == 12)
+ tm->tm_hour = 0;
+ else if (pm && tm->tm_hour != 12)
+ tm->tm_hour += 12;
+ }
+
+ msm6242_unlock(priv);
+
+ return rtc_valid_tm(tm);
+}
+
+static int msm6242_set_time(struct device *dev, struct rtc_time *tm)
+{
+ struct msm6242_priv *priv = dev_get_drvdata(dev);
+
+ msm6242_lock(priv);
+
+ msm6242_write(priv, tm->tm_sec / 10, MSM6242_SECOND10);
+ msm6242_write(priv, tm->tm_sec % 10, MSM6242_SECOND1);
+ msm6242_write(priv, tm->tm_min / 10, MSM6242_MINUTE10);
+ msm6242_write(priv, tm->tm_min % 10, MSM6242_MINUTE1);
+ if (msm6242_read(priv, MSM6242_CF) & MSM6242_CF_24H)
+ msm6242_write(priv, tm->tm_hour / 10, MSM6242_HOUR10);
+ else if (tm->tm_hour >= 12)
+ msm6242_write(priv, MSM6242_HOUR10_PM + (tm->tm_hour - 12) / 10,
+ MSM6242_HOUR10);
+ else
+ msm6242_write(priv, tm->tm_hour / 10, MSM6242_HOUR10);
+ msm6242_write(priv, tm->tm_hour % 10, MSM6242_HOUR1);
+ msm6242_write(priv, tm->tm_mday / 10, MSM6242_DAY10);
+ msm6242_write(priv, tm->tm_mday % 10, MSM6242_DAY1);
+ if (tm->tm_wday != -1)
+ msm6242_write(priv, tm->tm_wday, MSM6242_WEEK);
+ msm6242_write(priv, (tm->tm_mon + 1) / 10, MSM6242_MONTH10);
+ msm6242_write(priv, (tm->tm_mon + 1) % 10, MSM6242_MONTH1);
+ if (tm->tm_year >= 100)
+ tm->tm_year -= 100;
+ msm6242_write(priv, tm->tm_year / 10, MSM6242_YEAR10);
+ msm6242_write(priv, tm->tm_year % 10, MSM6242_YEAR1);
+
+ msm6242_unlock(priv);
+ return 0;
+}
+
+static const struct rtc_class_ops msm6242_rtc_ops = {
+ .read_time = msm6242_read_time,
+ .set_time = msm6242_set_time,
+};
+
+static int __init msm6242_rtc_probe(struct platform_device *dev)
+{
+ struct resource *res;
+ struct msm6242_priv *priv;
+ struct rtc_device *rtc;
+ int error;
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->regs = ioremap(res->start, resource_size(res));
+ if (!priv->regs) {
+ error = -ENOMEM;
+ goto out_free_priv;
+ }
+
+ rtc = rtc_device_register("rtc-msm6242", &dev->dev, &msm6242_rtc_ops,
+ THIS_MODULE);
+ if (IS_ERR(rtc)) {
+ error = PTR_ERR(rtc);
+ goto out_unmap;
+ }
+
+ priv->rtc = rtc;
+ platform_set_drvdata(dev, priv);
+ return 0;
+
+out_unmap:
+ iounmap(priv->regs);
+out_free_priv:
+ kfree(priv);
+ return error;
+}
+
+static int __exit msm6242_rtc_remove(struct platform_device *dev)
+{
+ struct msm6242_priv *priv = platform_get_drvdata(dev);
+
+ rtc_device_unregister(priv->rtc);
+ iounmap(priv->regs);
+ kfree(priv);
+ return 0;
+}
+
+static struct platform_driver msm6242_rtc_driver = {
+ .driver = {
+ .name = "rtc-msm6242",
+ .owner = THIS_MODULE,
+ },
+ .remove = __exit_p(msm6242_rtc_remove),
+};
+
+static int __init msm6242_rtc_init(void)
+{
+ return platform_driver_probe(&msm6242_rtc_driver, msm6242_rtc_probe);
+}
+
+static void __exit msm6242_rtc_fini(void)
+{
+ platform_driver_unregister(&msm6242_rtc_driver);
+}
+
+module_init(msm6242_rtc_init);
+module_exit(msm6242_rtc_fini);
+
+MODULE_AUTHOR("Geert Uytterhoeven <geert@linux-m68k.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Oki MSM6242 RTC driver");
+MODULE_ALIAS("platform:rtc-msm6242");
diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c
new file mode 100644
index 000000000000..e1313feb060f
--- /dev/null
+++ b/drivers/rtc/rtc-rp5c01.c
@@ -0,0 +1,222 @@
+/*
+ * Ricoh RP5C01 RTC Driver
+ *
+ * Copyright 2009 Geert Uytterhoeven
+ *
+ * Based on the A3000 TOD code in arch/m68k/amiga/config.c
+ * Copyright (C) 1993 Hamish Macdonald
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+
+enum {
+ RP5C01_1_SECOND = 0x0, /* MODE 00 */
+ RP5C01_10_SECOND = 0x1, /* MODE 00 */
+ RP5C01_1_MINUTE = 0x2, /* MODE 00 and MODE 01 */
+ RP5C01_10_MINUTE = 0x3, /* MODE 00 and MODE 01 */
+ RP5C01_1_HOUR = 0x4, /* MODE 00 and MODE 01 */
+ RP5C01_10_HOUR = 0x5, /* MODE 00 and MODE 01 */
+ RP5C01_DAY_OF_WEEK = 0x6, /* MODE 00 and MODE 01 */
+ RP5C01_1_DAY = 0x7, /* MODE 00 and MODE 01 */
+ RP5C01_10_DAY = 0x8, /* MODE 00 and MODE 01 */
+ RP5C01_1_MONTH = 0x9, /* MODE 00 */
+ RP5C01_10_MONTH = 0xa, /* MODE 00 */
+ RP5C01_1_YEAR = 0xb, /* MODE 00 */
+ RP5C01_10_YEAR = 0xc, /* MODE 00 */
+
+ RP5C01_12_24_SELECT = 0xa, /* MODE 01 */
+ RP5C01_LEAP_YEAR = 0xb, /* MODE 01 */
+
+ RP5C01_MODE = 0xd, /* all modes */
+ RP5C01_TEST = 0xe, /* all modes */
+ RP5C01_RESET = 0xf, /* all modes */
+};
+
+#define RP5C01_12_24_SELECT_12 (0 << 0)
+#define RP5C01_12_24_SELECT_24 (1 << 0)
+
+#define RP5C01_10_HOUR_AM (0 << 1)
+#define RP5C01_10_HOUR_PM (1 << 1)
+
+#define RP5C01_MODE_TIMER_EN (1 << 3) /* timer enable */
+#define RP5C01_MODE_ALARM_EN (1 << 2) /* alarm enable */
+
+#define RP5C01_MODE_MODE_MASK (3 << 0)
+#define RP5C01_MODE_MODE00 (0 << 0) /* time */
+#define RP5C01_MODE_MODE01 (1 << 0) /* alarm, 12h/24h, leap year */
+#define RP5C01_MODE_RAM_BLOCK10 (2 << 0) /* RAM 4 bits x 13 */
+#define RP5C01_MODE_RAM_BLOCK11 (3 << 0) /* RAM 4 bits x 13 */
+
+#define RP5C01_RESET_1HZ_PULSE (1 << 3)
+#define RP5C01_RESET_16HZ_PULSE (1 << 2)
+#define RP5C01_RESET_SECOND (1 << 1) /* reset divider stages for */
+ /* seconds or smaller units */
+#define RP5C01_RESET_ALARM (1 << 0) /* reset all alarm registers */
+
+
+struct rp5c01_priv {
+ u32 __iomem *regs;
+ struct rtc_device *rtc;
+};
+
+static inline unsigned int rp5c01_read(struct rp5c01_priv *priv,
+ unsigned int reg)
+{
+ return __raw_readl(&priv->regs[reg]) & 0xf;
+}
+
+static inline void rp5c01_write(struct rp5c01_priv *priv, unsigned int val,
+ unsigned int reg)
+{
+ return __raw_writel(val, &priv->regs[reg]);
+}
+
+static void rp5c01_lock(struct rp5c01_priv *priv)
+{
+ rp5c01_write(priv, RP5C01_MODE_MODE00, RP5C01_MODE);
+}
+
+static void rp5c01_unlock(struct rp5c01_priv *priv)
+{
+ rp5c01_write(priv, RP5C01_MODE_TIMER_EN | RP5C01_MODE_MODE01,
+ RP5C01_MODE);
+}
+
+static int rp5c01_read_time(struct device *dev, struct rtc_time *tm)
+{
+ struct rp5c01_priv *priv = dev_get_drvdata(dev);
+
+ rp5c01_lock(priv);
+
+ tm->tm_sec = rp5c01_read(priv, RP5C01_10_SECOND) * 10 +
+ rp5c01_read(priv, RP5C01_1_SECOND);
+ tm->tm_min = rp5c01_read(priv, RP5C01_10_MINUTE) * 10 +
+ rp5c01_read(priv, RP5C01_1_MINUTE);
+ tm->tm_hour = rp5c01_read(priv, RP5C01_10_HOUR) * 10 +
+ rp5c01_read(priv, RP5C01_1_HOUR);
+ tm->tm_mday = rp5c01_read(priv, RP5C01_10_DAY) * 10 +
+ rp5c01_read(priv, RP5C01_1_DAY);
+ tm->tm_wday = rp5c01_read(priv, RP5C01_DAY_OF_WEEK);
+ tm->tm_mon = rp5c01_read(priv, RP5C01_10_MONTH) * 10 +
+ rp5c01_read(priv, RP5C01_1_MONTH) - 1;
+ tm->tm_year = rp5c01_read(priv, RP5C01_10_YEAR) * 10 +
+ rp5c01_read(priv, RP5C01_1_YEAR);
+ if (tm->tm_year <= 69)
+ tm->tm_year += 100;
+
+ rp5c01_unlock(priv);
+
+ return rtc_valid_tm(tm);
+}
+
+static int rp5c01_set_time(struct device *dev, struct rtc_time *tm)
+{
+ struct rp5c01_priv *priv = dev_get_drvdata(dev);
+
+ rp5c01_lock(priv);
+
+ rp5c01_write(priv, tm->tm_sec / 10, RP5C01_10_SECOND);
+ rp5c01_write(priv, tm->tm_sec % 10, RP5C01_1_SECOND);
+ rp5c01_write(priv, tm->tm_min / 10, RP5C01_10_MINUTE);
+ rp5c01_write(priv, tm->tm_min % 10, RP5C01_1_MINUTE);
+ rp5c01_write(priv, tm->tm_hour / 10, RP5C01_10_HOUR);
+ rp5c01_write(priv, tm->tm_hour % 10, RP5C01_1_HOUR);
+ rp5c01_write(priv, tm->tm_mday / 10, RP5C01_10_DAY);
+ rp5c01_write(priv, tm->tm_mday % 10, RP5C01_1_DAY);
+ if (tm->tm_wday != -1)
+ rp5c01_write(priv, tm->tm_wday, RP5C01_DAY_OF_WEEK);
+ rp5c01_write(priv, (tm->tm_mon + 1) / 10, RP5C01_10_MONTH);
+ rp5c01_write(priv, (tm->tm_mon + 1) % 10, RP5C01_1_MONTH);
+ if (tm->tm_year >= 100)
+ tm->tm_year -= 100;
+ rp5c01_write(priv, tm->tm_year / 10, RP5C01_10_YEAR);
+ rp5c01_write(priv, tm->tm_year % 10, RP5C01_1_YEAR);
+
+ rp5c01_unlock(priv);
+ return 0;
+}
+
+static const struct rtc_class_ops rp5c01_rtc_ops = {
+ .read_time = rp5c01_read_time,
+ .set_time = rp5c01_set_time,
+};
+
+static int __init rp5c01_rtc_probe(struct platform_device *dev)
+{
+ struct resource *res;
+ struct rp5c01_priv *priv;
+ struct rtc_device *rtc;
+ int error;
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->regs = ioremap(res->start, resource_size(res));
+ if (!priv->regs) {
+ error = -ENOMEM;
+ goto out_free_priv;
+ }
+
+ rtc = rtc_device_register("rtc-rp5c01", &dev->dev, &rp5c01_rtc_ops,
+ THIS_MODULE);
+ if (IS_ERR(rtc)) {
+ error = PTR_ERR(rtc);
+ goto out_unmap;
+ }
+
+ priv->rtc = rtc;
+ platform_set_drvdata(dev, priv);
+ return 0;
+
+out_unmap:
+ iounmap(priv->regs);
+out_free_priv:
+ kfree(priv);
+ return error;
+}
+
+static int __exit rp5c01_rtc_remove(struct platform_device *dev)
+{
+ struct rp5c01_priv *priv = platform_get_drvdata(dev);
+
+ rtc_device_unregister(priv->rtc);
+ iounmap(priv->regs);
+ kfree(priv);
+ return 0;
+}
+
+static struct platform_driver rp5c01_rtc_driver = {
+ .driver = {
+ .name = "rtc-rp5c01",
+ .owner = THIS_MODULE,
+ },
+ .remove = __exit_p(rp5c01_rtc_remove),
+};
+
+static int __init rp5c01_rtc_init(void)
+{
+ return platform_driver_probe(&rp5c01_rtc_driver, rp5c01_rtc_probe);
+}
+
+static void __exit rp5c01_rtc_fini(void)
+{
+ platform_driver_unregister(&rp5c01_rtc_driver);
+}
+
+module_init(rp5c01_rtc_init);
+module_exit(rp5c01_rtc_fini);
+
+MODULE_AUTHOR("Geert Uytterhoeven <geert@linux-m68k.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ricoh RP5C01 RTC driver");
+MODULE_ALIAS("platform:rtc-rp5c01");
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
index b44462a6c6d3..740fe405c395 100644
--- a/drivers/s390/char/sclp_async.c
+++ b/drivers/s390/char/sclp_async.c
@@ -101,18 +101,17 @@ static struct ctl_table callhome_table[] = {
.mode = 0644,
.proc_handler = proc_handler_callhome,
},
- { .ctl_name = 0 }
+ {}
};
static struct ctl_table kern_dir_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.maxlen = 0,
.mode = 0555,
.child = callhome_table,
},
- { .ctl_name = 0 }
+ {}
};
/*
diff --git a/drivers/scsi/scsi_sysctl.c b/drivers/scsi/scsi_sysctl.c
index 63a30f566f3a..2b6b93f7d8ef 100644
--- a/drivers/scsi/scsi_sysctl.c
+++ b/drivers/scsi/scsi_sysctl.c
@@ -13,26 +13,23 @@
static ctl_table scsi_table[] = {
- { .ctl_name = DEV_SCSI_LOGGING_LEVEL,
- .procname = "logging_level",
+ { .procname = "logging_level",
.data = &scsi_logging_level,
.maxlen = sizeof(scsi_logging_level),
.mode = 0644,
- .proc_handler = &proc_dointvec },
+ .proc_handler = proc_dointvec },
{ }
};
static ctl_table scsi_dir_table[] = {
- { .ctl_name = DEV_SCSI,
- .procname = "scsi",
+ { .procname = "scsi",
.mode = 0555,
.child = scsi_table },
{ }
};
static ctl_table scsi_root_table[] = {
- { .ctl_name = CTL_DEV,
- .procname = "dev",
+ { .procname = "dev",
.mode = 0555,
.child = scsi_dir_table },
{ }
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index e52257257279..50943ff78f4b 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1477,4 +1477,17 @@ config SERIAL_BCM63XX_CONSOLE
If you have enabled the serial port on the bcm63xx CPU
you can make it the console by answering Y to this option.
+config SERIAL_GRLIB_GAISLER_APBUART
+ tristate "GRLIB APBUART serial support"
+ depends on OF
+ ---help---
+ Add support for the GRLIB APBUART serial port.
+
+config SERIAL_GRLIB_GAISLER_APBUART_CONSOLE
+ bool "Console on GRLIB APBUART serial port"
+ depends on SERIAL_GRLIB_GAISLER_APBUART=y
+ select SERIAL_CORE_CONSOLE
+ help
+ Support for running a console on the GRLIB APBUART
+
endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d21d5dd5d048..5548fe7df61d 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -81,3 +81,4 @@ obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o
+obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o
diff --git a/drivers/serial/apbuart.c b/drivers/serial/apbuart.c
new file mode 100644
index 000000000000..fe91319b5f65
--- /dev/null
+++ b/drivers/serial/apbuart.c
@@ -0,0 +1,710 @@
+/*
+ * Driver for GRLIB serial ports (APBUART)
+ *
+ * Based on linux/drivers/serial/amba.c
+ *
+ * Copyright (C) 2000 Deep Blue Solutions Ltd.
+ * Copyright (C) 2003 Konrad Eisele <eiselekd@web.de>
+ * Copyright (C) 2006 Daniel Hellstrom <daniel@gaisler.com>, Aeroflex Gaisler AB
+ * Copyright (C) 2008 Gilead Kutnick <kutnickg@zin-tech.com>
+ * Copyright (C) 2009 Kristoffer Glembo <kristoffer@gaisler.com>, Aeroflex Gaisler AB
+ */
+
+#if defined(CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/module.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/serial.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+#include <linux/kthread.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/serial_core.h>
+#include <asm/irq.h>
+
+#include "apbuart.h"
+
+#define SERIAL_APBUART_MAJOR TTY_MAJOR
+#define SERIAL_APBUART_MINOR 64
+#define UART_DUMMY_RSR_RX 0x8000 /* for ignore all read */
+
+static void apbuart_tx_chars(struct uart_port *port);
+
+static void apbuart_stop_tx(struct uart_port *port)
+{
+ unsigned int cr;
+
+ cr = UART_GET_CTRL(port);
+ cr &= ~UART_CTRL_TI;
+ UART_PUT_CTRL(port, cr);
+}
+
+static void apbuart_start_tx(struct uart_port *port)
+{
+ unsigned int cr;
+
+ cr = UART_GET_CTRL(port);
+ cr |= UART_CTRL_TI;
+ UART_PUT_CTRL(port, cr);
+
+ if (UART_GET_STATUS(port) & UART_STATUS_THE)
+ apbuart_tx_chars(port);
+}
+
+static void apbuart_stop_rx(struct uart_port *port)
+{
+ unsigned int cr;
+
+ cr = UART_GET_CTRL(port);
+ cr &= ~(UART_CTRL_RI);
+ UART_PUT_CTRL(port, cr);
+}
+
+static void apbuart_enable_ms(struct uart_port *port)
+{
+ /* No modem status change interrupts for APBUART */
+}
+
+static void apbuart_rx_chars(struct uart_port *port)
+{
+ struct tty_struct *tty = port->state->port.tty;
+ unsigned int status, ch, rsr, flag;
+ unsigned int max_chars = port->fifosize;
+
+ status = UART_GET_STATUS(port);
+
+ while (UART_RX_DATA(status) && (max_chars--)) {
+
+ ch = UART_GET_CHAR(port);
+ flag = TTY_NORMAL;
+
+ port->icount.rx++;
+
+ rsr = UART_GET_STATUS(port) | UART_DUMMY_RSR_RX;
+ UART_PUT_STATUS(port, 0);
+ if (rsr & UART_STATUS_ERR) {
+
+ if (rsr & UART_STATUS_BR) {
+ rsr &= ~(UART_STATUS_FE | UART_STATUS_PE);
+ port->icount.brk++;
+ if (uart_handle_break(port))
+ goto ignore_char;
+ } else if (rsr & UART_STATUS_PE) {
+ port->icount.parity++;
+ } else if (rsr & UART_STATUS_FE) {
+ port->icount.frame++;
+ }
+ if (rsr & UART_STATUS_OE)
+ port->icount.overrun++;
+
+ rsr &= port->read_status_mask;
+
+ if (rsr & UART_STATUS_PE)
+ flag = TTY_PARITY;
+ else if (rsr & UART_STATUS_FE)
+ flag = TTY_FRAME;
+ }
+
+ if (uart_handle_sysrq_char(port, ch))
+ goto ignore_char;
+
+ uart_insert_char(port, rsr, UART_STATUS_OE, ch, flag);
+
+
+ ignore_char:
+ status = UART_GET_STATUS(port);
+ }
+
+ tty_flip_buffer_push(tty);
+}
+
+static void apbuart_tx_chars(struct uart_port *port)
+{
+ struct circ_buf *xmit = &port->state->xmit;
+ int count;
+
+ if (port->x_char) {
+ UART_PUT_CHAR(port, port->x_char);
+ port->icount.tx++;
+ port->x_char = 0;
+ return;
+ }
+
+ if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
+ apbuart_stop_tx(port);
+ return;
+ }
+
+ /* amba: fill FIFO */
+ count = port->fifosize >> 1;
+ do {
+ UART_PUT_CHAR(port, xmit->buf[xmit->tail]);
+ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+ port->icount.tx++;
+ if (uart_circ_empty(xmit))
+ break;
+ } while (--count > 0);
+
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+
+ if (uart_circ_empty(xmit))
+ apbuart_stop_tx(port);
+}
+
+static irqreturn_t apbuart_int(int irq, void *dev_id)
+{
+ struct uart_port *port = dev_id;
+ unsigned int status;
+
+ spin_lock(&port->lock);
+
+ status = UART_GET_STATUS(port);
+ if (status & UART_STATUS_DR)
+ apbuart_rx_chars(port);
+ if (status & UART_STATUS_THE)
+ apbuart_tx_chars(port);
+
+ spin_unlock(&port->lock);
+
+ return IRQ_HANDLED;
+}
+
+static unsigned int apbuart_tx_empty(struct uart_port *port)
+{
+ unsigned int status = UART_GET_STATUS(port);
+ return status & UART_STATUS_THE ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int apbuart_get_mctrl(struct uart_port *port)
+{
+ /* The GRLIB APBUART handles flow control in hardware */
+ return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+}
+
+static void apbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+ /* The GRLIB APBUART handles flow control in hardware */
+}
+
+static void apbuart_break_ctl(struct uart_port *port, int break_state)
+{
+ /* We don't support sending break */
+}
+
+static int apbuart_startup(struct uart_port *port)
+{
+ int retval;
+ unsigned int cr;
+
+ /* Allocate the IRQ */
+ retval = request_irq(port->irq, apbuart_int, 0, "apbuart", port);
+ if (retval)
+ return retval;
+
+ /* Finally, enable interrupts */
+ cr = UART_GET_CTRL(port);
+ UART_PUT_CTRL(port,
+ cr | UART_CTRL_RE | UART_CTRL_TE |
+ UART_CTRL_RI | UART_CTRL_TI);
+
+ return 0;
+}
+
+static void apbuart_shutdown(struct uart_port *port)
+{
+ unsigned int cr;
+
+ /* disable all interrupts, disable the port */
+ cr = UART_GET_CTRL(port);
+ UART_PUT_CTRL(port,
+ cr & ~(UART_CTRL_RE | UART_CTRL_TE |
+ UART_CTRL_RI | UART_CTRL_TI));
+
+ /* Free the interrupt */
+ free_irq(port->irq, port);
+}
+
+static void apbuart_set_termios(struct uart_port *port,
+ struct ktermios *termios, struct ktermios *old)
+{
+ unsigned int cr;
+ unsigned long flags;
+ unsigned int baud, quot;
+
+ /* Ask the core to calculate the divisor for us. */
+ baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+ if (baud == 0)
+ panic("invalid baudrate %i\n", port->uartclk / 16);
+
+ /* uart_get_divisor calc a *16 uart freq, apbuart is *8 */
+ quot = (uart_get_divisor(port, baud)) * 2;
+ cr = UART_GET_CTRL(port);
+ cr &= ~(UART_CTRL_PE | UART_CTRL_PS);
+
+ if (termios->c_cflag & PARENB) {
+ cr |= UART_CTRL_PE;
+ if ((termios->c_cflag & PARODD))
+ cr |= UART_CTRL_PS;
+ }
+
+ /* Enable flow control. */
+ if (termios->c_cflag & CRTSCTS)
+ cr |= UART_CTRL_FL;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ /* Update the per-port timeout. */
+ uart_update_timeout(port, termios->c_cflag, baud);
+
+ port->read_status_mask = UART_STATUS_OE;
+ if (termios->c_iflag & INPCK)
+ port->read_status_mask |= UART_STATUS_FE | UART_STATUS_PE;
+
+ /* Characters to ignore */
+ port->ignore_status_mask = 0;
+ if (termios->c_iflag & IGNPAR)
+ port->ignore_status_mask |= UART_STATUS_FE | UART_STATUS_PE;
+
+ /* Ignore all characters if CREAD is not set. */
+ if ((termios->c_cflag & CREAD) == 0)
+ port->ignore_status_mask |= UART_DUMMY_RSR_RX;
+
+ /* Set baud rate */
+ quot -= 1;
+ UART_PUT_SCAL(port, quot);
+ UART_PUT_CTRL(port, cr);
+
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *apbuart_type(struct uart_port *port)
+{
+ return port->type == PORT_APBUART ? "GRLIB/APBUART" : NULL;
+}
+
+static void apbuart_release_port(struct uart_port *port)
+{
+ release_mem_region(port->mapbase, 0x100);
+}
+
+static int apbuart_request_port(struct uart_port *port)
+{
+ return request_mem_region(port->mapbase, 0x100, "grlib-apbuart")
+ != NULL ? 0 : -EBUSY;
+ return 0;
+}
+
+/* Configure/autoconfigure the port */
+static void apbuart_config_port(struct uart_port *port, int flags)
+{
+ if (flags & UART_CONFIG_TYPE) {
+ port->type = PORT_APBUART;
+ apbuart_request_port(port);
+ }
+}
+
+/* Verify the new serial_struct (for TIOCSSERIAL) */
+static int apbuart_verify_port(struct uart_port *port,
+ struct serial_struct *ser)
+{
+ int ret = 0;
+ if (ser->type != PORT_UNKNOWN && ser->type != PORT_APBUART)
+ ret = -EINVAL;
+ if (ser->irq < 0 || ser->irq >= NR_IRQS)
+ ret = -EINVAL;
+ if (ser->baud_base < 9600)
+ ret = -EINVAL;
+ return ret;
+}
+
+static struct uart_ops grlib_apbuart_ops = {
+ .tx_empty = apbuart_tx_empty,
+ .set_mctrl = apbuart_set_mctrl,
+ .get_mctrl = apbuart_get_mctrl,
+ .stop_tx = apbuart_stop_tx,
+ .start_tx = apbuart_start_tx,
+ .stop_rx = apbuart_stop_rx,
+ .enable_ms = apbuart_enable_ms,
+ .break_ctl = apbuart_break_ctl,
+ .startup = apbuart_startup,
+ .shutdown = apbuart_shutdown,
+ .set_termios = apbuart_set_termios,
+ .type = apbuart_type,
+ .release_port = apbuart_release_port,
+ .request_port = apbuart_request_port,
+ .config_port = apbuart_config_port,
+ .verify_port = apbuart_verify_port,
+};
+
+static struct uart_port grlib_apbuart_ports[UART_NR];
+static struct device_node *grlib_apbuart_nodes[UART_NR];
+
+static int apbuart_scan_fifo_size(struct uart_port *port, int portnumber)
+{
+ int ctrl, loop = 0;
+ int status;
+ int fifosize;
+ unsigned long flags;
+
+ ctrl = UART_GET_CTRL(port);
+
+ /*
+ * Enable the transceiver and wait for it to be ready to send data.
+ * Clear interrupts so that this process will not be externally
+ * interrupted in the middle (which can cause the transceiver to
+ * drain prematurely).
+ */
+
+ local_irq_save(flags);
+
+ UART_PUT_CTRL(port, ctrl | UART_CTRL_TE);
+
+ while (!UART_TX_READY(UART_GET_STATUS(port)))
+ loop++;
+
+ /*
+ * Disable the transceiver so data isn't actually sent during the
+ * actual test.
+ */
+
+ UART_PUT_CTRL(port, ctrl & ~(UART_CTRL_TE));
+
+ fifosize = 1;
+ UART_PUT_CHAR(port, 0);
+
+ /*
+ * So long as transmitting a character increments the tranceivier FIFO
+ * length the FIFO must be at least that big. These bytes will
+ * automatically drain off of the FIFO.
+ */
+
+ status = UART_GET_STATUS(port);
+ while (((status >> 20) & 0x3F) == fifosize) {
+ fifosize++;
+ UART_PUT_CHAR(port, 0);
+ status = UART_GET_STATUS(port);
+ }
+
+ fifosize--;
+
+ UART_PUT_CTRL(port, ctrl);
+ local_irq_restore(flags);
+
+ if (fifosize == 0)
+ fifosize = 1;
+
+ return fifosize;
+}
+
+static void apbuart_flush_fifo(struct uart_port *port)
+{
+ int i;
+
+ for (i = 0; i < port->fifosize; i++)
+ UART_GET_CHAR(port);
+}
+
+
+/* ======================================================================== */
+/* Console driver, if enabled */
+/* ======================================================================== */
+
+#ifdef CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE
+
+static void apbuart_console_putchar(struct uart_port *port, int ch)
+{
+ unsigned int status;
+ do {
+ status = UART_GET_STATUS(port);
+ } while (!UART_TX_READY(status));
+ UART_PUT_CHAR(port, ch);
+}
+
+static void
+apbuart_console_write(struct console *co, const char *s, unsigned int count)
+{
+ struct uart_port *port = &grlib_apbuart_ports[co->index];
+ unsigned int status, old_cr, new_cr;
+
+ /* First save the CR then disable the interrupts */
+ old_cr = UART_GET_CTRL(port);
+ new_cr = old_cr & ~(UART_CTRL_RI | UART_CTRL_TI);
+ UART_PUT_CTRL(port, new_cr);
+
+ uart_console_write(port, s, count, apbuart_console_putchar);
+
+ /*
+ * Finally, wait for transmitter to become empty
+ * and restore the TCR
+ */
+ do {
+ status = UART_GET_STATUS(port);
+ } while (!UART_TX_READY(status));
+ UART_PUT_CTRL(port, old_cr);
+}
+
+static void __init
+apbuart_console_get_options(struct uart_port *port, int *baud,
+ int *parity, int *bits)
+{
+ if (UART_GET_CTRL(port) & (UART_CTRL_RE | UART_CTRL_TE)) {
+
+ unsigned int quot, status;
+ status = UART_GET_STATUS(port);
+
+ *parity = 'n';
+ if (status & UART_CTRL_PE) {
+ if ((status & UART_CTRL_PS) == 0)
+ *parity = 'e';
+ else
+ *parity = 'o';
+ }
+
+ *bits = 8;
+ quot = UART_GET_SCAL(port) / 8;
+ *baud = port->uartclk / (16 * (quot + 1));
+ }
+}
+
+static int __init apbuart_console_setup(struct console *co, char *options)
+{
+ struct uart_port *port;
+ int baud = 38400;
+ int bits = 8;
+ int parity = 'n';
+ int flow = 'n';
+
+ pr_debug("apbuart_console_setup co=%p, co->index=%i, options=%s\n",
+ co, co->index, options);
+
+ /*
+ * Check whether an invalid uart number has been specified, and
+ * if so, search for the first available port that does have
+ * console support.
+ */
+ if (co->index >= grlib_apbuart_port_nr)
+ co->index = 0;
+
+ port = &grlib_apbuart_ports[co->index];
+
+ spin_lock_init(&port->lock);
+
+ if (options)
+ uart_parse_options(options, &baud, &parity, &bits, &flow);
+ else
+ apbuart_console_get_options(port, &baud, &parity, &bits);
+
+ return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct uart_driver grlib_apbuart_driver;
+
+static struct console grlib_apbuart_console = {
+ .name = "ttyS",
+ .write = apbuart_console_write,
+ .device = uart_console_device,
+ .setup = apbuart_console_setup,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+ .data = &grlib_apbuart_driver,
+};
+
+
+static void grlib_apbuart_configure(void);
+
+static int __init apbuart_console_init(void)
+{
+ grlib_apbuart_configure();
+ register_console(&grlib_apbuart_console);
+ return 0;
+}
+
+console_initcall(apbuart_console_init);
+
+#define APBUART_CONSOLE (&grlib_apbuart_console)
+#else
+#define APBUART_CONSOLE NULL
+#endif
+
+static struct uart_driver grlib_apbuart_driver = {
+ .owner = THIS_MODULE,
+ .driver_name = "serial",
+ .dev_name = "ttyS",
+ .major = SERIAL_APBUART_MAJOR,
+ .minor = SERIAL_APBUART_MINOR,
+ .nr = UART_NR,
+ .cons = APBUART_CONSOLE,
+};
+
+
+/* ======================================================================== */
+/* OF Platform Driver */
+/* ======================================================================== */
+
+static int __devinit apbuart_probe(struct of_device *op,
+ const struct of_device_id *match)
+{
+ int i = -1;
+ struct uart_port *port = NULL;
+
+ i = 0;
+ for (i = 0; i < grlib_apbuart_port_nr; i++) {
+ if (op->node == grlib_apbuart_nodes[i])
+ break;
+ }
+
+ port = &grlib_apbuart_ports[i];
+ port->dev = &op->dev;
+
+ uart_add_one_port(&grlib_apbuart_driver, (struct uart_port *) port);
+
+ apbuart_flush_fifo((struct uart_port *) port);
+
+ printk(KERN_INFO "grlib-apbuart at 0x%llx, irq %d\n",
+ (unsigned long long) port->mapbase, port->irq);
+ return 0;
+
+}
+
+static struct of_device_id __initdata apbuart_match[] = {
+ {
+ .name = "GAISLER_APBUART",
+ },
+ {},
+};
+
+static struct of_platform_driver grlib_apbuart_of_driver = {
+ .match_table = apbuart_match,
+ .probe = apbuart_probe,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "grlib-apbuart",
+ },
+};
+
+
+static void grlib_apbuart_configure(void)
+{
+ static int enum_done;
+ struct device_node *np, *rp;
+ struct uart_port *port = NULL;
+ const u32 *prop;
+ int freq_khz;
+ int v = 0, d = 0;
+ unsigned int addr;
+ int irq, line;
+ struct amba_prom_registers *regs;
+
+ if (enum_done)
+ return;
+
+ /* Get bus frequency */
+ rp = of_find_node_by_path("/");
+ rp = of_get_next_child(rp, NULL);
+ prop = of_get_property(rp, "clock-frequency", NULL);
+ freq_khz = *prop;
+
+ line = 0;
+ for_each_matching_node(np, apbuart_match) {
+
+ int *vendor = (int *) of_get_property(np, "vendor", NULL);
+ int *device = (int *) of_get_property(np, "device", NULL);
+ int *irqs = (int *) of_get_property(np, "interrupts", NULL);
+ regs = (struct amba_prom_registers *)
+ of_get_property(np, "reg", NULL);
+
+ if (vendor)
+ v = *vendor;
+ if (device)
+ d = *device;
+
+ if (!irqs || !regs)
+ return;
+
+ grlib_apbuart_nodes[line] = np;
+
+ addr = regs->phys_addr;
+ irq = *irqs;
+
+ port = &grlib_apbuart_ports[line];
+
+ port->mapbase = addr;
+ port->membase = ioremap(addr, sizeof(struct grlib_apbuart_regs_map));
+ port->irq = irq;
+ port->iotype = UPIO_MEM;
+ port->ops = &grlib_apbuart_ops;
+ port->flags = UPF_BOOT_AUTOCONF;
+ port->line = line;
+ port->uartclk = freq_khz * 1000;
+ port->fifosize = apbuart_scan_fifo_size((struct uart_port *) port, line);
+ line++;
+
+ /* We support maximum UART_NR uarts ... */
+ if (line == UART_NR)
+ break;
+
+ }
+
+ enum_done = 1;
+
+ grlib_apbuart_driver.nr = grlib_apbuart_port_nr = line;
+}
+
+static int __init grlib_apbuart_init(void)
+{
+ int ret;
+
+ /* Find all APBUARTS in device the tree and initialize their ports */
+ grlib_apbuart_configure();
+
+ printk(KERN_INFO "Serial: GRLIB APBUART driver\n");
+
+ ret = uart_register_driver(&grlib_apbuart_driver);
+
+ if (ret) {
+ printk(KERN_ERR "%s: uart_register_driver failed (%i)\n",
+ __FILE__, ret);
+ return ret;
+ }
+
+ ret = of_register_platform_driver(&grlib_apbuart_of_driver);
+ if (ret) {
+ printk(KERN_ERR
+ "%s: of_register_platform_driver failed (%i)\n",
+ __FILE__, ret);
+ uart_unregister_driver(&grlib_apbuart_driver);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void __exit grlib_apbuart_exit(void)
+{
+ int i;
+
+ for (i = 0; i < grlib_apbuart_port_nr; i++)
+ uart_remove_one_port(&grlib_apbuart_driver,
+ &grlib_apbuart_ports[i]);
+
+ uart_unregister_driver(&grlib_apbuart_driver);
+ of_unregister_platform_driver(&grlib_apbuart_of_driver);
+}
+
+module_init(grlib_apbuart_init);
+module_exit(grlib_apbuart_exit);
+
+MODULE_AUTHOR("Aeroflex Gaisler AB");
+MODULE_DESCRIPTION("GRLIB APBUART serial driver");
+MODULE_VERSION("2.1");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/apbuart.h b/drivers/serial/apbuart.h
new file mode 100644
index 000000000000..5faf87c8d2bc
--- /dev/null
+++ b/drivers/serial/apbuart.h
@@ -0,0 +1,64 @@
+#ifndef __GRLIB_APBUART_H__
+#define __GRLIB_APBUART_H__
+
+#include <asm/io.h>
+
+#define UART_NR 8
+static int grlib_apbuart_port_nr;
+
+struct grlib_apbuart_regs_map {
+ u32 data;
+ u32 status;
+ u32 ctrl;
+ u32 scaler;
+};
+
+struct amba_prom_registers {
+ unsigned int phys_addr;
+ unsigned int reg_size;
+};
+
+/*
+ * The following defines the bits in the APBUART Status Registers.
+ */
+#define UART_STATUS_DR 0x00000001 /* Data Ready */
+#define UART_STATUS_TSE 0x00000002 /* TX Send Register Empty */
+#define UART_STATUS_THE 0x00000004 /* TX Hold Register Empty */
+#define UART_STATUS_BR 0x00000008 /* Break Error */
+#define UART_STATUS_OE 0x00000010 /* RX Overrun Error */
+#define UART_STATUS_PE 0x00000020 /* RX Parity Error */
+#define UART_STATUS_FE 0x00000040 /* RX Framing Error */
+#define UART_STATUS_ERR 0x00000078 /* Error Mask */
+
+/*
+ * The following defines the bits in the APBUART Ctrl Registers.
+ */
+#define UART_CTRL_RE 0x00000001 /* Receiver enable */
+#define UART_CTRL_TE 0x00000002 /* Transmitter enable */
+#define UART_CTRL_RI 0x00000004 /* Receiver interrupt enable */
+#define UART_CTRL_TI 0x00000008 /* Transmitter irq */
+#define UART_CTRL_PS 0x00000010 /* Parity select */
+#define UART_CTRL_PE 0x00000020 /* Parity enable */
+#define UART_CTRL_FL 0x00000040 /* Flow control enable */
+#define UART_CTRL_LB 0x00000080 /* Loopback enable */
+
+#define APBBASE(port) ((struct grlib_apbuart_regs_map *)((port)->membase))
+
+#define APBBASE_DATA_P(port) (&(APBBASE(port)->data))
+#define APBBASE_STATUS_P(port) (&(APBBASE(port)->status))
+#define APBBASE_CTRL_P(port) (&(APBBASE(port)->ctrl))
+#define APBBASE_SCALAR_P(port) (&(APBBASE(port)->scaler))
+
+#define UART_GET_CHAR(port) (__raw_readl(APBBASE_DATA_P(port)))
+#define UART_PUT_CHAR(port, v) (__raw_writel(v, APBBASE_DATA_P(port)))
+#define UART_GET_STATUS(port) (__raw_readl(APBBASE_STATUS_P(port)))
+#define UART_PUT_STATUS(port, v)(__raw_writel(v, APBBASE_STATUS_P(port)))
+#define UART_GET_CTRL(port) (__raw_readl(APBBASE_CTRL_P(port)))
+#define UART_PUT_CTRL(port, v) (__raw_writel(v, APBBASE_CTRL_P(port)))
+#define UART_GET_SCAL(port) (__raw_readl(APBBASE_SCALAR_P(port)))
+#define UART_PUT_SCAL(port, v) (__raw_writel(v, APBBASE_SCALAR_P(port)))
+
+#define UART_RX_DATA(s) (((s) & UART_STATUS_DR) != 0)
+#define UART_TX_READY(s) (((s) & UART_STATUS_THE) != 0)
+
+#endif /* __GRLIB_APBUART_H__ */
diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c
index c99f0821cae3..73f089d3efd6 100644
--- a/drivers/serial/s3c2410.c
+++ b/drivers/serial/s3c2410.c
@@ -2,7 +2,7 @@
*
* Driver for Samsung S3C2410 SoC onboard UARTs.
*
- * Ben Dooks, Copyright (c) 2003-2005,2008 Simtec Electronics
+ * Ben Dooks, Copyright (c) 2003-2008 Simtec Electronics
* http://armlinux.simtec.co.uk/
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/serial/s3c2412.c b/drivers/serial/s3c2412.c
index 6e057d8809d3..ce75e28e36ef 100644
--- a/drivers/serial/s3c2412.c
+++ b/drivers/serial/s3c2412.c
@@ -2,7 +2,7 @@
*
* Driver for Samsung S3C2412 and S3C2413 SoC onboard UARTs.
*
- * Ben Dooks, Copyright (c) 2003-2005,2008 Simtec Electronics
+ * Ben Dooks, Copyright (c) 2003-2008 Simtec Electronics
* http://armlinux.simtec.co.uk/
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/serial/s3c2440.c b/drivers/serial/s3c2440.c
index 69ff5d340f04..094cc3904b13 100644
--- a/drivers/serial/s3c2440.c
+++ b/drivers/serial/s3c2440.c
@@ -2,7 +2,7 @@
*
* Driver for Samsung S3C2440 and S3C2442 SoC onboard UARTs.
*
- * Ben Dooks, Copyright (c) 2003-2005,2008 Simtec Electronics
+ * Ben Dooks, Copyright (c) 2003-2008 Simtec Electronics
* http://armlinux.simtec.co.uk/
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/serial/s3c24a0.c b/drivers/serial/s3c24a0.c
index 26c49e18bdd1..fad6083ca427 100644
--- a/drivers/serial/s3c24a0.c
+++ b/drivers/serial/s3c24a0.c
@@ -6,7 +6,7 @@
*
* Author: Sandeep Patil <sandeep.patil@azingo.com>
*
- * Ben Dooks, Copyright (c) 2003-2005,2008 Simtec Electronics
+ * Ben Dooks, Copyright (c) 2003-2008 Simtec Electronics
* http://armlinux.simtec.co.uk/
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/serial/samsung.c b/drivers/serial/samsung.c
index 1523e8d9ae77..52e3df113ec0 100644
--- a/drivers/serial/samsung.c
+++ b/drivers/serial/samsung.c
@@ -2,7 +2,7 @@
*
* Driver core for Samsung SoC onboard UARTs.
*
- * Ben Dooks, Copyright (c) 2003-2005,2008 Simtec Electronics
+ * Ben Dooks, Copyright (c) 2003-2008 Simtec Electronics
* http://armlinux.simtec.co.uk/
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/serial/samsung.h b/drivers/serial/samsung.h
index d3fe315969f6..1fb22343df42 100644
--- a/drivers/serial/samsung.h
+++ b/drivers/serial/samsung.h
@@ -2,7 +2,7 @@
*
* Driver for Samsung SoC onboard UARTs.
*
- * Ben Dooks, Copyright (c) 2003-2005,2008 Simtec Electronics
+ * Ben Dooks, Copyright (c) 2003-2008 Simtec Electronics
* http://armlinux.simtec.co.uk/
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index ba1a872b221e..bf5f95a19413 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -35,8 +35,8 @@
#include <linux/spi/spi.h>
-#include <mach/dma.h>
-#include <mach/clock.h>
+#include <plat/dma.h>
+#include <plat/clock.h>
#define OMAP2_MCSPI_MAX_FREQ 48000000
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index e75ba9b28898..6c3a8557db27 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -51,8 +51,8 @@
#include <asm/io.h>
#include <asm/mach-types.h>
-#include <mach/mux.h>
-#include <mach/omap730.h> /* OMAP730_IO_CONF registers */
+#include <plat/mux.h>
+#include <plat/omap7xx.h> /* OMAP7XX_IO_CONF registers */
/* FIXME address is now a platform device resource,
@@ -504,7 +504,7 @@ static int __init uwire_probe(struct platform_device *pdev)
}
clk_enable(uwire->ck);
- if (cpu_is_omap730())
+ if (cpu_is_omap7xx())
uwire_idx_shift = 1;
else
uwire_idx_shift = 2;
@@ -573,8 +573,8 @@ static int __init omap_uwire_init(void)
}
if (machine_is_omap_perseus2()) {
/* configure pins: MPU_UW_nSCS1, MPU_UW_SDO, MPU_UW_SCLK */
- int val = omap_readl(OMAP730_IO_CONF_9) & ~0x00EEE000;
- omap_writel(val | 0x00AAA000, OMAP730_IO_CONF_9);
+ int val = omap_readl(OMAP7XX_IO_CONF_9) & ~0x00EEE000;
+ omap_writel(val | 0x00AAA000, OMAP7XX_IO_CONF_9);
}
return platform_driver_probe(&uwire_driver, uwire_probe);
diff --git a/drivers/staging/arlan/arlan-proc.c b/drivers/staging/arlan/arlan-proc.c
index a8b689635a3b..b22983e6c0cf 100644
--- a/drivers/staging/arlan/arlan-proc.c
+++ b/drivers/staging/arlan/arlan-proc.c
@@ -816,84 +816,83 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write,
/* Place files in /proc/sys/dev/arlan */
-#define CTBLN(num,card,nam) \
- { .ctl_name = num,\
- .procname = #nam,\
+#define CTBLN(card,nam) \
+ { .procname = #nam,\
.data = &(arlan_conf[card].nam),\
- .maxlen = sizeof(int), .mode = 0600, .proc_handler = &proc_dointvec}
+ .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec}
#ifdef ARLAN_DEBUGGING
#define ARLAN_PROC_DEBUG_ENTRIES \
- { .ctl_name = 48, .procname = "entry_exit_debug",\
+ { .procname = "entry_exit_debug",\
.data = &arlan_entry_and_exit_debug,\
- .maxlen = sizeof(int), .mode = 0600, .proc_handler = &proc_dointvec},\
- { .ctl_name = 49, .procname = "debug", .data = &arlan_debug,\
- .maxlen = sizeof(int), .mode = 0600, .proc_handler = &proc_dointvec},
+ .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec},\
+ { .procname = "debug", .data = &arlan_debug,\
+ .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec},
#else
#define ARLAN_PROC_DEBUG_ENTRIES
#endif
#define ARLAN_SYSCTL_TABLE_TOTAL(cardNo)\
- CTBLN(1,cardNo,spreadingCode),\
- CTBLN(2,cardNo, channelNumber),\
- CTBLN(3,cardNo, scramblingDisable),\
- CTBLN(4,cardNo, txAttenuation),\
- CTBLN(5,cardNo, systemId), \
- CTBLN(6,cardNo, maxDatagramSize),\
- CTBLN(7,cardNo, maxFrameSize),\
- CTBLN(8,cardNo, maxRetries),\
- CTBLN(9,cardNo, receiveMode),\
- CTBLN(10,cardNo, priority),\
- CTBLN(11,cardNo, rootOrRepeater),\
- CTBLN(12,cardNo, SID),\
- CTBLN(13,cardNo, registrationMode),\
- CTBLN(14,cardNo, registrationFill),\
- CTBLN(15,cardNo, localTalkAddress),\
- CTBLN(16,cardNo, codeFormat),\
- CTBLN(17,cardNo, numChannels),\
- CTBLN(18,cardNo, channel1),\
- CTBLN(19,cardNo, channel2),\
- CTBLN(20,cardNo, channel3),\
- CTBLN(21,cardNo, channel4),\
- CTBLN(22,cardNo, txClear),\
- CTBLN(23,cardNo, txRetries),\
- CTBLN(24,cardNo, txRouting),\
- CTBLN(25,cardNo, txScrambled),\
- CTBLN(26,cardNo, rxParameter),\
- CTBLN(27,cardNo, txTimeoutMs),\
- CTBLN(28,cardNo, waitCardTimeout),\
- CTBLN(29,cardNo, channelSet), \
- {.ctl_name = 30, .procname = "name",\
+ CTBLN(cardNo,spreadingCode),\
+ CTBLN(cardNo, channelNumber),\
+ CTBLN(cardNo, scramblingDisable),\
+ CTBLN(cardNo, txAttenuation),\
+ CTBLN(cardNo, systemId), \
+ CTBLN(cardNo, maxDatagramSize),\
+ CTBLN(cardNo, maxFrameSize),\
+ CTBLN(cardNo, maxRetries),\
+ CTBLN(cardNo, receiveMode),\
+ CTBLN(cardNo, priority),\
+ CTBLN(cardNo, rootOrRepeater),\
+ CTBLN(cardNo, SID),\
+ CTBLN(cardNo, registrationMode),\
+ CTBLN(cardNo, registrationFill),\
+ CTBLN(cardNo, localTalkAddress),\
+ CTBLN(cardNo, codeFormat),\
+ CTBLN(cardNo, numChannels),\
+ CTBLN(cardNo, channel1),\
+ CTBLN(cardNo, channel2),\
+ CTBLN(cardNo, channel3),\
+ CTBLN(cardNo, channel4),\
+ CTBLN(cardNo, txClear),\
+ CTBLN(cardNo, txRetries),\
+ CTBLN(cardNo, txRouting),\
+ CTBLN(cardNo, txScrambled),\
+ CTBLN(cardNo, rxParameter),\
+ CTBLN(cardNo, txTimeoutMs),\
+ CTBLN(cardNo, waitCardTimeout),\
+ CTBLN(cardNo, channelSet), \
+ { .procname = "name",\
.data = arlan_conf[cardNo].siteName,\
- .maxlen = 16, .mode = 0600, .proc_handler = &proc_dostring},\
- CTBLN(31,cardNo,waitTime),\
- CTBLN(32,cardNo,lParameter),\
- CTBLN(33,cardNo,_15),\
- CTBLN(34,cardNo,headerSize),\
- CTBLN(36,cardNo,tx_delay_ms),\
- CTBLN(37,cardNo,retries),\
- CTBLN(38,cardNo,ReTransmitPacketMaxSize),\
- CTBLN(39,cardNo,waitReTransmitPacketMaxSize),\
- CTBLN(40,cardNo,fastReTransCount),\
- CTBLN(41,cardNo,driverRetransmissions),\
- CTBLN(42,cardNo,txAckTimeoutMs),\
- CTBLN(43,cardNo,registrationInterrupts),\
- CTBLN(44,cardNo,hardwareType),\
- CTBLN(45,cardNo,radioType),\
- CTBLN(46,cardNo,writeEEPROM),\
- CTBLN(47,cardNo,writeRadioType),\
+ .maxlen = 16, .mode = 0600, .proc_handler = proc_dostring},\
+ CTBLN(cardNo,waitTime),\
+ CTBLN(cardNo,lParameter),\
+ CTBLN(cardNo,_15),\
+ CTBLN(cardNo,headerSize),\
+ CTBLN(cardNo,tx_delay_ms),\
+ CTBLN(cardNo,retries),\
+ CTBLN(cardNo,ReTransmitPacketMaxSize),\
+ CTBLN(cardNo,waitReTransmitPacketMaxSize),\
+ CTBLN(cardNo,fastReTransCount),\
+ CTBLN(cardNo,driverRetransmissions),\
+ CTBLN(cardNo,txAckTimeoutMs),\
+ CTBLN(cardNo,registrationInterrupts),\
+ CTBLN(cardNo,hardwareType),\
+ CTBLN(cardNo,radioType),\
+ CTBLN(cardNo,writeEEPROM),\
+ CTBLN(cardNo,writeRadioType),\
ARLAN_PROC_DEBUG_ENTRIES\
- CTBLN(50,cardNo,in_speed),\
- CTBLN(51,cardNo,out_speed),\
- CTBLN(52,cardNo,in_speed10),\
- CTBLN(53,cardNo,out_speed10),\
- CTBLN(54,cardNo,in_speed_max),\
- CTBLN(55,cardNo,out_speed_max),\
- CTBLN(56,cardNo,measure_rate),\
- CTBLN(57,cardNo,pre_Command_Wait),\
- CTBLN(58,cardNo,rx_tweak1),\
- CTBLN(59,cardNo,rx_tweak2),\
- CTBLN(60,cardNo,tx_queue_len),\
+ CTBLN(cardNo,in_speed),\
+ CTBLN(cardNo,out_speed),\
+ CTBLN(cardNo,in_speed10),\
+ CTBLN(cardNo,out_speed10),\
+ CTBLN(cardNo,in_speed_max),\
+ CTBLN(cardNo,out_speed_max),\
+ CTBLN(cardNo,measure_rate),\
+ CTBLN(cardNo,pre_Command_Wait),\
+ CTBLN(cardNo,rx_tweak1),\
+ CTBLN(cardNo,rx_tweak2),\
+ CTBLN(cardNo,tx_queue_len),\
@@ -903,63 +902,56 @@ static ctl_table arlan_conf_table0[] =
#ifdef ARLAN_PROC_SHM_DUMP
{
- .ctl_name = 150,
.procname = "arlan0-txRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_infotxRing,
+ .proc_handler = arlan_sysctl_infotxRing,
},
{
- .ctl_name = 151,
.procname = "arlan0-rxRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_inforxRing,
+ .proc_handler = arlan_sysctl_inforxRing,
},
{
- .ctl_name = 152,
.procname = "arlan0-18",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info18,
+ .proc_handler = arlan_sysctl_info18,
},
{
- .ctl_name = 153,
.procname = "arlan0-ring",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info161719,
+ .proc_handler = arlan_sysctl_info161719,
},
{
- .ctl_name = 154,
.procname = "arlan0-shm-cpy",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info,
+ .proc_handler = arlan_sysctl_info,
},
#endif
{
- .ctl_name = 155,
.procname = "config0",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_configure
+ .proc_handler = arlan_configure
},
{
- .ctl_name = 156,
.procname = "reset0",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_sysctl_reset,
+ .proc_handler = arlan_sysctl_reset,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table arlan_conf_table1[] =
@@ -969,63 +961,56 @@ static ctl_table arlan_conf_table1[] =
#ifdef ARLAN_PROC_SHM_DUMP
{
- .ctl_name = 150,
.procname = "arlan1-txRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_infotxRing,
+ .proc_handler = arlan_sysctl_infotxRing,
},
{
- .ctl_name = 151,
.procname = "arlan1-rxRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_inforxRing,
+ .proc_handler = arlan_sysctl_inforxRing,
},
{
- .ctl_name = 152,
.procname = "arlan1-18",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info18,
+ .proc_handler = arlan_sysctl_info18,
},
{
- .ctl_name = 153,
.procname = "arlan1-ring",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info161719,
+ .proc_handler = arlan_sysctl_info161719,
},
{
- .ctl_name = 154,
.procname = "arlan1-shm-cpy",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info,
+ .proc_handler = arlan_sysctl_info,
},
#endif
{
- .ctl_name = 155,
.procname = "config1",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_configure,
+ .proc_handler = arlan_configure,
},
{
- .ctl_name = 156,
.procname = "reset1",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_sysctl_reset,
+ .proc_handler = arlan_sysctl_reset,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table arlan_conf_table2[] =
@@ -1035,63 +1020,56 @@ static ctl_table arlan_conf_table2[] =
#ifdef ARLAN_PROC_SHM_DUMP
{
- .ctl_name = 150,
.procname = "arlan2-txRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_infotxRing,
+ .proc_handler = arlan_sysctl_infotxRing,
},
{
- .ctl_name = 151,
.procname = "arlan2-rxRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_inforxRing,
+ .proc_handler = arlan_sysctl_inforxRing,
},
{
- .ctl_name = 152,
.procname = "arlan2-18",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info18,
+ .proc_handler = arlan_sysctl_info18,
},
{
- .ctl_name = 153,
.procname = "arlan2-ring",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info161719,
+ .proc_handler = arlan_sysctl_info161719,
},
{
- .ctl_name = 154,
.procname = "arlan2-shm-cpy",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info,
+ .proc_handler = arlan_sysctl_info,
},
#endif
{
- .ctl_name = 155,
.procname = "config2",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_configure,
+ .proc_handler = arlan_configure,
},
{
- .ctl_name = 156,
.procname = "reset2",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_sysctl_reset,
+ .proc_handler = arlan_sysctl_reset,
},
- { .ctl_name = 0 }
+ { }
};
static ctl_table arlan_conf_table3[] =
@@ -1101,63 +1079,56 @@ static ctl_table arlan_conf_table3[] =
#ifdef ARLAN_PROC_SHM_DUMP
{
- .ctl_name = 150,
.procname = "arlan3-txRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_infotxRing,
+ .proc_handler = arlan_sysctl_infotxRing,
},
{
- .ctl_name = 151,
.procname = "arlan3-rxRing",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_inforxRing,
+ .proc_handler = arlan_sysctl_inforxRing,
},
{
- .ctl_name = 152,
.procname = "arlan3-18",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info18,
+ .proc_handler = arlan_sysctl_info18,
},
{
- .ctl_name = 153,
.procname = "arlan3-ring",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info161719,
+ .proc_handler = arlan_sysctl_info161719,
},
{
- .ctl_name = 154,
.procname = "arlan3-shm-cpy",
.data = &arlan_drive_info,
.maxlen = ARLAN_STR_SIZE,
.mode = 0400,
- .proc_handler = &arlan_sysctl_info,
+ .proc_handler = arlan_sysctl_info,
},
#endif
{
- .ctl_name = 155,
.procname = "config3",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_configure,
+ .proc_handler = arlan_configure,
},
{
- .ctl_name = 156,
.procname = "reset3",
.data = &conf_reset_result,
.maxlen = 100,
.mode = 0400,
- .proc_handler = &arlan_sysctl_reset,
+ .proc_handler = arlan_sysctl_reset,
},
- { .ctl_name = 0 }
+ { }
};
@@ -1165,41 +1136,37 @@ static ctl_table arlan_conf_table3[] =
static ctl_table arlan_table[] =
{
{
- .ctl_name = 0,
.procname = "arlan0",
.maxlen = 0,
.mode = 0600,
.child = arlan_conf_table0,
},
{
- .ctl_name = 0,
.procname = "arlan1",
.maxlen = 0,
.mode = 0600,
.child = arlan_conf_table1,
},
{
- .ctl_name = 0,
.procname = "arlan2",
.maxlen = 0,
.mode = 0600,
.child = arlan_conf_table2,
},
{
- .ctl_name = 0,
.procname = "arlan3",
.maxlen = 0,
.mode = 0600,
.child = arlan_conf_table3,
},
- { .ctl_name = 0 }
+ { }
};
#else
-static ctl_table arlan_table[MAX_ARLANS + 1] =
+static ctl_table arlan_table[] =
{
- { .ctl_name = 0 }
+ { }
};
#endif
@@ -1209,22 +1176,14 @@ static ctl_table arlan_table[MAX_ARLANS + 1] =
static ctl_table arlan_root_table[] =
{
{
- .ctl_name = CTL_ARLAN,
.procname = "arlan",
.maxlen = 0,
.mode = 0555,
.child = arlan_table,
},
- { .ctl_name = 0 }
+ { }
};
-/* Make sure that /proc/sys/dev is there */
-//static ctl_table arlan_device_root_table[] =
-//{
-// {CTL_DEV, "dev", NULL, 0, 0555, arlan_root_table},
-// {0}
-//};
-
static struct ctl_table_header *arlan_device_sysctl_header;
@@ -1234,8 +1193,6 @@ int __init init_arlan_proc(void)
int i = 0;
if (arlan_device_sysctl_header)
return 0;
- for (i = 0; i < MAX_ARLANS && arlan_device[i]; i++)
- arlan_table[i].ctl_name = i + 1;
arlan_device_sysctl_header = register_sysctl_table(arlan_root_table);
if (!arlan_device_sysctl_header)
return -1;
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index c94de3139223..f69b7783027f 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -143,7 +143,6 @@ static int pohmelfs_writepages(struct address_space *mapping, struct writeback_c
struct inode *inode = mapping->host;
struct pohmelfs_inode *pi = POHMELFS_I(inode);
struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb);
- struct backing_dev_info *bdi = mapping->backing_dev_info;
int err = 0;
int done = 0;
int nr_pages;
@@ -152,11 +151,6 @@ static int pohmelfs_writepages(struct address_space *mapping, struct writeback_c
int scanned = 0;
int range_whole = 0;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- return 0;
- }
-
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
@@ -248,10 +242,6 @@ retry:
if (wbc->nr_to_write <= 0)
done = 1;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- done = 1;
- }
continue;
out_continue:
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index a2db0e174f2c..f81e4f025f23 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -52,9 +52,9 @@
#include <asm/unaligned.h>
#include <asm/mach-types.h>
-#include <mach/dma.h>
-#include <mach/usb.h>
-#include <mach/control.h>
+#include <plat/dma.h>
+#include <plat/usb.h>
+#include <plat/control.h>
#include "omap_udc.h"
@@ -2098,6 +2098,7 @@ static inline int machine_without_vbus_sense(void)
|| machine_is_omap_h4()
#endif
|| machine_is_sx1()
+ || cpu_is_omap7xx() /* No known omap7xx boards with vbus sense */
);
}
@@ -2838,6 +2839,16 @@ static int __init omap_udc_probe(struct platform_device *pdev)
udelay(100);
}
+ if (cpu_is_omap7xx()) {
+ dc_clk = clk_get(&pdev->dev, "usb_dc_ck");
+ hhc_clk = clk_get(&pdev->dev, "l3_ocpi_ck");
+ BUG_ON(IS_ERR(dc_clk) || IS_ERR(hhc_clk));
+ /* can't use omap_udc_enable_clock yet */
+ clk_enable(dc_clk);
+ clk_enable(hhc_clk);
+ udelay(100);
+ }
+
INFO("OMAP UDC rev %d.%d%s\n",
omap_readw(UDC_REV) >> 4, omap_readw(UDC_REV) & 0xf,
config->otg ? ", Mini-AB" : "");
@@ -2970,7 +2981,7 @@ known:
goto cleanup3;
}
#endif
- if (cpu_is_omap16xx()) {
+ if (cpu_is_omap16xx() || cpu_is_omap7xx()) {
udc->dc_clk = dc_clk;
udc->hhc_clk = hhc_clk;
clk_disable(hhc_clk);
@@ -3008,7 +3019,7 @@ cleanup0:
if (xceiv)
otg_put_transceiver(xceiv);
- if (cpu_is_omap16xx() || cpu_is_omap24xx()) {
+ if (cpu_is_omap16xx() || cpu_is_omap24xx() || cpu_is_omap7xx()) {
clk_disable(hhc_clk);
clk_disable(dc_clk);
clk_put(hhc_clk);
@@ -3115,6 +3126,10 @@ static struct platform_driver udc_driver = {
static int __init udc_init(void)
{
+ /* Disable DMA for omap7xx -- it doesn't work right. */
+ if (cpu_is_omap7xx())
+ use_dma = 0;
+
INFO("%s, version: " DRIVER_VERSION
#ifdef USE_ISO
" (iso)"
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index 83cbecd2a1ed..5645f70b9214 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -24,10 +24,10 @@
#include <asm/io.h>
#include <asm/mach-types.h>
-#include <mach/mux.h>
+#include <plat/mux.h>
#include <mach/irqs.h>
-#include <mach/fpga.h>
-#include <mach/usb.h>
+#include <plat/fpga.h>
+#include <plat/usb.h>
/* OMAP-1510 OHCI has its own MMU for DMA */
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 34875201ee04..6761d2088db8 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -35,7 +35,7 @@
#include <asm/mach-types.h>
#include <mach/hardware.h>
-#include <mach/mux.h>
+#include <plat/mux.h>
#include "musb_core.h"
#include "omap2430.h"
diff --git a/drivers/usb/musb/omap2430.h b/drivers/usb/musb/omap2430.h
index dc7670718cd2..fbede7798aed 100644
--- a/drivers/usb/musb/omap2430.h
+++ b/drivers/usb/musb/omap2430.h
@@ -12,7 +12,7 @@
#if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3430)
#include <mach/hardware.h>
-#include <mach/usb.h>
+#include <plat/usb.h>
/*
* OMAP2430-specific definitions
diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c
index 7e073a0d7ac9..e13c77052e5e 100644
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -15,8 +15,8 @@
#include <linux/usb.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
-#include <mach/dma.h>
-#include <mach/mux.h>
+#include <plat/dma.h>
+#include <plat/mux.h>
#include "musb_core.h"
diff --git a/drivers/usb/otg/isp1301_omap.c b/drivers/usb/otg/isp1301_omap.c
index 77a5f4188999..d54460a88173 100644
--- a/drivers/usb/otg/isp1301_omap.c
+++ b/drivers/usb/otg/isp1301_omap.c
@@ -36,8 +36,8 @@
#include <asm/irq.h>
#include <asm/mach-types.h>
-#include <mach/usb.h>
-#include <mach/mux.h>
+#include <plat/usb.h>
+#include <plat/mux.h>
#ifndef DEBUG
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 188e1ba3b69f..6b89eb55ed32 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -5,6 +5,9 @@
menu "Graphics support"
depends on HAS_IOMEM
+config HAVE_FB_ATMEL
+ bool
+
source "drivers/char/agp/Kconfig"
source "drivers/gpu/vga/Kconfig"
@@ -937,7 +940,7 @@ config FB_S1D13XXX
config FB_ATMEL
tristate "AT91/AT32 LCD Controller support"
- depends on FB && (ARCH_AT91SAM9261 || ARCH_AT91SAM9G10 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91SAM9G45 || ARCH_AT91CAP9 || AVR32)
+ depends on FB && HAVE_FB_ATMEL
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c
index 37624f74e88b..b7687c55fe16 100644
--- a/drivers/video/atafb.c
+++ b/drivers/video/atafb.c
@@ -2242,6 +2242,9 @@ static int ext_setcolreg(unsigned int regno, unsigned int red,
if (!external_vgaiobase)
return 1;
+ if (regno > 255)
+ return 1;
+
switch (external_card_type) {
case IS_VGA:
OUTB(0x3c8, regno);
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 701a1081e199..7fcb0eb54c60 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -25,6 +25,7 @@
#define DA9034_WLED_CONTROL1 0x3C
#define DA9034_WLED_CONTROL2 0x3D
+#define DA9034_WLED_ISET(x) ((x) & 0x1f)
#define DA9034_WLED_BOOST_EN (1 << 5)
@@ -101,6 +102,7 @@ static struct backlight_ops da903x_backlight_ops = {
static int da903x_backlight_probe(struct platform_device *pdev)
{
+ struct da9034_backlight_pdata *pdata = pdev->dev.platform_data;
struct da903x_backlight_data *data;
struct backlight_device *bl;
int max_brightness;
@@ -127,6 +129,11 @@ static int da903x_backlight_probe(struct platform_device *pdev)
data->da903x_dev = pdev->dev.parent;
data->current_brightness = 0;
+ /* adjust the WLED output current */
+ if (pdata)
+ da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2,
+ DA9034_WLED_ISET(pdata->output_current));
+
bl = backlight_device_register(pdev->name, data->da903x_dev,
data, &da903x_backlight_ops);
if (IS_ERR(bl)) {
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index cbad67e89826..8693e5fcd2eb 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -26,8 +26,8 @@
#include <linux/backlight.h>
#include <mach/hardware.h>
-#include <mach/board.h>
-#include <mach/mux.h>
+#include <plat/board.h>
+#include <plat/mux.h>
#define OMAPBL_MAX_INTENSITY 0xff
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index bbfb502add67..4a3d46e08016 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -367,6 +367,7 @@ static int __devinit tdo24m_probe(struct spi_device *spi)
spi_message_init(m);
+ x->cs_change = 1;
x->tx_buf = &lcd->buf[0];
spi_message_add_tail(x, m);
diff --git a/drivers/video/omap/Makefile b/drivers/video/omap/Makefile
index b63b198d1f03..49226a1b909e 100644
--- a/drivers/video/omap/Makefile
+++ b/drivers/video/omap/Makefile
@@ -35,6 +35,7 @@ objs-y$(CONFIG_MACH_OMAP3EVM) += lcd_omap3evm.o
objs-y$(CONFIG_MACH_OMAP3_BEAGLE) += lcd_omap3beagle.o
objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
objs-y$(CONFIG_MACH_OVERO) += lcd_overo.o
+objs-y$(CONFIG_MACH_HERALD) += lcd_htcherald.o
omapfb-objs := $(objs-yy)
diff --git a/drivers/video/omap/blizzard.c b/drivers/video/omap/blizzard.c
index 70dadf9d2334..f5d75f22cef9 100644
--- a/drivers/video/omap/blizzard.c
+++ b/drivers/video/omap/blizzard.c
@@ -26,9 +26,9 @@
#include <linux/delay.h>
#include <linux/clk.h>
-#include <mach/dma.h>
-#include <mach/omapfb.h>
-#include <mach/blizzard.h>
+#include <plat/dma.h>
+#include <plat/omapfb.h>
+#include <plat/blizzard.h>
#include "dispc.h"
diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index f16e42154229..7c833db4f9b7 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -25,9 +25,9 @@
#include <linux/clk.h>
#include <linux/io.h>
-#include <mach/sram.h>
-#include <mach/omapfb.h>
-#include <mach/board.h>
+#include <plat/sram.h>
+#include <plat/omapfb.h>
+#include <plat/board.h>
#include "dispc.h"
@@ -204,6 +204,7 @@ static u32 inline dispc_read_reg(int idx)
/* Select RFBI or bypass mode */
static void enable_rfbi_mode(int enable)
{
+ void __iomem *rfbi_control;
u32 l;
l = dispc_read_reg(DISPC_CONTROL);
@@ -216,9 +217,15 @@ static void enable_rfbi_mode(int enable)
dispc_write_reg(DISPC_CONTROL, l);
/* Set bypass mode in RFBI module */
- l = __raw_readl(OMAP2_IO_ADDRESS(RFBI_CONTROL));
+ rfbi_control = ioremap(RFBI_CONTROL, SZ_1K);
+ if (!rfbi_control) {
+ pr_err("Unable to ioremap rfbi_control\n");
+ return;
+ }
+ l = __raw_readl(rfbi_control);
l |= enable ? 0 : (1 << 1);
- __raw_writel(l, OMAP2_IO_ADDRESS(RFBI_CONTROL));
+ __raw_writel(l, rfbi_control);
+ iounmap(rfbi_control);
}
static void set_lcd_data_lines(int data_lines)
@@ -1367,6 +1374,7 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
int r;
u32 l;
struct lcd_panel *panel = fbdev->panel;
+ void __iomem *ram_fw_base;
int tmo = 10000;
int skip_init = 0;
int i;
@@ -1441,7 +1449,13 @@ static int omap_dispc_init(struct omapfb_device *fbdev, int ext_mode,
}
/* L3 firewall setting: enable access to OCM RAM */
- __raw_writel(0x402000b0, OMAP2_IO_ADDRESS(0x680050a0));
+ ram_fw_base = ioremap(0x68005000, SZ_1K);
+ if (!ram_fw_base) {
+ dev_err(dispc.fbdev->dev, "Cannot ioremap to enable OCM RAM\n");
+ goto fail1;
+ }
+ __raw_writel(0x402000b0, ram_fw_base + 0xa0);
+ iounmap(ram_fw_base);
if ((r = alloc_palette_ram()) < 0)
goto fail2;
diff --git a/drivers/video/omap/hwa742.c b/drivers/video/omap/hwa742.c
index ca51583ec98a..17a975e4c9c9 100644
--- a/drivers/video/omap/hwa742.c
+++ b/drivers/video/omap/hwa742.c
@@ -26,9 +26,9 @@
#include <linux/delay.h>
#include <linux/clk.h>
-#include <mach/dma.h>
-#include <mach/omapfb.h>
-#include <mach/hwa742.h>
+#include <plat/dma.h>
+#include <plat/omapfb.h>
+#include <plat/hwa742.h>
#define HWA742_REV_CODE_REG 0x0
#define HWA742_CONFIG_REG 0x2
diff --git a/drivers/video/omap/lcd_2430sdp.c b/drivers/video/omap/lcd_2430sdp.c
index 393712b6f369..fea7feee0b77 100644
--- a/drivers/video/omap/lcd_2430sdp.c
+++ b/drivers/video/omap/lcd_2430sdp.c
@@ -27,8 +27,8 @@
#include <linux/gpio.h>
#include <linux/i2c/twl4030.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
#define SDP2430_LCD_PANEL_BACKLIGHT_GPIO 91
diff --git a/drivers/video/omap/lcd_ams_delta.c b/drivers/video/omap/lcd_ams_delta.c
index 1f7439955e02..3d5277252ca3 100644
--- a/drivers/video/omap/lcd_ams_delta.c
+++ b/drivers/video/omap/lcd_ams_delta.c
@@ -25,9 +25,9 @@
#include <linux/io.h>
#include <linux/delay.h>
-#include <mach/board-ams-delta.h>
+#include <plat/board-ams-delta.h>
#include <mach/hardware.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
#define AMS_DELTA_DEFAULT_CONTRAST 112
diff --git a/drivers/video/omap/lcd_apollon.c b/drivers/video/omap/lcd_apollon.c
index 626ae3a532ff..4c5cefc5153b 100644
--- a/drivers/video/omap/lcd_apollon.c
+++ b/drivers/video/omap/lcd_apollon.c
@@ -25,8 +25,8 @@
#include <linux/platform_device.h>
#include <mach/gpio.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
/* #define USE_35INCH_LCD 1 */
diff --git a/drivers/video/omap/lcd_h3.c b/drivers/video/omap/lcd_h3.c
index 417ae5efa8bb..240b4fb10741 100644
--- a/drivers/video/omap/lcd_h3.c
+++ b/drivers/video/omap/lcd_h3.c
@@ -24,7 +24,7 @@
#include <linux/i2c/tps65010.h>
#include <mach/gpio.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
#define MODULE_NAME "omapfb-lcd_h3"
diff --git a/drivers/video/omap/lcd_h4.c b/drivers/video/omap/lcd_h4.c
index 0c398bda7601..720625da1f4e 100644
--- a/drivers/video/omap/lcd_h4.c
+++ b/drivers/video/omap/lcd_h4.c
@@ -22,7 +22,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
static int h4_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev)
{
diff --git a/drivers/video/omap/lcd_htcherald.c b/drivers/video/omap/lcd_htcherald.c
new file mode 100644
index 000000000000..2e0c81ea7483
--- /dev/null
+++ b/drivers/video/omap/lcd_htcherald.c
@@ -0,0 +1,130 @@
+/*
+ * File: drivers/video/omap/lcd-htcherald.c
+ *
+ * LCD panel support for the HTC Herald
+ *
+ * Copyright (C) 2009 Cory Maccarrone <darkstar6262@gmail.com>
+ * Copyright (C) 2009 Wing Linux
+ *
+ * Based on the lcd_htcwizard.c file from the linwizard project:
+ * Copyright (C) linwizard.sourceforge.net
+ * Author: Angelo Arrifano <miknix@gmail.com>
+ * Based on lcd_h4 by Imre Deak <imre.deak@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <plat/omapfb.h>
+
+static int htcherald_panel_init(struct lcd_panel *panel,
+ struct omapfb_device *fbdev)
+{
+ return 0;
+}
+
+static void htcherald_panel_cleanup(struct lcd_panel *panel)
+{
+}
+
+static int htcherald_panel_enable(struct lcd_panel *panel)
+{
+ return 0;
+}
+
+static void htcherald_panel_disable(struct lcd_panel *panel)
+{
+}
+
+static unsigned long htcherald_panel_get_caps(struct lcd_panel *panel)
+{
+ return 0;
+}
+
+/* Found on WIZ200 (miknix) and some HERA110 models (darkstar62) */
+struct lcd_panel htcherald_panel_1 = {
+ .name = "lcd_herald",
+ .config = OMAP_LCDC_PANEL_TFT |
+ OMAP_LCDC_INV_HSYNC |
+ OMAP_LCDC_INV_VSYNC |
+ OMAP_LCDC_INV_PIX_CLOCK,
+ .bpp = 16,
+ .data_lines = 16,
+ .x_res = 240,
+ .y_res = 320,
+ .pixel_clock = 6093,
+ .pcd = 0, /* 15 */
+ .hsw = 10,
+ .hfp = 10,
+ .hbp = 20,
+ .vsw = 3,
+ .vfp = 2,
+ .vbp = 2,
+
+ .init = htcherald_panel_init,
+ .cleanup = htcherald_panel_cleanup,
+ .enable = htcherald_panel_enable,
+ .disable = htcherald_panel_disable,
+ .get_caps = htcherald_panel_get_caps,
+};
+
+static int htcherald_panel_probe(struct platform_device *pdev)
+{
+ omapfb_register_panel(&htcherald_panel_1);
+ return 0;
+}
+
+static int htcherald_panel_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static int htcherald_panel_suspend(struct platform_device *pdev,
+ pm_message_t mesg)
+{
+ return 0;
+}
+
+static int htcherald_panel_resume(struct platform_device *pdev)
+{
+ return 0;
+}
+
+struct platform_driver htcherald_panel_driver = {
+ .probe = htcherald_panel_probe,
+ .remove = htcherald_panel_remove,
+ .suspend = htcherald_panel_suspend,
+ .resume = htcherald_panel_resume,
+ .driver = {
+ .name = "lcd_htcherald",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int htcherald_panel_drv_init(void)
+{
+ return platform_driver_register(&htcherald_panel_driver);
+}
+
+static void htcherald_panel_drv_cleanup(void)
+{
+ platform_driver_unregister(&htcherald_panel_driver);
+}
+
+module_init(htcherald_panel_drv_init);
+module_exit(htcherald_panel_drv_cleanup);
+
diff --git a/drivers/video/omap/lcd_inn1510.c b/drivers/video/omap/lcd_inn1510.c
index cdbd8bb607be..aafe9b497e2d 100644
--- a/drivers/video/omap/lcd_inn1510.c
+++ b/drivers/video/omap/lcd_inn1510.c
@@ -23,8 +23,8 @@
#include <linux/platform_device.h>
#include <linux/io.h>
-#include <mach/fpga.h>
-#include <mach/omapfb.h>
+#include <plat/fpga.h>
+#include <plat/omapfb.h>
static int innovator1510_panel_init(struct lcd_panel *panel,
struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_inn1610.c b/drivers/video/omap/lcd_inn1610.c
index 268f7f808a4e..0de338264a8a 100644
--- a/drivers/video/omap/lcd_inn1610.c
+++ b/drivers/video/omap/lcd_inn1610.c
@@ -23,7 +23,7 @@
#include <linux/platform_device.h>
#include <mach/gpio.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
#define MODULE_NAME "omapfb-lcd_h3"
diff --git a/drivers/video/omap/lcd_ldp.c b/drivers/video/omap/lcd_ldp.c
index dbfe8974fb94..6a260dfdadc5 100644
--- a/drivers/video/omap/lcd_ldp.c
+++ b/drivers/video/omap/lcd_ldp.c
@@ -27,8 +27,8 @@
#include <linux/i2c/twl4030.h>
#include <mach/gpio.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
#define LCD_PANEL_BACKLIGHT_GPIO (15 + OMAP_MAX_GPIO_LINES)
diff --git a/drivers/video/omap/lcd_mipid.c b/drivers/video/omap/lcd_mipid.c
index 918ee8934196..2162eb09e0fe 100644
--- a/drivers/video/omap/lcd_mipid.c
+++ b/drivers/video/omap/lcd_mipid.c
@@ -23,8 +23,8 @@
#include <linux/workqueue.h>
#include <linux/spi/spi.h>
-#include <mach/omapfb.h>
-#include <mach/lcd_mipid.h>
+#include <plat/omapfb.h>
+#include <plat/lcd_mipid.h>
#define MIPID_MODULE_NAME "lcd_mipid"
diff --git a/drivers/video/omap/lcd_omap2evm.c b/drivers/video/omap/lcd_omap2evm.c
index 7a2bbe2ecec3..e1a38abca3e7 100644
--- a/drivers/video/omap/lcd_omap2evm.c
+++ b/drivers/video/omap/lcd_omap2evm.c
@@ -26,8 +26,8 @@
#include <linux/gpio.h>
#include <linux/i2c/twl4030.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
#define LCD_PANEL_ENABLE_GPIO 154
diff --git a/drivers/video/omap/lcd_omap3beagle.c b/drivers/video/omap/lcd_omap3beagle.c
index 4011910123bf..ccec084ed647 100644
--- a/drivers/video/omap/lcd_omap3beagle.c
+++ b/drivers/video/omap/lcd_omap3beagle.c
@@ -25,8 +25,8 @@
#include <linux/gpio.h>
#include <linux/i2c/twl4030.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
#define LCD_PANEL_ENABLE_GPIO 170
diff --git a/drivers/video/omap/lcd_omap3evm.c b/drivers/video/omap/lcd_omap3evm.c
index b6a4c2c57a2f..556eb31db24c 100644
--- a/drivers/video/omap/lcd_omap3evm.c
+++ b/drivers/video/omap/lcd_omap3evm.c
@@ -25,8 +25,8 @@
#include <linux/gpio.h>
#include <linux/i2c/twl4030.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
#define LCD_PANEL_ENABLE_GPIO 153
diff --git a/drivers/video/omap/lcd_osk.c b/drivers/video/omap/lcd_osk.c
index b3fa88bc6269..bb21d7dca39e 100644
--- a/drivers/video/omap/lcd_osk.c
+++ b/drivers/video/omap/lcd_osk.c
@@ -24,8 +24,8 @@
#include <linux/platform_device.h>
#include <mach/gpio.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
static int osk_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev)
{
diff --git a/drivers/video/omap/lcd_overo.c b/drivers/video/omap/lcd_overo.c
index 2bc5c9268e5e..b0f86e514cde 100644
--- a/drivers/video/omap/lcd_overo.c
+++ b/drivers/video/omap/lcd_overo.c
@@ -24,8 +24,8 @@
#include <linux/i2c/twl4030.h>
#include <mach/gpio.h>
-#include <mach/mux.h>
-#include <mach/omapfb.h>
+#include <plat/mux.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
#define LCD_ENABLE 144
diff --git a/drivers/video/omap/lcd_palmte.c b/drivers/video/omap/lcd_palmte.c
index 4bf3c79f3cc7..d30289603ce8 100644
--- a/drivers/video/omap/lcd_palmte.c
+++ b/drivers/video/omap/lcd_palmte.c
@@ -23,8 +23,8 @@
#include <linux/platform_device.h>
#include <linux/io.h>
-#include <mach/fpga.h>
-#include <mach/omapfb.h>
+#include <plat/fpga.h>
+#include <plat/omapfb.h>
static int palmte_panel_init(struct lcd_panel *panel,
struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_palmtt.c b/drivers/video/omap/lcd_palmtt.c
index 48ea1f9f2cbf..557424fb6df1 100644
--- a/drivers/video/omap/lcd_palmtt.c
+++ b/drivers/video/omap/lcd_palmtt.c
@@ -30,7 +30,7 @@ GPIO13 - screen blanking
#include <linux/io.h>
#include <mach/gpio.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
static int palmtt_panel_init(struct lcd_panel *panel,
struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcd_palmz71.c b/drivers/video/omap/lcd_palmz71.c
index 0697d29b4d3b..5f4b5b2c1f41 100644
--- a/drivers/video/omap/lcd_palmz71.c
+++ b/drivers/video/omap/lcd_palmz71.c
@@ -24,7 +24,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
static int palmz71_panel_init(struct lcd_panel *panel,
struct omapfb_device *fbdev)
diff --git a/drivers/video/omap/lcdc.c b/drivers/video/omap/lcdc.c
index ab3949256677..5f32cafbf74c 100644
--- a/drivers/video/omap/lcdc.c
+++ b/drivers/video/omap/lcdc.c
@@ -29,8 +29,8 @@
#include <linux/vmalloc.h>
#include <linux/clk.h>
-#include <mach/dma.h>
-#include <mach/omapfb.h>
+#include <plat/dma.h>
+#include <plat/omapfb.h>
#include <asm/mach-types.h>
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 0d0c8c8b9b56..f900a43db8d7 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -28,8 +28,8 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
-#include <mach/dma.h>
-#include <mach/omapfb.h>
+#include <plat/dma.h>
+#include <plat/omapfb.h>
#include "lcdc.h"
#include "dispc.h"
diff --git a/drivers/video/omap/rfbi.c b/drivers/video/omap/rfbi.c
index ee01e84e19c1..c90fa39486b4 100644
--- a/drivers/video/omap/rfbi.c
+++ b/drivers/video/omap/rfbi.c
@@ -27,7 +27,7 @@
#include <linux/clk.h>
#include <linux/io.h>
-#include <mach/omapfb.h>
+#include <plat/omapfb.h>
#include "dispc.h"
diff --git a/drivers/video/omap/sossi.c b/drivers/video/omap/sossi.c
index a76946220249..79dc84f09713 100644
--- a/drivers/video/omap/sossi.c
+++ b/drivers/video/omap/sossi.c
@@ -24,8 +24,8 @@
#include <linux/irq.h>
#include <linux/io.h>
-#include <mach/dma.h>
-#include <mach/omapfb.h>
+#include <plat/dma.h>
+#include <plat/omapfb.h>
#include "lcdc.h"
diff --git a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c
index 84d8327e47db..75285d3f393c 100644
--- a/drivers/video/pxa168fb.c
+++ b/drivers/video/pxa168fb.c
@@ -687,6 +687,7 @@ static int __init pxa168fb_probe(struct platform_device *pdev)
}
info->fix.smem_start = (unsigned long)fbi->fb_start_dma;
+ set_graphics_start(info, 0, 0);
/*
* Set video mode according to platform data.
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 1820c4a24434..f58a3aae6ea6 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -80,7 +80,8 @@
static int pxafb_activate_var(struct fb_var_screeninfo *var,
struct pxafb_info *);
static void set_ctrlr_state(struct pxafb_info *fbi, u_int state);
-static void setup_base_frame(struct pxafb_info *fbi, int branch);
+static void setup_base_frame(struct pxafb_info *fbi,
+ struct fb_var_screeninfo *var, int branch);
static int setup_frame_dma(struct pxafb_info *fbi, int dma, int pal,
unsigned long offset, size_t size);
@@ -397,6 +398,7 @@ static void pxafb_setmode(struct fb_var_screeninfo *var,
var->lower_margin = mode->lower_margin;
var->sync = mode->sync;
var->grayscale = mode->cmap_greyscale;
+ var->transp.length = mode->transparency;
/* set the initial RGBA bitfields */
pxafb_set_pixfmt(var, mode->depth);
@@ -531,12 +533,22 @@ static int pxafb_pan_display(struct fb_var_screeninfo *var,
struct fb_info *info)
{
struct pxafb_info *fbi = (struct pxafb_info *)info;
+ struct fb_var_screeninfo newvar;
int dma = DMA_MAX + DMA_BASE;
if (fbi->state != C_ENABLE)
return 0;
- setup_base_frame(fbi, 1);
+ /* Only take .xoffset, .yoffset and .vmode & FB_VMODE_YWRAP from what
+ * was passed in and copy the rest from the old screeninfo.
+ */
+ memcpy(&newvar, &fbi->fb.var, sizeof(newvar));
+ newvar.xoffset = var->xoffset;
+ newvar.yoffset = var->yoffset;
+ newvar.vmode &= ~FB_VMODE_YWRAP;
+ newvar.vmode |= var->vmode & FB_VMODE_YWRAP;
+
+ setup_base_frame(fbi, &newvar, 1);
if (fbi->lccr0 & LCCR0_SDS)
lcd_writel(fbi, FBR1, fbi->fdadr[dma + 1] | 0x1);
@@ -1052,9 +1064,10 @@ static int setup_frame_dma(struct pxafb_info *fbi, int dma, int pal,
return 0;
}
-static void setup_base_frame(struct pxafb_info *fbi, int branch)
+static void setup_base_frame(struct pxafb_info *fbi,
+ struct fb_var_screeninfo *var,
+ int branch)
{
- struct fb_var_screeninfo *var = &fbi->fb.var;
struct fb_fix_screeninfo *fix = &fbi->fb.fix;
int nbytes, dma, pal, bpp = var->bits_per_pixel;
unsigned long offset;
@@ -1332,7 +1345,7 @@ static int pxafb_activate_var(struct fb_var_screeninfo *var,
#endif
setup_parallel_timing(fbi, var);
- setup_base_frame(fbi, 0);
+ setup_base_frame(fbi, var, 0);
fbi->reg_lccr0 = fbi->lccr0 |
(LCCR0_LDM | LCCR0_SFM | LCCR0_IUM | LCCR0_EFM |
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index 3ed571a2ab18..429ea99eaee5 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -43,7 +43,7 @@
#include <linux/io.h>
#include <linux/uaccess.h>
#include <mach/hardware.h>
-#include <mach/prcm.h>
+#include <plat/prcm.h>
#include "omap_wdt.h"
diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c
index d3c824dc2358..c14ae8676903 100644
--- a/drivers/watchdog/riowd.c
+++ b/drivers/watchdog/riowd.c
@@ -10,7 +10,6 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
-#include <linux/smp_lock.h>
#include <linux/watchdog.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -75,7 +74,6 @@ static void riowd_writereg(struct riowd *p, u8 val, int index)
static int riowd_open(struct inode *inode, struct file *filp)
{
- cycle_kernel_lock();
nonseekable_open(inode, filp);
return 0;
}
@@ -194,6 +192,8 @@ static int __devinit riowd_probe(struct of_device *op,
printk(KERN_ERR PFX "Cannot map registers.\n");
goto out_free;
}
+ /* Make miscdev useable right away */
+ riowd_device = p;
err = misc_register(&riowd_miscdev);
if (err) {
@@ -205,10 +205,10 @@ static int __devinit riowd_probe(struct of_device *op,
"regs at %p\n", riowd_timeout, p->regs);
dev_set_drvdata(&op->dev, p);
- riowd_device = p;
return 0;
out_iounmap:
+ riowd_device = NULL;
of_iounmap(&op->resource[0], p->regs, 2);
out_free:
diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c
index e6c4390d8bd6..53180a37cc9a 100644
--- a/drivers/zorro/zorro-driver.c
+++ b/drivers/zorro/zorro-driver.c
@@ -156,5 +156,4 @@ postcore_initcall(zorro_driver_init);
EXPORT_SYMBOL(zorro_match_device);
EXPORT_SYMBOL(zorro_register_driver);
EXPORT_SYMBOL(zorro_unregister_driver);
-EXPORT_SYMBOL(zorro_dev_driver);
EXPORT_SYMBOL(zorro_bus_type);