From e7751617dd0599ceadf4221cb08e04307b00aa1f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 18 Jun 2019 11:47:10 -0300 Subject: docs: blockdev: add it to the admin-guide The blockdev book basically contains user-faced documentation. Signed-off-by: Mauro Carvalho Chehab --- .../blockdev/drbd/DRBD-8.3-data-packets.svg | 588 +++++++++++++++++++++ .../blockdev/drbd/DRBD-data-packets.svg | 459 ++++++++++++++++ .../admin-guide/blockdev/drbd/conn-states-8.dot | 18 + .../blockdev/drbd/data-structure-v9.rst | 42 ++ .../admin-guide/blockdev/drbd/disk-states-8.dot | 16 + .../drbd/drbd-connection-state-overview.dot | 85 +++ .../admin-guide/blockdev/drbd/figures.rst | 28 + Documentation/admin-guide/blockdev/drbd/index.rst | 19 + .../admin-guide/blockdev/drbd/node-states-8.dot | 13 + Documentation/admin-guide/blockdev/floppy.rst | 255 +++++++++ Documentation/admin-guide/blockdev/index.rst | 14 + Documentation/admin-guide/blockdev/nbd.rst | 31 ++ Documentation/admin-guide/blockdev/paride.rst | 439 +++++++++++++++ Documentation/admin-guide/blockdev/ramdisk.rst | 177 +++++++ Documentation/admin-guide/blockdev/zram.rst | 422 +++++++++++++++ 15 files changed, 2606 insertions(+) create mode 100644 Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg create mode 100644 Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg create mode 100644 Documentation/admin-guide/blockdev/drbd/conn-states-8.dot create mode 100644 Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst create mode 100644 Documentation/admin-guide/blockdev/drbd/disk-states-8.dot create mode 100644 Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot create mode 100644 Documentation/admin-guide/blockdev/drbd/figures.rst create mode 100644 Documentation/admin-guide/blockdev/drbd/index.rst create mode 100644 Documentation/admin-guide/blockdev/drbd/node-states-8.dot create mode 100644 Documentation/admin-guide/blockdev/floppy.rst create mode 100644 
Documentation/admin-guide/blockdev/index.rst create mode 100644 Documentation/admin-guide/blockdev/nbd.rst create mode 100644 Documentation/admin-guide/blockdev/paride.rst create mode 100644 Documentation/admin-guide/blockdev/ramdisk.rst create mode 100644 Documentation/admin-guide/blockdev/zram.rst (limited to 'Documentation/admin-guide/blockdev') diff --git a/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg new file mode 100644 index 000000000000..f87cfa0dc2fb --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg @@ -0,0 +1,588 @@ + + + + + + Master slide + + + + + + + + + + RSDataReply + + + + + + + CsumRSRequest + + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + + WriteAck + + + + got_BlockAck() + + + Checksum based Resync, case not in sync + + + DRBD-8.3 data flow + + + w_e_send_csum() + + + + + + + + RSIsInSync + + + + + + + CsumRSRequest + + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + got_IsInSync() + + + Checksum based Resync, case in sync + + + + + + + + + + OVReply + + + + + + + OVRequest + + + + receive_OVRequest() + + + drbd_endio_read_sec() + + + w_e_end_ov_req() + + + receive_OVReply() + + + drbd_endio_read_sec() + + + w_e_end_ov_reply() + + + + + + OVResult + + + + got_OVResult() + + + Online verify + + + w_make_ov_request() + + + + + + + + drbd_endio_read_sec() + + + w_make_resync_request() + + + w_e_send_csum() + + + + + drbd_endio_read_sec() + + + + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + diff --git 
a/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg new file mode 100644 index 000000000000..48a1e2165fec --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg @@ -0,0 +1,459 @@ + + + + + + Master slide + + + + + + + + + RSDataReply + + + + + RSDataRequest + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_rsdata_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + WriteAck + + + got_BlockAck() + + + Resync blocks, 4-32K + + + + + + + WriteAck + + + + + Data + + + drbd_make_request() + + + receive_Data() + + + drbd_endio_write_sec() + + + e_end_block() + + + got_BlockAck() + + + Regular mirrored write, 512-32K + + + w_send_dblock() + + + + + drbd_endio_write_pri() + + + + + + + DataReply + + + + + DataRequest + + + drbd_make_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_data_req() + + + Drawing + + receive_DataReply() + + + + Diskless read, 512-32K + + + w_send_read_req() + + + DRBD 8 data flow + + + + + + al_begin_io() + + + al_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + diff --git a/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot new file mode 100644 index 000000000000..025e8cf5e64a --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot @@ -0,0 +1,18 @@ +digraph conn_states { + StandAllone -> WFConnection [ label = "ioctl_set_net()" ] + WFConnection -> Unconnected [ label = "unable to bind()" ] + WFConnection -> WFReportParams [ label = "in connect() after accept" ] + WFReportParams -> StandAllone [ label = "checks in receive_param()" ] + WFReportParams -> Connected [ label = "in receive_param()" ] + WFReportParams -> WFBitMapS [ label = "sync_handshake()" ] + WFReportParams -> 
WFBitMapT [ label = "sync_handshake()" ] + WFBitMapS -> SyncSource [ label = "receive_bitmap()" ] + WFBitMapT -> SyncTarget [ label = "receive_bitmap()" ] + SyncSource -> Connected + SyncTarget -> Connected + SyncSource -> PausedSyncS + SyncTarget -> PausedSyncT + PausedSyncS -> SyncSource + PausedSyncT -> SyncTarget + Connected -> WFConnection [ label = "* on network error" ] +} diff --git a/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst new file mode 100644 index 000000000000..66036b901644 --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst @@ -0,0 +1,42 @@ +================================ +kernel data structure for DRBD-9 +================================ + +This describes the in kernel data structure for DRBD-9. Starting with +Linux v3.14 we are reorganizing DRBD to use this data structure. + +Basic Data Structure +==================== + +A node has a number of DRBD resources. Each such resource has a number of +devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD +device is represented by a block device locally. + +The DRBD objects are interconnected to form a matrix as depicted below; a +drbd_peer_device object sits at each intersection between a drbd_device and a +drbd_connection:: + + /--------------+---------------+.....+---------------\ + | resource | device | | device | + +--------------+---------------+.....+---------------+ + | connection | peer_device | | peer_device | + +--------------+---------------+.....+---------------+ + : : : : : + : : : : : + +--------------+---------------+.....+---------------+ + | connection | peer_device | | peer_device | + \--------------+---------------+.....+---------------/ + +In this table, horizontally, devices can be accessed from resources by their +volume number. Likewise, peer_devices can be accessed from connections by +their volume number. 
Objects in the vertical direction are connected by double +linked lists. There are back pointers from peer_devices to their connections and +devices, and from connections and devices to their resource. + +All resources are in the drbd_resources double-linked list. In addition, all +devices can be accessed by their minor device number via the drbd_devices idr. + +The drbd_resource, drbd_connection, and drbd_device objects are reference +counted. The peer_device objects only serve to establish the links between +devices and connections; their lifetime is determined by the lifetime of the +device and connection which they reference. diff --git a/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot new file mode 100644 index 000000000000..d06cfb46fb98 --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot @@ -0,0 +1,16 @@ +digraph disk_states { + Diskless -> Inconsistent [ label = "ioctl_set_disk()" ] + Diskless -> Consistent [ label = "ioctl_set_disk()" ] + Diskless -> Outdated [ label = "ioctl_set_disk()" ] + Consistent -> Outdated [ label = "receive_param()" ] + Consistent -> UpToDate [ label = "receive_param()" ] + Consistent -> Inconsistent [ label = "start resync" ] + Outdated -> Inconsistent [ label = "start resync" ] + UpToDate -> Inconsistent [ label = "ioctl_replicate" ] + Inconsistent -> UpToDate [ label = "resync completed" ] + Consistent -> Failed [ label = "io completion error" ] + Outdated -> Failed [ label = "io completion error" ] + UpToDate -> Failed [ label = "io completion error" ] + Inconsistent -> Failed [ label = "io completion error" ] + Failed -> Diskless [ label = "sending notify to peer" ] +} diff --git a/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot new file mode 100644 index 000000000000..6d9cf0a7b11d --- /dev/null +++ 
b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot @@ -0,0 +1,85 @@ +// vim: set sw=2 sts=2 : +digraph { + rankdir=BT + bgcolor=white + + node [shape=plaintext] + node [fontcolor=black] + + StandAlone [ style=filled,fillcolor=gray,label=StandAlone ] + + node [fontcolor=lightgray] + + Unconnected [ label=Unconnected ] + + CommTrouble [ shape=record, + label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ] + + node [fontcolor=gray] + + subgraph cluster_try_connect { + label="try to connect, handshake" + rank=max + WFConnection [ label=WFConnection ] + WFReportParams [ label=WFReportParams ] + } + + TearDown [ label=TearDown ] + + Connected [ label=Connected,style=filled,fillcolor=green,fontcolor=black ] + + node [fontcolor=lightblue] + + StartingSyncS [ label=StartingSyncS ] + StartingSyncT [ label=StartingSyncT ] + + subgraph cluster_bitmap_exchange { + node [fontcolor=red] + fontcolor=red + label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged" + + WFBitMapT [ label=WFBitMapT ] + WFSyncUUID [ label=WFSyncUUID ] + WFBitMapS [ label=WFBitMapS ] + } + + node [fontcolor=blue] + + cluster_resync [ shape=record,label="{resynchronisation process running\l'concurrent' application requests allowed|{{PausedSyncT\nSyncTarget}|{PausedSyncS\nSyncSource}}}" ] + + node [shape=box,fontcolor=black] + + // drbdadm [label="drbdadm connect"] + // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."] + // comm_error [label="communication trouble"] + + // + // edges + // -------------------------------------- + + StandAlone -> Unconnected [ label="drbdadm connect" ] + Unconnected -> StandAlone [ label="drbdadm disconnect\lor serious communication trouble" ] + Unconnected -> WFConnection [ label="receiver thread is started" ] + WFConnection -> WFReportParams [ headlabel="accept()\land/or \lconnect()\l" ] + + WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something 
essential" ] + WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ] + + WFReportParams -> WFBitMapS + WFReportParams -> WFBitMapT + WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false] + + WFBitMapS -> cluster_resync:S + WFSyncUUID -> cluster_resync:T + + edge [color=green] + cluster_resync:any -> Connected [ label="resync done",fontcolor=green ] + + edge [color=red] + WFReportParams -> CommTrouble + Connected -> CommTrouble + cluster_resync:any -> CommTrouble + edge [color=black] + CommTrouble -> Unconnected [label="receiver thread is stopped" ] + +} diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst new file mode 100644 index 000000000000..3e3fd4b8a478 --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/figures.rst @@ -0,0 +1,28 @@ +.. The files included here are intended to help understand the implementation + +Data flows that relate some functions, and write packets +======================================================== + +.. kernel-figure:: DRBD-8.3-data-packets.svg + :alt: DRBD-8.3-data-packets.svg + :align: center + +.. kernel-figure:: DRBD-data-packets.svg + :alt: DRBD-data-packets.svg + :align: center + + +Sub graphs of DRBD's state transitions +====================================== + +.. kernel-figure:: conn-states-8.dot + :alt: conn-states-8.dot + :align: center + +.. kernel-figure:: disk-states-8.dot + :alt: disk-states-8.dot + :align: center + +.. 
kernel-figure:: node-states-8.dot + :alt: node-states-8.dot + :align: center diff --git a/Documentation/admin-guide/blockdev/drbd/index.rst b/Documentation/admin-guide/blockdev/drbd/index.rst new file mode 100644 index 000000000000..68ecd5c113e9 --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/index.rst @@ -0,0 +1,19 @@ +========================================== +Distributed Replicated Block Device - DRBD +========================================== + +Description +=========== + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Please visit http://www.drbd.org to find out more. + +.. toctree:: + :maxdepth: 1 + + data-structure-v9 + figures diff --git a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot new file mode 100644 index 000000000000..bfa54e1f8016 --- /dev/null +++ b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot @@ -0,0 +1,13 @@ +digraph node_states { + Secondary -> Primary [ label = "ioctl_set_state()" ] + Primary -> Secondary [ label = "ioctl_set_state()" ] +} + +digraph peer_states { + Secondary -> Primary [ label = "recv state packet" ] + Primary -> Secondary [ label = "recv state packet" ] + Primary -> Unknown [ label = "connection lost" ] + Secondary -> Unknown [ label = "connection lost" ] + Unknown -> Primary [ label = "connected" ] + Unknown -> Secondary [ label = "connected" ] +} diff --git a/Documentation/admin-guide/blockdev/floppy.rst b/Documentation/admin-guide/blockdev/floppy.rst new file mode 100644 index 000000000000..4a8f31cf4139 --- /dev/null +++ b/Documentation/admin-guide/blockdev/floppy.rst @@ -0,0 +1,255 @@ +============= +Floppy Driver +============= + +FAQ list: +========= + +A FAQ list may be found in the fdutils package 
(see below), and also +at <http://fdutils.linux.lu/faq.html>. + + +LILO configuration options (Thinkpad users, read this) +====================================================== + +The floppy driver is configured using the 'floppy=' option in +lilo. This option can be typed at the boot prompt, or entered in the +lilo configuration file. + +Example: If your kernel is called linux-2.6.9, type the following line +at the lilo boot prompt (if you have a thinkpad):: + + linux-2.6.9 floppy=thinkpad + +You may also enter the following line in /etc/lilo.conf, in the description +of linux-2.6.9:: + + append = "floppy=thinkpad" + +Several floppy related options may be given, example:: + + linux-2.6.9 floppy=daring floppy=two_fdc + append = "floppy=daring floppy=two_fdc" + +If you give options both in the lilo config file and on the boot +prompt, the option strings of both places are concatenated, the boot +prompt options coming last. That's why there are also options to +restore the default behavior. + + +Module configuration options +============================ + +If you use the floppy driver as a module, use the following syntax:: + + modprobe floppy floppy="<options>" + +Example:: + + modprobe floppy floppy="omnibook messages" + +If you need certain options enabled every time you load the floppy driver, +you can put:: + + options floppy floppy="omnibook messages" + +in a configuration file in /etc/modprobe.d/. + + +The floppy driver related options are: + + floppy=asus_pci + Sets the bit mask to allow only units 0 and 1. (default) + + floppy=daring + Tells the floppy driver that you have a well behaved floppy controller. + This allows more efficient and smoother operation, but may fail on + certain controllers. This may speed up certain operations. + + floppy=0,daring + Tells the floppy driver that your floppy controller should be used + with caution. + + floppy=one_fdc + Tells the floppy driver that you have only one floppy controller. + (default) + + floppy=two_fdc / floppy=
<address>,two_fdc + Tells the floppy driver that you have two floppy controllers. + The second floppy controller is assumed to be at <address>
. This option is not needed if the second controller is at address + 0x370, and if you use the 'cmos' option. + + floppy=thinkpad + Tells the floppy driver that you have a Thinkpad. Thinkpads use an + inverted convention for the disk change line. + + floppy=0,thinkpad + Tells the floppy driver that you don't have a Thinkpad. + + floppy=omnibook / floppy=nodma + Tells the floppy driver not to use DMA for data transfers. + This is needed on HP Omnibooks, which don't have a workable + DMA channel for the floppy driver. This option is also useful + if you frequently get "Unable to allocate DMA memory" messages. + Indeed, dma memory needs to be contiguous in physical memory, + and is thus harder to find, whereas non-dma buffers may be + allocated in virtual memory. However, I advise against this if + you have an FDC without a FIFO (8272A or 82072). 82072A and + later are OK. You also need at least a 486 to use nodma. + If you use nodma mode, I suggest you also set the FIFO + threshold to 10 or lower, in order to limit the number of data + transfer interrupts. + + If you have a FIFO-able FDC, the floppy driver automatically + falls back on non DMA mode if no DMA-able memory can be found. + If you want to avoid this, explicitly ask for 'yesdma'. + + floppy=yesdma + Tells the floppy driver that a workable DMA channel is available. + (default) + + floppy=nofifo + Disables the FIFO entirely. This is needed if you get "Bus + master arbitration error" messages from your Ethernet card (or + from other devices) while accessing the floppy. + + floppy=usefifo + Enables the FIFO. (default) + + floppy=<threshold>,fifo_depth + Sets the FIFO threshold. This is mostly relevant in DMA + mode. If this is higher, the floppy driver tolerates more + interrupt latency, but it triggers more interrupts (i.e. it + imposes more load on the rest of the system). If this is + lower, the interrupt latency should be lower too (faster + processor). The benefit of a lower threshold is fewer + interrupts. 
+ + To tune the fifo threshold, switch on over/underrun messages + using 'floppycontrol --messages'. Then access a floppy + disk. If you get a huge amount of "Over/Underrun - retrying" + messages, then the fifo threshold is too low. Try with a + higher value, until you only get an occasional Over/Underrun. + It is a good idea to compile the floppy driver as a module + when doing this tuning. Indeed, it allows you to try different + fifo values without rebooting the machine for each test. Note + that you need to do 'floppycontrol --messages' every time you + re-insert the module. + + Usually, tuning the fifo threshold should not be needed, as + the default (0xa) is reasonable. + + floppy=<drive>,<type>,cmos + Sets the CMOS type of <drive> to <type>. This is mandatory if + you have more than two floppy drives (only two can be + described in the physical CMOS), or if your BIOS uses + non-standard CMOS types. The CMOS types are: + + == ================================== + 0 Use the value of the physical CMOS + 1 5 1/4 DD + 2 5 1/4 HD + 3 3 1/2 DD + 4 3 1/2 HD + 5 3 1/2 ED + 6 3 1/2 ED + 16 unknown or not installed + == ================================== + + (Note: there are two valid types for ED drives. This is because 5 was + initially chosen to represent floppy *tapes*, and 6 for ED drives. + AMI ignored this, and used 5 for ED drives. That's why the floppy + driver handles both.) + + floppy=unexpected_interrupts + Print a warning message when an unexpected interrupt is received. + (default) + + floppy=no_unexpected_interrupts / floppy=L40SX + Don't print a message when an unexpected interrupt is received. This + is needed on IBM L40SX laptops in certain video modes. (There seems + to be an interaction between video and floppy. The unexpected + interrupts affect only performance, and can be safely ignored.) + + floppy=broken_dcl + Don't use the disk change line, but assume that the disk was + changed whenever the device node is reopened. 
Needed on some + boxes where the disk change line is broken or unsupported. + This should be regarded as a stopgap measure, indeed it makes + floppy operation less efficient due to unneeded cache + flushings, and slightly more unreliable. Please verify your + cable, connection and jumper settings if you have any DCL + problems. However, some older drives, and also some laptops + are known not to have a DCL. + + floppy=debug + Print debugging messages. + + floppy=messages + Print informational messages for some operations (disk change + notifications, warnings about over and underruns, and about + autodetection). + + floppy=silent_dcl_clear + Uses a less noisy way to clear the disk change line (which + doesn't involve seeks). Implied by 'daring' option. + + floppy=<nr>,irq + Sets the floppy IRQ to <nr> instead of 6. + + floppy=<nr>,dma + Sets the floppy DMA channel to <nr> instead of 2. + + floppy=slow + Use PS/2 stepping rate:: + + PS/2 floppies have much slower step rates than regular floppies. + It's been recommended that take about 1/4 of the default speed + in some more extreme cases. + + +Supporting utilities and additional documentation: +================================================== + +Additional parameters of the floppy driver can be configured at +runtime. Utilities which do this can be found in the fdutils package. +This package also contains a new version of mtools which allows you to +access high capacity disks (up to 1992K on a high density 3 1/2 disk!). +It also contains additional documentation about the floppy driver. 
+ +The latest version can be found at the fdutils homepage: + + http://fdutils.linux.lu + +The fdutils releases can be found at: + + http://fdutils.linux.lu/download.html + + http://www.tux.org/pub/knaff/fdutils/ + + ftp://metalab.unc.edu/pub/Linux/utils/disk-management/ + +Reporting problems about the floppy driver +========================================== + +If you have a question or a bug report about the floppy driver, mail +me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use +comp.os.linux.hardware. As the volume in these groups is rather high, +be sure to include the word "floppy" (or "FLOPPY") in the subject +line. If the reported problem happens when mounting floppy disks, be +sure to mention also the type of the filesystem in the subject line. + +Be sure to read the FAQ before mailing/posting any bug reports! + +Alain + +Changelog +========= + +10-30-2004 : + Cleanup, updating, add reference to module configuration. + James Nelson + +6-3-2000 : + Original Document diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst new file mode 100644 index 000000000000..20a738d9d047 --- /dev/null +++ b/Documentation/admin-guide/blockdev/index.rst @@ -0,0 +1,14 @@ +============= +Block Devices +============= + +.. toctree:: + :maxdepth: 1 + + floppy + nbd + paride + ramdisk + zram + + drbd/index diff --git a/Documentation/admin-guide/blockdev/nbd.rst b/Documentation/admin-guide/blockdev/nbd.rst new file mode 100644 index 000000000000..d78dfe559dcf --- /dev/null +++ b/Documentation/admin-guide/blockdev/nbd.rst @@ -0,0 +1,31 @@ +================================== +Network Block Device (TCP version) +================================== + +1) Overview +----------- + +What is it: With this compiled in the kernel (or as a module), Linux +can use a remote server as one of its block devices. 
So every time +the client computer wants to read, e.g., /dev/nb0, it sends a +request over TCP to the server, which will reply with the data read. +This can be used for stations with low disk space (or even diskless) +to borrow disk space from another computer. +Unlike NFS, it is possible to put any filesystem on it, etc. + +For more information, or to download the nbd-client and nbd-server +tools, go to http://nbd.sf.net/. + +The nbd kernel module need only be installed on the client +system, as the nbd-server is completely in userspace. In fact, +the nbd-server has been successfully ported to other operating +systems, including Windows. + +A) NBD parameters +----------------- + +max_part + Number of partitions per device (default: 0). + +nbds_max + Number of block devices that should be initialized (default: 16). diff --git a/Documentation/admin-guide/blockdev/paride.rst b/Documentation/admin-guide/blockdev/paride.rst new file mode 100644 index 000000000000..87b4278bf314 --- /dev/null +++ b/Documentation/admin-guide/blockdev/paride.rst @@ -0,0 +1,439 @@ +=================================== +Linux and parallel port IDE devices +=================================== + +PARIDE v1.03 (c) 1997-8 Grant Guenther + +1. Introduction +=============== + +Owing to the simplicity and near universality of the parallel port interface +to personal computers, many external devices such as portable hard-disk, +CD-ROM, LS-120 and tape drives use the parallel port to connect to their +host computer. While some devices (notably scanners) use ad-hoc methods +to pass commands and data through the parallel port interface, most +external devices are actually identical to an internal model, but with +a parallel-port adapter chip added in. Some of the original parallel port +adapters were little more than mechanisms for multiplexing a SCSI bus. +(The Iomega PPA-3 adapter used in the ZIP drives is an example of this +approach). Most current designs, however, take a different approach. 
+The adapter chip reproduces a small ISA or IDE bus in the external device +and the communication protocol provides operations for reading and writing +device registers, as well as data block transfer functions. Sometimes, +the device being addressed via the parallel cable is a standard SCSI +controller like an NCR 5380. The "ditto" family of external tape +drives use the ISA replicator to interface a floppy disk controller, +which is then connected to a floppy-tape mechanism. The vast majority +of external parallel port devices, however, are now based on standard +IDE type devices, which require no intermediate controller. If one +were to open up a parallel port CD-ROM drive, for instance, one would +find a standard ATAPI CD-ROM drive, a power supply, and a single adapter +that interconnected a standard PC parallel port cable and a standard +IDE cable. It is usually possible to exchange the CD-ROM device with +any other device using the IDE interface. + +The document describes the support in Linux for parallel port IDE +devices. It does not cover parallel port SCSI devices, "ditto" tape +drives or scanners. Many different devices are supported by the +parallel port IDE subsystem, including: + + - MicroSolutions backpack CD-ROM + - MicroSolutions backpack PD/CD + - MicroSolutions backpack hard-drives + - MicroSolutions backpack 8000t tape drive + - SyQuest EZ-135, EZ-230 & SparQ drives + - Avatar Shark + - Imation Superdisk LS-120 + - Maxell Superdisk LS-120 + - FreeCom Power CD + - Hewlett-Packard 5GB and 8GB tape drives + - Hewlett-Packard 7100 and 7200 CD-RW drives + +as well as most of the clone and no-name products on the market. + +To support such a wide range of devices, PARIDE, the parallel port IDE +subsystem, is actually structured in three parts. There is a base +paride module which provides a registry and some common methods for +accessing the parallel ports. 
The second component is a set of +high-level drivers for each of the different types of supported devices: + + === ============= + pd IDE disk + pcd ATAPI CD-ROM + pf ATAPI disk + pt ATAPI tape + pg ATAPI generic + === ============= + +(Currently, the pg driver is only used with CD-R drives). + +The high-level drivers function according to the relevant standards. +The third component of PARIDE is a set of low-level protocol drivers +for each of the parallel port IDE adapter chips. Thanks to the interest +and encouragement of Linux users from many parts of the world, +support is available for almost all known adapter protocols: + + ==== ====================================== ==== + aten ATEN EH-100 (HK) + bpck Microsolutions backpack (US) + comm DataStor (old-type) "commuter" adapter (TW) + dstr DataStor EP-2000 (TW) + epat Shuttle EPAT (UK) + epia Shuttle EPIA (UK) + fit2 FIT TD-2000 (US) + fit3 FIT TD-3000 (US) + friq Freecom IQ cable (DE) + frpw Freecom Power (DE) + kbic KingByte KBIC-951A and KBIC-971A (TW) + ktti KT Technology PHd adapter (SG) + on20 OnSpec 90c20 (US) + on26 OnSpec 90c26 (US) + ==== ====================================== ==== + + +2. Using the PARIDE subsystem +============================= + +While configuring the Linux kernel, you may choose either to build +the PARIDE drivers into your kernel, or to build them as modules. + +In either case, you will need to select "Parallel port IDE device support" +as well as at least one of the high-level drivers and at least one +of the parallel port communication protocols. If you do not know +what kind of parallel port adapter is used in your drive, you could +begin by checking the file names and any text files on your DOS +installation floppy. Alternatively, you can look at the markings on +the adapter chip itself. That's usually sufficient to identify the +correct device. + +You can actually select all the protocol modules, and allow the PARIDE +subsystem to try them all for you. 
+ +For the "brand-name" products listed above, here are the protocol +and high-level drivers that you would use: + + ================ ============ ====== ======== + Manufacturer Model Driver Protocol + ================ ============ ====== ======== + MicroSolutions CD-ROM pcd bpck + MicroSolutions PD drive pf bpck + MicroSolutions hard-drive pd bpck + MicroSolutions 8000t tape pt bpck + SyQuest EZ, SparQ pd epat + Imation Superdisk pf epat + Maxell Superdisk pf friq + Avatar Shark pd epat + FreeCom CD-ROM pcd frpw + Hewlett-Packard 5GB Tape pt epat + Hewlett-Packard 7200e (CD) pcd epat + Hewlett-Packard 7200e (CD-R) pg epat + ================ ============ ====== ======== + +2.1 Configuring built-in drivers +--------------------------------- + +We recommend that you get to know how the drivers work and how to +configure them as loadable modules, before attempting to compile a +kernel with the drivers built-in. + +If you built all of your PARIDE support directly into your kernel, +and you have just a single parallel port IDE device, your kernel should +locate it automatically for you. If you have more than one device, +you may need to give some command line options to your bootloader +(eg: LILO), how to do that is beyond the scope of this document. + +The high-level drivers accept a number of command line parameters, all +of which are documented in the source files in linux/drivers/block/paride. +By default, each driver will automatically try all parallel ports it +can find, and all protocol types that have been installed, until it finds +a parallel port IDE adapter. Once it finds one, the probe stops. So, +if you have more than one device, you will need to tell the drivers +how to identify them. This requires specifying the port address, the +protocol identification number and, for some devices, the drive's +chain ID. While your system is booting, a number of messages are +displayed on the console. 
Like all such messages, they can be +reviewed with the 'dmesg' command. Among those messages will be +some lines like:: + + paride: bpck registered as protocol 0 + paride: epat registered as protocol 1 + +The numbers will always be the same until you build a new kernel with +different protocol selections. You should note these numbers as you +will need them to identify the devices. + +If you happen to be using a MicroSolutions backpack device, you will +also need to know the unit ID number for each drive. This is usually +the last two digits of the drive's serial number (but read MicroSolutions' +documentation about this). + +As an example, let's assume that you have a MicroSolutions PD/CD drive +with unit ID number 36 connected to the parallel port at 0x378, a SyQuest +EZ-135 connected to the chained port on the PD/CD drive and also an +Imation Superdisk connected to port 0x278. You could give the following +options on your boot command:: + + pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36 + +In the last option, pf.drive1 configures device /dev/pf1, the 0x378 +is the parallel port base address, the 0 is the protocol registration +number and 36 is the chain ID. + +Please note: while PARIDE will work both with and without the +PARPORT parallel port sharing system that is included by the +"Parallel port support" option, PARPORT must be included and enabled +if you want to use chains of devices on the same parallel port. + +2.2 Loading and configuring PARIDE as modules +---------------------------------------------- + +It is much faster and simpler to get to understand the PARIDE drivers +if you use them as loadable kernel modules. + +Note 1: + using these drivers with the "kerneld" automatic module loading + system is not recommended for beginners, and is not documented here. + +Note 2: + if you build PARPORT support as a loadable module, PARIDE must + also be built as loadable modules, and PARPORT must be loaded before + the PARIDE modules. 
+ +To use PARIDE, you must begin by:: + + insmod paride + +this loads a base module which provides a registry for the protocols, +among other tasks. + +Then, load as many of the protocol modules as you think you might need. +As you load each module, it will register the protocols that it supports, +and print a log message to your kernel log file and your console. For +example:: + + # insmod epat + paride: epat registered as protocol 0 + # insmod kbic + paride: k951 registered as protocol 1 + paride: k971 registered as protocol 2 + +Finally, you can load high-level drivers for each kind of device that +you have connected. By default, each driver will autoprobe for a single +device, but you can support up to four similar devices by giving their +individual co-ordinates when you load the driver. + +For example, if you had two no-name CD-ROM drives both using the +KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc +you could give the following command:: + + # insmod pcd drive0=0x378,1 drive1=0x3bc,1 + +For most adapters, giving a port address and protocol number is sufficient, +but check the source files in linux/drivers/block/paride for more +information. (Hopefully someone will write some man pages one day !). + +As another example, here's what happens when PARPORT is installed, and +a SyQuest EZ-135 is attached to port 0x378:: + + # insmod paride + paride: version 1.0 installed + # insmod epat + paride: epat registered as protocol 0 + # insmod pd + pd: pd version 1.0, major 45, cluster 64, nice 0 + pda: Sharing parport1 at 0x378 + pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1 + pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media + pda: pda1 + +Note that the last line is the output from the generic partition table +scanner - in this case it reports that it has found a disk with one partition. 
+ +2.3 Using a PARIDE device +-------------------------- + +Once the drivers have been loaded, you can access PARIDE devices in the +same way as their traditional counterparts. You will probably need to +create the device "special files". Here is a simple script that you can +cut to a file and execute:: + + #!/bin/bash + # + # mkd -- a script to create the device special files for the PARIDE subsystem + # + function mkdev { + mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1 + } + # + function pd { + D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) ) + mkdev pd$D b 45 $[ $1 * 16 ] + for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + do mkdev pd$D$P b 45 $[ $1 * 16 + $P ] + done + } + # + cd /dev + # + for u in 0 1 2 3 ; do pd $u ; done + for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done + for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done + for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done + for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done + for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done + # + # end of mkd + +With the device files and drivers in place, you can access PARIDE devices +like any other Linux device. For example, to mount a CD-ROM in pcd0, use:: + + mount /dev/pcd0 /cdrom + +If you have a fresh Avatar Shark cartridge, and the drive is pda, you +might do something like:: + + fdisk /dev/pda -- make a new partition table with + partition 1 of type 83 + + mke2fs /dev/pda1 -- to build the file system + + mkdir /shark -- make a place to mount the disk + + mount /dev/pda1 /shark + +Devices like the Imation superdisk work in the same way, except that +they do not have a partition table. For example to make a 120MB +floppy that you could share with a DOS system:: + + mkdosfs /dev/pf0 + mount /dev/pf0 /mnt + + +2.4 The pf driver +------------------ + +The pf driver is intended for use with parallel port ATAPI disk +devices. The most common devices in this category are PD drives +and LS-120 drives. Traditionally, media for these devices are not +partitioned. 
Consequently, the pf driver does not support partitioned +media. This may be changed in a future version of the driver. + +2.5 Using the pt driver +------------------------ + +The pt driver for parallel port ATAPI tape drives is a minimal driver. +It does not yet support many of the standard tape ioctl operations. +For best performance, a block size of 32KB should be used. You will +probably want to set the parallel port delay to 0, if you can. + +2.6 Using the pg driver +------------------------ + +The pg driver can be used in conjunction with the cdrecord program +to create CD-ROMs. Please get cdrecord version 1.6.1 or later +from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media +your parallel port should ideally be set to EPP mode, and the "port delay" +should be set to 0. With those settings it is possible to record at 2x +speed without any buffer underruns. If you cannot get the driver to work +in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only. + + +3. Troubleshooting +================== + +3.1 Use EPP mode if you can +---------------------------- + +The most common problems that people report with the PARIDE drivers +concern the parallel port CMOS settings. At this time, none of the +PARIDE protocol modules support ECP mode, or any ECP combination modes. +If you are able to do so, please set your parallel port into EPP mode +using your CMOS setup procedure. + +3.2 Check the port delay +------------------------- + +Some parallel ports cannot reliably transfer data at full speed. To +offset the errors, the PARIDE protocol modules introduce a "port +delay" between each access to the i/o ports. Each protocol sets +a default value for this delay. In most cases, the user can override +the default and set it to 0 - resulting in somewhat higher transfer +rates. In some rare cases (especially with older 486 systems) the +default delays are not long enough. 
If you experience corrupt data +transfers, or unexpected failures, you may wish to increase the +port delay. The delay can be programmed using the "driveN" parameters +to each of the high-level drivers. Please see the notes above, or +read the comments at the beginning of the driver source files in +linux/drivers/block/paride. + +3.3 Some drives need a printer reset +------------------------------------- + +There appear to be a number of "noname" external drives on the market +that do not always power up correctly. We have noticed this with some +drives based on OnSpec and older Freecom adapters. In these rare cases, +the adapter can often be reinitialised by issuing a "printer reset" on +the parallel port. As the reset operation is potentially disruptive in +multiple device environments, the PARIDE drivers will not do it +automatically. You can, however, force a printer reset by doing:: + + insmod lp reset=1 + rmmod lp + +If you have one of these marginal cases, you should probably build +your paride drivers as modules, and arrange to do the printer reset +before loading the PARIDE drivers. + +3.4 Use the verbose option and dmesg if you need help +------------------------------------------------------ + +While a lot of testing has gone into these drivers to make them work +as smoothly as possible, problems will arise. If you do have problems, +please check all the obvious things first: does the drive work in +DOS with the manufacturer's drivers? If that doesn't yield any useful +clues, then please make sure that only one drive is hooked to your system, +and that either (a) PARPORT is enabled or (b) no other device driver +is using your parallel port (check in /proc/ioports). Then, load the +appropriate drivers (you can load several protocol modules if you want) +as in:: + + # insmod paride + # insmod epat + # insmod bpck + # insmod kbic + ... + # insmod pd verbose=1 + +(using the correct driver for the type of device you have, of course).
+The verbose=1 parameter will cause the drivers to log a trace of their +activity as they attempt to locate your drive. + +Use 'dmesg' to capture a log of all the PARIDE messages (any messages +beginning with paride:, a protocol module's name or a driver's name) and +include that with your bug report. You can submit a bug report in one +of two ways. Either send it directly to the author of the PARIDE suite, +by e-mail to grant@torque.net, or join the linux-parport mailing list +and post your report there. + +3.5 For more information or help +--------------------------------- + +You can join the linux-parport mailing list by sending a mail message +to: + + linux-parport-request@torque.net + +with the single word:: + + subscribe + +in the body of the mail message (not in the subject line). Please be +sure that your mail program is correctly set up when you do this, as +the list manager is a robot that will subscribe you using the reply +address in your mail headers. REMOVE any anti-spam gimmicks you may +have in your mail headers, when sending mail to the list server. + +You might also find some useful information on the linux-parport +web pages (although they are not always up to date) at + + http://web.archive.org/web/%2E/http://www.torque.net/parport/ diff --git a/Documentation/admin-guide/blockdev/ramdisk.rst b/Documentation/admin-guide/blockdev/ramdisk.rst new file mode 100644 index 000000000000..b7c2268f8dec --- /dev/null +++ b/Documentation/admin-guide/blockdev/ramdisk.rst @@ -0,0 +1,177 @@ +========================================== +Using the RAM disk block device with Linux +========================================== + +.. Contents: + + 1) Overview + 2) Kernel Command Line Parameters + 3) Using "rdev -r" + 4) An Example of Creating a Compressed RAM Disk + + +1) Overview +----------- + +The RAM disk driver is a way to use main system memory as a block device. 
It +is required for initrd, an initial filesystem used if you need to load modules +in order to access the root filesystem (see Documentation/admin-guide/initrd.rst). It can +also be used for a temporary filesystem for crypto work, since the contents +are erased on reboot. + +The RAM disk dynamically grows as more space is required. It does this by using +RAM from the buffer cache. The driver marks the buffers it is using as dirty +so that the VM subsystem does not try to reclaim them later. + +The RAM disk supports up to 16 RAM disks by default, and can be reconfigured +to support an unlimited number of RAM disks (at your own risk). Just change +the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu +and (re)build the kernel. + +To use RAM disk support with your system, run './MAKEDEV ram' from the /dev +directory. RAM disks are all major number 1, and start with minor number 0 +for /dev/ram0, etc. If used, modern kernels use /dev/ram0 for an initrd. + +The new RAM disk also has the ability to load compressed RAM disk images, +allowing one to squeeze more programs onto an average installation or +rescue floppy disk. + + +2) Parameters +--------------------------------- + +2a) Kernel Command Line Parameters + + ramdisk_size=N + Size of the ramdisk. + +This parameter tells the RAM disk driver to set up RAM disks of N k size. The +default is 4096 (4 MB). + +2b) Module parameters + + rd_nr + /dev/ramX devices created. + + max_part + Maximum partition number. + + rd_size + See ramdisk_size. + +3) Using "rdev -r" +------------------ + +The usage of the word (two bytes) that "rdev -r" sets in the kernel image is +as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up +to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit +14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a +prompt/wait sequence is to be given before trying to read the RAM disk. 
Since +the RAM disk dynamically grows as data is being written into it, a size field +is not required. Bits 11 to 13 are not currently used and may as well be zero. +These numbers are no magical secrets, as seen below:: + + ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF + ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 + ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 + +Consider a typical two floppy disk setup, where you will have the +kernel on disk one, and have already put a RAM disk image onto disk #2. + +Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk +starts at an offset of 0 kB from the beginning of the floppy. +The command line equivalent is: "ramdisk_start=0" + +You want bit 14 as one, indicating that a RAM disk is to be loaded. +The command line equivalent is: "load_ramdisk=1" + +You want bit 15 as one, indicating that you want a prompt/keypress +sequence so that you have a chance to switch floppy disks. +The command line equivalent is: "prompt_ramdisk=1" + +Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word. +So to create disk one of the set, you would do:: + + /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0 + /usr/src/linux# rdev /dev/fd0 /dev/fd0 + /usr/src/linux# rdev -r /dev/fd0 49152 + +If you make a boot disk that has LILO, then for the above, you would use:: + + append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1" + +Since the default start = 0 and the default prompt = 1, you could use:: + + append = "load_ramdisk=1" + + +4) An Example of Creating a Compressed RAM Disk +----------------------------------------------- + +To create a RAM disk image, you will need a spare block device to +construct it on. This can be the RAM disk device itself, or an +unused disk partition (such as an unmounted swap partition). For this +example, we will use the RAM disk device, "/dev/ram0". + +Note: This technique should not be done on a machine with less than 8 MB +of RAM. 
If using a spare disk partition instead of /dev/ram0, then this +restriction does not apply. + +a) Decide on the RAM disk size that you want. Say 2 MB for this example. + Create it by writing to the RAM disk device. (This step is not currently + required, but may be in the future.) It is wise to zero out the + area (esp. for disks) so that maximal compression is achieved for + the unused blocks of the image that you are about to create:: + + dd if=/dev/zero of=/dev/ram0 bs=1k count=2048 + +b) Make a filesystem on it. Say ext2fs for this example:: + + mke2fs -vm0 /dev/ram0 2048 + +c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...) + and unmount it again. + +d) Compress the contents of the RAM disk. The level of compression + will be approximately 50% of the space used by the files. Unused + space on the RAM disk will compress to almost nothing:: + + dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz + +e) Put the kernel onto the floppy:: + + dd if=zImage of=/dev/fd0 bs=1k + +f) Put the RAM disk image onto the floppy, after the kernel. Use an offset + that is slightly larger than the kernel, so that you can put another + (possibly larger) kernel onto the same floppy later without overlapping + the RAM disk image. An offset of 400 kB for kernels about 350 kB in + size would be reasonable. Make sure offset+size of ram_image.gz is + not larger than the total space on your floppy (usually 1440 kB):: + + dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400 + +g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc. + For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would + have 2^15 + 2^14 + 400 = 49552:: + + rdev /dev/fd0 /dev/fd0 + rdev -r /dev/fd0 49552 + +That is it. You now have your boot/root compressed RAM disk floppy. Some +users may wish to combine steps (d) and (f) by using a pipe. 
+ + + Paul Gortmaker 12/95 + +Changelog: +---------- + +10-22-04 : + Updated to reflect changes in command line options, remove + obsolete references, general cleanup. + James Nelson (james4765@gmail.com) + + +12-95 : + Original Document diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst new file mode 100644 index 000000000000..6eccf13219ff --- /dev/null +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -0,0 +1,422 @@ +======================================== +zram: Compressed RAM based block devices +======================================== + +Introduction +============ + +The zram module creates RAM based block devices named /dev/zram<id> +(<id> = 0, 1, ...). Pages written to these disks are compressed and stored +in memory itself. These disks allow very fast I/O and compression provides +good amounts of memory savings. Some of the usecases include /tmp storage, +use as swap disks, various caches under /var and maybe many more :) + +Statistics for individual zram devices are exported through sysfs nodes at +/sys/block/zram<id>/ + +Usage +===== + +There are several ways to configure and manage zram device(-s): + +a) using zram and zram_control sysfs attributes +b) using zramctl utility, provided by util-linux (util-linux@vger.kernel.org). + +In this document we will describe only 'manual' zram configuration steps, +IOW, zram and zram_control sysfs attributes. + +In order to get a better idea about zramctl please consult util-linux +documentation, zramctl man-page or `zramctl --help`. Please be informed +that zram maintainers do not develop/maintain util-linux or zramctl, should +you have any questions please contact util-linux@vger.kernel.org + +Following shows a typical sequence of steps for using zram. + +WARNING +======= + +For the sake of simplicity we skip error checking parts in most of the +examples below. However, it is your sole responsibility to handle errors.
+ +zram sysfs attributes always return negative values in case of errors. +The list of possible return codes: + +======== ============================================================= +-EBUSY an attempt to modify an attribute that cannot be changed once + the device has been initialised. Please reset device first; +-ENOMEM zram was not able to allocate enough memory to fulfil your + needs; +-EINVAL invalid input has been provided. +======== ============================================================= + +If you use 'echo', the returned value is set by the 'echo' utility, +and, in the general case, something like:: + + echo 3 > /sys/block/zram0/max_comp_streams + if [ $? -ne 0 ]; then + handle_error + fi + +should suffice. + +1) Load Module +============== + +:: + + modprobe zram num_devices=4 + This creates 4 devices: /dev/zram{0,1,2,3} + +num_devices parameter is optional and tells zram how many devices should be +pre-created. Default: 1. + +2) Set max number of compression streams +======================================== + +Regardless of the value passed to this attribute, ZRAM will always +allocate multiple compression streams - one per online CPU - thus +allowing several concurrent compression operations. The number of +allocated compression streams goes down when some of the CPUs +become offline. There is no single-compression-stream mode anymore, +unless you are running a UP system or have only 1 CPU online. + +To find out how many streams are currently available:: + + cat /sys/block/zram0/max_comp_streams + +3) Select compression algorithm +=============================== + +Using comp_algorithm device attribute one can see available and +currently selected (shown in square brackets) compression algorithms, +and change the selected compression algorithm (once the device is initialised +there is no way to change compression algorithm).
+ +Examples:: + + #show supported compression algorithms + cat /sys/block/zram0/comp_algorithm + lzo [lz4] + + #select lzo compression algorithm + echo lzo > /sys/block/zram0/comp_algorithm + +For the time being, the `comp_algorithm` content does not necessarily +show every compression algorithm supported by the kernel. We keep this +list primarily to simplify device configuration and one can configure +a new device with a compression algorithm that is not listed in +`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API +and, if some of the algorithms were built as modules, it's impossible +to list all of them using, for instance, /proc/crypto or any other +method. This, however, has an advantage of permitting the usage of +custom crypto compression modules (implementing S/W or H/W compression). + +4) Set Disksize +=============== + +Set disk size by writing the value to sysfs node 'disksize'. +The value can be either in bytes or you can use mem suffixes. +Examples:: + + # Initialize /dev/zram0 with 50MB disksize + echo $((50*1024*1024)) > /sys/block/zram0/disksize + + # Using mem suffixes + echo 256K > /sys/block/zram0/disksize + echo 512M > /sys/block/zram0/disksize + echo 1G > /sys/block/zram0/disksize + +Note: +There is little point creating a zram of greater than twice the size of memory +since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the +size of the disk when not in use so a huge zram is wasteful. + +5) Set memory limit: Optional +============================= + +Set memory limit by writing the value to sysfs node 'mem_limit'. +The value can be either in bytes or you can use mem suffixes. +In addition, you could change the value in runtime. 
+Examples:: + + # limit /dev/zram0 with 50MB memory + echo $((50*1024*1024)) > /sys/block/zram0/mem_limit + + # Using mem suffixes + echo 256K > /sys/block/zram0/mem_limit + echo 512M > /sys/block/zram0/mem_limit + echo 1G > /sys/block/zram0/mem_limit + + # To disable memory limit + echo 0 > /sys/block/zram0/mem_limit + +6) Activate +=========== + +:: + + mkswap /dev/zram0 + swapon /dev/zram0 + + mkfs.ext4 /dev/zram1 + mount /dev/zram1 /tmp + +7) Add/remove zram devices +========================== + +zram provides a control interface, which enables dynamic (on-demand) device +addition and removal. + +In order to add a new /dev/zramX device, perform read operation on hot_add +attribute. This will return either new device's device id (meaning that you +can use /dev/zram<id>) or error code. + +Example:: + + cat /sys/class/zram-control/hot_add + 1 + +To remove the existing /dev/zramX device (where X is a device id) +execute:: + + echo X > /sys/class/zram-control/hot_remove + +8) Stats +======== + +Per-device statistics are exported as various nodes under /sys/block/zram<id>/ + +A brief description of exported device attributes. For more details please +read Documentation/ABI/testing/sysfs-block-zram.
+ +====================== ====== =============================================== +Name access description +====================== ====== =============================================== +disksize RW show and set the device's disk size +initstate RO shows the initialization state of the device +reset WO trigger device reset +mem_used_max WO reset the `mem_used_max` counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can + use to store the compressed data +writeback_limit RW specifies the maximum amount of write IO zram + can write out to backing device as 4KB unit +writeback_limit_enable RW show and set writeback_limit feature +max_comp_streams RW the number of possible concurrent compress + operations +comp_algorithm RW show and change the compression algorithm +compact WO trigger memory compaction +debug_stat RO this file is used for zram debugging purposes +backing_dev RW set up backend storage for zram to write out +idle WO mark allocated slot as idle +====================== ====== =============================================== + + +User space is advised to use the following files to read the device statistics. + +File /sys/block/zram<id>/stat + +Represents block layer statistics. Read Documentation/block/stat.rst for +details. + +File /sys/block/zram<id>/io_stat + +The stat file represents device's I/O statistics not accounted by block +layer and, thus, not available in zram<id>/stat file. It consists of a +single line of text and contains the following stats separated by +whitespace: + + ============= ============================================================= + failed_reads The number of failed reads + failed_writes The number of failed writes + invalid_io The number of non-page-size-aligned I/O requests + notify_free Depending on device usage scenario it may account + + a) the number of pages freed because of swap slot free + notifications + b) the number of pages freed because of + REQ_OP_DISCARD requests sent by bio.
The former ones are + sent to a swap block device when a swap slot is freed, + which implies that this disk is being used as a swap disk. + + The latter ones are sent by filesystem mounted with + discard option, whenever some data blocks are getting + discarded. + ============= ============================================================= + +File /sys/block/zram<id>/mm_stat + +The stat file represents device's mm statistics. It consists of a single +line of text and contains the following stats separated by whitespace: + + ================ ============================================================= + orig_data_size uncompressed size of data stored in this disk. + This excludes same-element-filled pages (same_pages) since + no memory is allocated for them. + Unit: bytes + compr_data_size compressed size of data stored in this disk + mem_used_total the amount of memory allocated for this disk. This + includes allocator fragmentation and metadata overhead, + allocated for this disk. So, allocator space efficiency + can be calculated using compr_data_size and this statistic. + Unit: bytes + mem_limit the maximum amount of memory ZRAM can use to store + the compressed data + mem_used_max the maximum amount of memory zram has consumed to + store the data + same_pages the number of same element filled pages written to this disk. + No memory is allocated for such pages. + pages_compacted the number of pages freed during compaction + huge_pages the number of incompressible pages + ================ ============================================================= + +File /sys/block/zram<id>/bd_stat + +The stat file represents device's backing device statistics. It consists of +a single line of text and contains the following stats separated by whitespace: + + ============== ============================================================= + bd_count size of data written in backing device.
+ Unit: 4K bytes + bd_reads the number of reads from backing device + Unit: 4K bytes + bd_writes the number of writes to backing device + Unit: 4K bytes + ============== ============================================================= + +9) Deactivate +============= + +:: + + swapoff /dev/zram0 + umount /dev/zram1 + +10) Reset +========= + + Write any positive value to 'reset' sysfs node:: + + echo 1 > /sys/block/zram0/reset + echo 1 > /sys/block/zram1/reset + + This frees all the memory allocated for the given device and + resets the disksize to zero. You must set the disksize again + before reusing the device. + +Optional Feature +================ + +writeback +--------- + +With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page +to backing storage rather than keeping it in memory. +To use the feature, admin should set up backing device via:: + + echo /dev/sda5 > /sys/block/zramX/backing_dev + +before disksize setting. It supports only partition at this moment. +If admin wants to use incompressible page writeback, they could do so via:: + + echo huge > /sys/block/zramX/writeback + +To use idle page writeback, first, user needs to declare zram pages +as idle:: + + echo all > /sys/block/zramX/idle + +From now on, any pages on zram are idle pages. The idle mark +will remain until someone requests access to the block. +IOW, unless there is access request, those pages are still idle pages. + +Admin can request writeback of those idle pages at right timing via:: + + echo idle > /sys/block/zramX/writeback + +With the command, zram writes back idle pages from memory to the storage. + +If there are lots of write IO with flash device, potentially, it has +flash wearout problem so that admin needs to design write limitation +to guarantee storage health for entire product life. + +To overcome the concern, zram supports "writeback_limit" feature. +The "writeback_limit_enable"'s default value is 0 so that it doesn't limit +any writeback.
IOW, if admin wants to apply writeback budget, he should +enable writeback_limit_enable via:: + + $ echo 1 > /sys/block/zramX/writeback_limit_enable + +Once writeback_limit_enable is set, zram doesn't allow any writeback +until admin sets the budget via /sys/block/zramX/writeback_limit. + +(If admin doesn't enable writeback_limit_enable, writeback_limit's value +assigned via /sys/block/zramX/writeback_limit is meaningless.) + +If admin wants to limit writeback as per-day 400M, he could do it +like below:: + + $ MB_SHIFT=20 + $ 4K_SHIFT=12 + $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ + /sys/block/zram0/writeback_limit + $ echo 1 > /sys/block/zram0/writeback_limit_enable + +If admin wants to allow further write again once the budget is exhausted, +he could do it like below:: + + $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ + /sys/block/zram0/writeback_limit + +If admin wants to see remaining writeback budget since he set:: + + $ cat /sys/block/zramX/writeback_limit + +If admin wants to disable writeback limit, he could do:: + + $ echo 0 > /sys/block/zramX/writeback_limit_enable + +The writeback_limit count will reset whenever you reset zram (e.g., +system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of +writeback happened until you reset the zram to allocate extra writeback +budget in next setting is user's job. + +If admin wants to measure writeback count in a certain period, he could +know it via /sys/block/zram0/bd_stat's 3rd column. + +memory tracking +=============== + +With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the +zram block. It could be useful to catch cold or incompressible +pages of the process with pagemap. + +If you enable the feature, you could see block state via +/sys/kernel/debug/zram/zram0/block_state. The output is as follows:: + + 300 75.033841 .wh. + 301 63.806904 s... + 302 63.806919 ..hi + +First column + zram's block index.
+Second column + access time since the system was booted +Third column + state of the block: + + s: + same page + w: + written page to backing store + h: + huge page + i: + idle page + +First line of above example says 300th block is accessed at 75.033841sec +and the block's state is huge so it is written back to the backing +storage. It's a debugging feature so anyone shouldn't rely on it to work +properly. + +Nitin Gupta +ngupta@vflare.org -- cgit v1.2.3-59-g8ed1b