aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
authorDmitry Torokhov <dtor_core@ameritech.net>2006-04-02 00:08:05 -0500
committerDmitry Torokhov <dtor_core@ameritech.net>2006-04-02 00:08:05 -0500
commit95d465fd750897ab32462a6702fbfe1b122cbbc0 (patch)
tree65c38b2f11c51bb6932e44dd6c92f15b0091abfe /Documentation
parentInput: gameport - fix memory leak (diff)
parentMerge master.kernel.org:/home/rmk/linux-2.6-serial (diff)
downloadlinux-dev-95d465fd750897ab32462a6702fbfe1b122cbbc0.tar.xz
linux-dev-95d465fd750897ab32462a6702fbfe1b122cbbc0.zip
Manual merge with Linus.
Conflicts: arch/powerpc/kernel/setup-common.c drivers/input/keyboard/hil_kbd.c drivers/input/mouse/hil_ptr.c
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/BUG-HUNTING113
-rw-r--r--Documentation/Changes18
-rw-r--r--Documentation/DMA-mapping.txt2
-rw-r--r--Documentation/DocBook/Makefile10
-rw-r--r--Documentation/DocBook/deviceiobook.tmpl19
-rw-r--r--Documentation/DocBook/libata.tmpl47
-rw-r--r--Documentation/DocBook/sis900.tmpl585
-rw-r--r--Documentation/RCU/whatisRCU.txt6
-rw-r--r--Documentation/aoe/mkdevs.sh2
-rw-r--r--Documentation/aoe/udev.txt1
-rw-r--r--Documentation/arm/Booting2
-rw-r--r--Documentation/arm/README2
-rw-r--r--Documentation/arm/SA1100/Assabet2
-rw-r--r--Documentation/arm/SA1100/LART2
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Overview.txt42
-rw-r--r--Documentation/arm/Setup2
-rw-r--r--Documentation/block/biodoc.txt14
-rw-r--r--Documentation/cachetlb.txt21
-rw-r--r--Documentation/connector/connector.txt5
-rw-r--r--Documentation/cpu-hotplug.txt4
-rw-r--r--Documentation/cpusets.txt117
-rw-r--r--Documentation/cputopology.txt4
-rw-r--r--Documentation/drivers/edac/edac.txt34
-rw-r--r--Documentation/dvb/avermedia.txt10
-rw-r--r--Documentation/dvb/bt8xx.txt140
-rw-r--r--Documentation/dvb/get_dvb_firmware25
-rw-r--r--Documentation/dvb/readme.txt32
-rw-r--r--Documentation/feature-removal-schedule.txt71
-rw-r--r--Documentation/filesystems/00-INDEX54
-rw-r--r--Documentation/filesystems/9p.txt (renamed from Documentation/filesystems/v9fs.txt)21
-rw-r--r--Documentation/filesystems/isofs.txt4
-rw-r--r--Documentation/filesystems/jfs.txt2
-rw-r--r--Documentation/filesystems/ntfs.txt5
-rw-r--r--Documentation/filesystems/proc.txt6
-rw-r--r--Documentation/filesystems/udf.txt14
-rw-r--r--Documentation/filesystems/vfat.txt6
-rw-r--r--Documentation/filesystems/vfs.txt217
-rw-r--r--Documentation/firmware_class/firmware_sample_driver.c3
-rw-r--r--Documentation/firmware_class/firmware_sample_firmware_class.c1
-rw-r--r--Documentation/hwmon/w83627hf4
-rw-r--r--Documentation/hwmon/w83781d24
-rw-r--r--Documentation/i2c/busses/i2c-piix42
-rw-r--r--Documentation/i2c/busses/scx200_acb5
-rw-r--r--Documentation/ioctl-number.txt2
-rw-r--r--Documentation/kbuild/makefiles.txt172
-rw-r--r--Documentation/kbuild/modules.txt98
-rw-r--r--Documentation/kernel-parameters.txt28
-rw-r--r--Documentation/leds-class.txt71
-rw-r--r--Documentation/m68k/README.buddha2
-rw-r--r--Documentation/memory-barriers.txt1913
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/README.ipw210012
-rw-r--r--Documentation/networking/README.ipw220044
-rw-r--r--Documentation/networking/TODO18
-rw-r--r--Documentation/networking/bcm43xx.txt36
-rw-r--r--Documentation/networking/e100.txt158
-rw-r--r--Documentation/networking/e1000.txt620
-rw-r--r--Documentation/networking/ifenslave.c2
-rw-r--r--Documentation/networking/ip-sysctl.txt49
-rw-r--r--Documentation/networking/packet_mmap.txt10
-rw-r--r--Documentation/networking/pktgen.txt20
-rw-r--r--Documentation/networking/ray_cs.txt2
-rw-r--r--Documentation/networking/sis900.txt257
-rw-r--r--Documentation/networking/vortex.txt81
-rw-r--r--Documentation/nfsroot.txt17
-rw-r--r--Documentation/pnp.txt3
-rw-r--r--Documentation/power/swsusp.txt51
-rw-r--r--Documentation/power/userland-swsusp.txt149
-rw-r--r--Documentation/power/video.txt74
-rw-r--r--Documentation/powerpc/booting-without-of.txt77
-rw-r--r--Documentation/powerpc/eeh-pci-error-recovery.txt15
-rw-r--r--Documentation/powerpc/hvcs.txt4
-rw-r--r--Documentation/robust-futex-ABI.txt182
-rw-r--r--Documentation/robust-futexes.txt218
-rw-r--r--Documentation/rpc-cache.txt121
-rw-r--r--Documentation/s390/driver-model.txt15
-rw-r--r--Documentation/serial-console.txt11
-rw-r--r--Documentation/smart-config.txt4
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt71
-rw-r--r--Documentation/sound/alsa/Audiophile-Usb.txt333
-rw-r--r--Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl8
-rw-r--r--Documentation/sound/oss/Introduction2
-rw-r--r--Documentation/sound/oss/cs46xx16
-rw-r--r--Documentation/spinlocks.txt2
-rw-r--r--Documentation/usb/et61x251.txt10
-rw-r--r--Documentation/usb/sn9c102.txt11
-rw-r--r--Documentation/usb/zc0301.txt254
-rw-r--r--Documentation/video4linux/CARDLIST.cx882
-rw-r--r--Documentation/video4linux/CARDLIST.em28xx1
-rw-r--r--Documentation/video4linux/CARDLIST.saa71349
-rw-r--r--Documentation/video4linux/CARDLIST.tuner6
-rw-r--r--Documentation/video4linux/CQcam.txt182
-rw-r--r--Documentation/video4linux/README.cpia4
-rw-r--r--Documentation/video4linux/README.cpia2130
-rw-r--r--Documentation/video4linux/Zoran108
-rw-r--r--Documentation/video4linux/bttv/ICs4
-rw-r--r--Documentation/video4linux/bttv/PROBLEMS16
-rw-r--r--Documentation/video4linux/bttv/README.quirks4
-rw-r--r--Documentation/video4linux/bttv/THANKS4
-rw-r--r--Documentation/video4linux/cpia2_overview.txt38
-rw-r--r--Documentation/video4linux/radiotrack.txt16
-rw-r--r--Documentation/video4linux/w9966.txt2
-rw-r--r--Documentation/video4linux/zr36120.txt4
-rw-r--r--Documentation/vm/page_migration118
-rw-r--r--Documentation/w1/masters/ds248231
105 files changed, 5711 insertions, 1913 deletions
diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING
index ca29242dbc38..65b97e1dbf70 100644
--- a/Documentation/BUG-HUNTING
+++ b/Documentation/BUG-HUNTING
@@ -1,3 +1,56 @@
+Table of contents
+=================
+
+Last updated: 20 December 2005
+
+Contents
+========
+
+- Introduction
+- Devices not appearing
+- Finding patch that caused a bug
+-- Finding using git-bisect
+-- Finding it the old way
+- Fixing the bug
+
+Introduction
+============
+
+Always try the latest kernel from kernel.org and build from source. If you are
+not confident in doing that please report the bug to your distribution vendor
+instead of to a kernel developer.
+
+Finding bugs is not always easy. Have a go though. If you can't find it don't
+give up. Report as much as you have found to the relevant maintainer. See
+MAINTAINERS for who that is for the subsystem you have worked on.
+
+Before you submit a bug report read REPORTING-BUGS.
+
+Devices not appearing
+=====================
+
+Often this is caused by udev. Check that first before blaming it on the
+kernel.
+
+Finding patch that caused a bug
+===============================
+
+
+
+Finding using git-bisect
+------------------------
+
+Using the provided tools with git makes finding bugs easy provided the bug is
+reproducible.
+
+Steps to do it:
+- start using git for the kernel source
+- read the man page for git-bisect
+- have fun
+
+Finding it the old way
+----------------------
+
[Sat Mar 2 10:32:33 PST 1996 KERNEL_BUG-HOWTO lm@sgi.com (Larry McVoy)]
This is how to track down a bug if you know nothing about kernel hacking.
@@ -90,3 +143,63 @@ it does work and it lets non-hackers help fix bugs. And it is cool
because Linux snapshots will let you do this - something that you can't
do with vendor supplied releases.
+Fixing the bug
+==============
+
+Nobody is going to tell you how to fix bugs. Seriously. You need to work it
+out. But below are some hints on how to use the tools.
+
+To debug a kernel, use objdump and look for the hex offset from the crash
+output to find the valid line of code/assembler. Without debug symbols, you
+will see the assembler code for the routine shown, but if your kernel has
+debug symbols the C code will also be available. (Debug symbols can be enabled
+in the kernel hacking menu of the menu configuration.) For example:
+
+ objdump -r -S -l --disassemble net/dccp/ipv4.o
+
+NB.: you need to be at the top level of the kernel tree for this to pick up
+your C files.
+
+If you don't have access to the code you can also debug on some crash dumps
+e.g. crash dump output as shown by Dave Miller.
+
+> EIP is at ip_queue_xmit+0x14/0x4c0
+> ...
+> Code: 44 24 04 e8 6f 05 00 00 e9 e8 fe ff ff 8d 76 00 8d bc 27 00 00
+> 00 00 55 57 56 53 81 ec bc 00 00 00 8b ac 24 d0 00 00 00 8b 5d 08
+> <8b> 83 3c 01 00 00 89 44 24 14 8b 45 28 85 c0 89 44 24 18 0f 85
+>
+> Put the bytes into a "foo.s" file like this:
+>
+> .text
+> .globl foo
+> foo:
+> .byte .... /* bytes from Code: part of OOPS dump */
+>
+> Compile it with "gcc -c -o foo.o foo.s" then look at the output of
+> "objdump --disassemble foo.o".
+>
+> Output:
+>
+> ip_queue_xmit:
+> push %ebp
+> push %edi
+> push %esi
+> push %ebx
+> sub $0xbc, %esp
+> mov 0xd0(%esp), %ebp ! %ebp = arg0 (skb)
+> mov 0x8(%ebp), %ebx ! %ebx = skb->sk
+> mov 0x13c(%ebx), %eax ! %eax = inet_sk(sk)->opt
+
+Another very useful option of the Kernel Hacking section in menuconfig is
+Debug memory allocations. This will help you see whether data has been
+initialised and not set before use etc. To see the values that get assigned
+with this look at mm/slab.c and search for POISON_INUSE. When using this an
+Oops will often show the poisoned data instead of zero which is the default.
+
+Once you have worked out a fix please submit it upstream. After all open
+source is about sharing what you do and don't you want to be recognised for
+your genius?
+
+Please do read Documentation/SubmittingPatches though to help your code get
+accepted.
diff --git a/Documentation/Changes b/Documentation/Changes
index fe5ae0f55020..b02f476c2973 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -15,24 +15,6 @@ and therefore owes credit to the same people as that file (Jared Mauch,
Axel Boldt, Alessandro Sigala, and countless other users all over the
'net).
-The latest revision of this document, in various formats, can always
-be found at <http://cyberbuzz.gatech.edu/kaboom/linux/Changes-2.4/>.
-
-Feel free to translate this document. If you do so, please send me a
-URL to your translation for inclusion in future revisions of this
-document.
-
-Smotrite file <http://oblom.rnc.ru/linux/kernel/Changes.ru>, yavlyaushisya
-russkim perevodom dannogo documenta.
-
-Visite <http://www2.adi.uam.es/~ender/tecnico/> para obtener la traducción
-al español de este documento en varios formatos.
-
-Eine deutsche Version dieser Datei finden Sie unter
-<http://www.stefan-winter.de/Changes-2.4.0.txt>.
-
-Chris Ricker (kaboom@gatech.edu or chris.ricker@genetics.utah.edu).
-
Current Minimal Requirements
============================
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index 684557474c15..ee4bb73683cd 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -199,6 +199,8 @@ address during PCI bus mastering you might do something like:
"mydev: 24-bit DMA addressing not available.\n");
goto ignore_this_device;
}
+[Better use DMA_24BIT_MASK instead of 0x00ffffff.
+See linux/include/dma-mapping.h for reference.]
When pci_set_dma_mask() is successful, and returns zero, the PCI layer
saves away this mask you have provided. The PCI layer will use this
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 1c955883cf58..7d87dd73cbe4 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
- sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
+ kernel-api.xml journal-api.xml lsm.xml usb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
###
@@ -28,7 +28,7 @@ PS_METHOD = $(prefer-db2x)
###
# The targets that may be used.
-.PHONY: xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs
+PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs
BOOKS := $(addprefix $(obj)/,$(DOCBOOKS))
xmldocs: $(BOOKS)
@@ -211,3 +211,9 @@ clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS))
#man put files in man subdir - traverse down
subdir- := man/
+
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable se we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff --git a/Documentation/DocBook/deviceiobook.tmpl b/Documentation/DocBook/deviceiobook.tmpl
index 6f41f2f5c6f6..90ed23df1f68 100644
--- a/Documentation/DocBook/deviceiobook.tmpl
+++ b/Documentation/DocBook/deviceiobook.tmpl
@@ -270,25 +270,6 @@ CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
</para>
</sect1>
- <sect1>
- <title>ISA legacy functions</title>
- <para>
- On older kernels (2.2 and earlier) the ISA bus could be read or
- written with these functions and without ioremap being used. This is
- no longer true in Linux 2.4. A set of equivalent functions exist for
- easy legacy driver porting. The functions available are prefixed
- with 'isa_' and are <function>isa_readb</function>,
- <function>isa_writeb</function>, <function>isa_readw</function>,
- <function>isa_writew</function>, <function>isa_readl</function>,
- <function>isa_writel</function>, <function>isa_memcpy_fromio</function>
- and <function>isa_memcpy_toio</function>
- </para>
- <para>
- These functions should not be used in new drivers, and will
- eventually be going away.
- </para>
- </sect1>
-
</chapter>
<chapter>
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
index d260d92089ad..5bcbb6ee3bc0 100644
--- a/Documentation/DocBook/libata.tmpl
+++ b/Documentation/DocBook/libata.tmpl
@@ -120,14 +120,27 @@ void (*dev_config) (struct ata_port *, struct ata_device *);
<programlisting>
void (*set_piomode) (struct ata_port *, struct ata_device *);
void (*set_dmamode) (struct ata_port *, struct ata_device *);
-void (*post_set_mode) (struct ata_port *ap);
+void (*post_set_mode) (struct ata_port *);
+unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned int);
</programlisting>
<para>
Hooks called prior to the issue of SET FEATURES - XFER MODE
- command. dev->pio_mode is guaranteed to be valid when
- ->set_piomode() is called, and dev->dma_mode is guaranteed to be
- valid when ->set_dmamode() is called. ->post_set_mode() is
+ command. The optional ->mode_filter() hook is called when libata
+ has built a mask of the possible modes. This is passed to the
+ ->mode_filter() function which should return a mask of valid modes
+ after filtering those unsuitable due to hardware limits. It is not
+ valid to use this interface to add modes.
+ </para>
+ <para>
+ dev->pio_mode and dev->dma_mode are guaranteed to be valid when
+ ->set_piomode() and when ->set_dmamode() is called. The timings for
+ any other drive sharing the cable will also be valid at this point.
+ That is the library records the decisions for the modes of each
+ drive on a channel before it attempts to set any of them.
+ </para>
+ <para>
+ ->post_set_mode() is
called unconditionally, after the SET FEATURES - XFER MODE
command completes successfully.
</para>
@@ -230,6 +243,32 @@ void (*dev_select)(struct ata_port *ap, unsigned int device);
</sect2>
+ <sect2><title>Private tuning method</title>
+ <programlisting>
+void (*set_mode) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ By default libata performs drive and controller tuning in
+ accordance with the ATA timing rules and also applies blacklists
+ and cable limits. Some controllers need special handling and have
+ custom tuning rules, typically raid controllers that use ATA
+ commands but do not actually do drive timing.
+ </para>
+
+ <warning>
+ <para>
+ This hook should not be used to replace the standard controller
+ tuning logic when a controller has quirks. Replacing the default
+ tuning logic in that case would bypass handling for drive and
+ bridge quirks that may be important to data reliability. If a
+ controller needs to filter the mode selection it should use the
+ mode_filter hook instead.
+ </para>
+ </warning>
+
+ </sect2>
+
<sect2><title>Reset ATA bus</title>
<programlisting>
void (*phy_reset) (struct ata_port *ap);
diff --git a/Documentation/DocBook/sis900.tmpl b/Documentation/DocBook/sis900.tmpl
deleted file mode 100644
index 6c2cbac93c3f..000000000000
--- a/Documentation/DocBook/sis900.tmpl
+++ /dev/null
@@ -1,585 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
- "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
-
-<book id="SiS900Guide">
-
-<bookinfo>
-
-<title>SiS 900/7016 Fast Ethernet Device Driver</title>
-
-<authorgroup>
-<author>
-<firstname>Ollie</firstname>
-<surname>Lho</surname>
-</author>
-
-<author>
-<firstname>Lei Chun</firstname>
-<surname>Chang</surname>
-</author>
-</authorgroup>
-
-<edition>Document Revision: 0.3 for SiS900 driver v1.06 &amp; v1.07</edition>
-<pubdate>November 16, 2000</pubdate>
-
-<copyright>
- <year>1999</year>
- <holder>Silicon Integrated System Corp.</holder>
-</copyright>
-
-<legalnotice>
- <para>
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- </para>
-
- <para>
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- </para>
-
- <para>
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- </para>
-</legalnotice>
-
-<abstract>
-<para>
-This document gives some information on installation and usage of SiS 900/7016
-device driver under Linux.
-</para>
-</abstract>
-
-</bookinfo>
-
-<toc></toc>
-
-<chapter id="intro">
- <title>Introduction</title>
-
-<para>
-This document describes the revision 1.06 and 1.07 of SiS 900/7016 Fast Ethernet
-device driver under Linux. The driver is developed by Silicon Integrated
-System Corp. and distributed freely under the GNU General Public License (GPL).
-The driver can be compiled as a loadable module and used under Linux kernel
-version 2.2.x. (rev. 1.06)
-With minimal changes, the driver can also be used under 2.3.x and 2.4.x kernel
-(rev. 1.07), please see
-<xref linkend="install"/>. If you are intended to
-use the driver for earlier kernels, you are on your own.
-</para>
-
-<para>
-The driver is tested with usual TCP/IP applications including
-FTP, Telnet, Netscape etc. and is used constantly by the developers.
-</para>
-
-<para>
-Please send all comments/fixes/questions to
-<ulink url="mailto:lcchang@sis.com.tw">Lei-Chun Chang</ulink>.
-</para>
-</chapter>
-
-<chapter id="changes">
- <title>Changes</title>
-
-<para>
-Changes made in Revision 1.07
-
-<orderedlist>
-<listitem>
-<para>
-Separation of sis900.c and sis900.h in order to move most
-constant definition to sis900.h (many of those constants were
-corrected)
-</para>
-</listitem>
-
-<listitem>
-<para>
-Clean up PCI detection, the pci-scan from Donald Becker were not used,
-just simple pci&lowbar;find&lowbar;*.
-</para>
-</listitem>
-
-<listitem>
-<para>
-MII detection is modified to support multiple mii transceiver.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Bugs in read&lowbar;eeprom, mdio&lowbar;* were removed.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Lot of sis900 irrelevant comments were removed/changed and
-more comments were added to reflect the real situation.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Clean up of physical/virtual address space mess in buffer
-descriptors.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Better transmit/receive error handling.
-</para>
-</listitem>
-
-<listitem>
-<para>
-The driver now uses zero-copy single buffer management
-scheme to improve performance.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Names of variables were changed to be more consistent.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Clean up of auo-negotiation and timer code.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Automatic detection and change of PHY on the fly.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Bug in mac probing fixed.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Fix 630E equalier problem by modifying the equalizer workaround rule.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Support for ICS1893 10/100 Interated PHYceiver.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Support for media select by ifconfig.
-</para>
-</listitem>
-
-<listitem>
-<para>
-Added kernel-doc extratable documentation.
-</para>
-</listitem>
-
-</orderedlist>
-</para>
-</chapter>
-
-<chapter id="tested">
- <title>Tested Environment</title>
-
-<para>
-This driver is developed on the following hardware
-
-<itemizedlist>
-<listitem>
-
-<para>
-Intel Celeron 500 with SiS 630 (rev 02) chipset
-</para>
-</listitem>
-<listitem>
-
-<para>
-SiS 900 (rev 01) and SiS 7016/7014 Fast Ethernet Card
-</para>
-</listitem>
-
-</itemizedlist>
-
-and tested with these software environments
-
-<itemizedlist>
-<listitem>
-
-<para>
-Red Hat Linux version 6.2
-</para>
-</listitem>
-<listitem>
-
-<para>
-Linux kernel version 2.4.0
-</para>
-</listitem>
-<listitem>
-
-<para>
-Netscape version 4.6
-</para>
-</listitem>
-<listitem>
-
-<para>
-NcFTP 3.0.0 beta 18
-</para>
-</listitem>
-<listitem>
-
-<para>
-Samba version 2.0.3
-</para>
-</listitem>
-
-</itemizedlist>
-
-</para>
-
-</chapter>
-
-<chapter id="files">
-<title>Files in This Package</title>
-
-<para>
-In the package you can find these files:
-</para>
-
-<para>
-<variablelist>
-
-<varlistentry>
-<term>sis900.c</term>
-<listitem>
-<para>
-Driver source file in C
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<term>sis900.h</term>
-<listitem>
-<para>
-Header file for sis900.c
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<term>sis900.sgml</term>
-<listitem>
-<para>
-DocBook SGML source of the document
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<term>sis900.txt</term>
-<listitem>
-<para>
-Driver document in plain text
-</para>
-</listitem>
-</varlistentry>
-
-</variablelist>
-</para>
-</chapter>
-
-<chapter id="install">
- <title>Installation</title>
-
-<para>
-Silicon Integrated System Corp. is cooperating closely with core Linux Kernel
-developers. The revisions of SiS 900 driver are distributed by the usuall channels
-for kernel tar files and patches. Those kernel tar files for official kernel and
-patches for kernel pre-release can be download at
-<ulink url="http://ftp.kernel.org/pub/linux/kernel/">official kernel ftp site</ulink>
-and its mirrors.
-The 1.06 revision can be found in kernel version later than 2.3.15 and pre-2.2.14,
-and 1.07 revision can be found in kernel version 2.4.0.
-If you have no prior experience in networking under Linux, please read
-<ulink url="http://www.tldp.org/">Ethernet HOWTO</ulink> and
-<ulink url="http://www.tldp.org/">Networking HOWTO</ulink> available from
-Linux Documentation Project (LDP).
-</para>
-
-<para>
-The driver is bundled in release later than 2.2.11 and 2.3.15 so this
-is the most easy case.
-Be sure you have the appropriate packages for compiling kernel source.
-Those packages are listed in Document/Changes in kernel source
-distribution. If you have to install the driver other than those bundled
-in kernel release, you should have your driver file
-<filename>sis900.c</filename> and <filename>sis900.h</filename>
-copied into <filename class="directory">/usr/src/linux/drivers/net/</filename> first.
-There are two alternative ways to install the driver
-</para>
-
-<sect1>
-<title>Building the driver as loadable module</title>
-
-<para>
-To build the driver as a loadable kernel module you have to reconfigure
-the kernel to activate network support by
-</para>
-
-<para><screen>
-make menuconfig
-</screen></para>
-
-<para>
-Choose <quote>Loadable module support ---></quote>,
-then select <quote>Enable loadable module support</quote>.
-</para>
-
-<para>
-Choose <quote>Network Device Support ---></quote>, select
-<quote>Ethernet (10 or 100Mbit)</quote>.
-Then select <quote>EISA, VLB, PCI and on board controllers</quote>,
-and choose <quote>SiS 900/7016 PCI Fast Ethernet Adapter support</quote>
-to <quote>M</quote>.
-</para>
-
-<para>
-After reconfiguring the kernel, you can make the driver module by
-</para>
-
-<para><screen>
-make modules
-</screen></para>
-
-<para>
-The driver should be compiled with no errors. After compiling the driver,
-the driver can be installed to proper place by
-</para>
-
-<para><screen>
-make modules_install
-</screen></para>
-
-<para>
-Load the driver into kernel by
-</para>
-
-<para><screen>
-insmod sis900
-</screen></para>
-
-<para>
-When loading the driver into memory, some information message can be view by
-</para>
-
-<para>
-<screen>
-dmesg
-</screen>
-
-or
-
-<screen>
-cat /var/log/message
-</screen>
-</para>
-
-<para>
-If the driver is loaded properly you will have messages similar to this:
-</para>
-
-<para><screen>
-sis900.c: v1.07.06 11/07/2000
-eth0: SiS 900 PCI Fast Ethernet at 0xd000, IRQ 10, 00:00:e8:83:7f:a4.
-eth0: SiS 900 Internal MII PHY transceiver found at address 1.
-eth0: Using SiS 900 Internal MII PHY as default
-</screen></para>
-
-<para>
-showing the version of the driver and the results of probing routine.
-</para>
-
-<para>
-Once the driver is loaded, network can be brought up by
-</para>
-
-<para><screen>
-/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE
-</screen></para>
-
-<para>
-where IPADDR, BROADCAST, NETMASK are your IP address, broadcast address and
-netmask respectively. TYPE is used to set medium type used by the device.
-Typical values are "10baseT"(twisted-pair 10Mbps Ethernet) or "100baseT"
-(twisted-pair 100Mbps Ethernet). For more information on how to configure
-network interface, please refer to
-<ulink url="http://www.tldp.org/">Networking HOWTO</ulink>.
-</para>
-
-<para>
-The link status is also shown by kernel messages. For example, after the
-network interface is activated, you may have the message:
-</para>
-
-<para><screen>
-eth0: Media Link On 100mbps full-duplex
-</screen></para>
-
-<para>
-If you try to unplug the twist pair (TP) cable you will get
-</para>
-
-<para><screen>
-eth0: Media Link Off
-</screen></para>
-
-<para>
-indicating that the link is failed.
-</para>
-</sect1>
-
-<sect1>
-<title>Building the driver into kernel</title>
-
-<para>
-If you want to make the driver into kernel, choose <quote>Y</quote>
-rather than <quote>M</quote> on
-<quote>SiS 900/7016 PCI Fast Ethernet Adapter support</quote>
-when configuring the kernel. Build the kernel image in the usual way
-</para>
-
-<para><screen>
-make clean
-
-make bzlilo
-</screen></para>
-
-<para>
-Next time the system reboot, you have the driver in memory.
-</para>
-
-</sect1>
-</chapter>
-
-<chapter id="problems">
- <title>Known Problems and Bugs</title>
-
-<para>
-There are some known problems and bugs. If you find any other bugs please
-mail to <ulink url="mailto:lcchang@sis.com.tw">lcchang@sis.com.tw</ulink>
-
-<orderedlist>
-
-<listitem>
-<para>
-AM79C901 HomePNA PHY is not thoroughly tested, there may be some
-bugs in the <quote>on the fly</quote> change of transceiver.
-</para>
-</listitem>
-
-<listitem>
-<para>
-A bug is hidden somewhere in the receive buffer management code,
-the bug causes NULL pointer reference in the kernel. This fault is
-caught before bad things happen and reported with the message:
-
-<computeroutput>
-eth0: NULL pointer encountered in Rx ring, skipping
-</computeroutput>
-
-which can be viewed with <literal remap="tt">dmesg</literal> or
-<literal remap="tt">cat /var/log/message</literal>.
-</para>
-</listitem>
-
-<listitem>
-<para>
-The media type change from 10Mbps to 100Mbps twisted-pair ethernet
-by ifconfig causes the media link down.
-</para>
-</listitem>
-
-</orderedlist>
-</para>
-</chapter>
-
-<chapter id="RHistory">
- <title>Revision History</title>
-
-<para>
-<itemizedlist>
-
-<listitem>
-<para>
-November 13, 2000, Revision 1.07, seventh release, 630E problem fixed
-and further clean up.
-</para>
-</listitem>
-
-<listitem>
-<para>
-November 4, 1999, Revision 1.06, Second release, lots of clean up
-and optimization.
-</para>
-</listitem>
-
-<listitem>
-<para>
-August 8, 1999, Revision 1.05, Initial Public Release
-</para>
-</listitem>
-
-</itemizedlist>
-</para>
-</chapter>
-
-<chapter id="acknowledgements">
- <title>Acknowledgements</title>
-
-<para>
-This driver was originally derived form
-<ulink url="mailto:becker@cesdis1.gsfc.nasa.gov">Donald Becker</ulink>'s
-<ulink url="ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/kern-2.3/pci-skeleton.c"
->pci-skeleton</ulink> and
-<ulink url="ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/kern-2.3/rtl8139.c"
->rtl8139</ulink> drivers. Donald also provided various suggestion
-regarded with improvements made in revision 1.06.
-</para>
-
-<para>
-The 1.05 revision was created by
-<ulink url="mailto:cmhuang@sis.com.tw">Jim Huang</ulink>, AMD 79c901
-support was added by <ulink url="mailto:lcs@sis.com.tw">Chin-Shan Li</ulink>.
-</para>
-</chapter>
-
-<chapter id="functions">
-<title>List of Functions</title>
-!Idrivers/net/sis900.c
-</chapter>
-
-</book>
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 5ed85af88789..07cb93b82ba9 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -360,7 +360,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
struct foo *new_fp;
struct foo *old_fp;
- new_fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+ new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
spin_lock(&foo_mutex);
old_fp = gbl_foo;
*new_fp = *old_fp;
@@ -461,7 +461,7 @@ The foo_update_a() function might then be written as follows:
struct foo *new_fp;
struct foo *old_fp;
- new_fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+ new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
spin_lock(&foo_mutex);
old_fp = gbl_foo;
*new_fp = *old_fp;
@@ -605,7 +605,7 @@ are the same as those shown in the preceding section, so they are omitted.
{
int cpu;
- for_each_cpu(cpu)
+ for_each_possible_cpu(cpu)
run_on(cpu);
}
diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh
index ec5a6de1cd7b..97374aacacb2 100644
--- a/Documentation/aoe/mkdevs.sh
+++ b/Documentation/aoe/mkdevs.sh
@@ -27,6 +27,8 @@ rm -f $dir/discover
mknod -m 0200 $dir/discover c $MAJOR 3
rm -f $dir/interfaces
mknod -m 0200 $dir/interfaces c $MAJOR 4
+rm -f $dir/revalidate
+mknod -m 0200 $dir/revalidate c $MAJOR 5
export n_partitions
mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'`
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
index ab39d8bb634c..a7ed1dc4f331 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/aoe/udev.txt
@@ -18,6 +18,7 @@
SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM="aoe", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
# aoe block devices
KERNEL="etherd*", NAME="%k", GROUP="disk"
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
index fad566bb02fc..76850295af8f 100644
--- a/Documentation/arm/Booting
+++ b/Documentation/arm/Booting
@@ -118,7 +118,7 @@ to store page tables. The recommended placement is 32KiB into RAM.
In either case, the following conditions must be met:
-- Quiesce all DMA capable devicess so that memory does not get
+- Quiesce all DMA capable devices so that memory does not get
corrupted by bogus network packets or disk data. This will save
you many hours of debug.
diff --git a/Documentation/arm/README b/Documentation/arm/README
index 5ed6f3530b86..9b9c8226fdc4 100644
--- a/Documentation/arm/README
+++ b/Documentation/arm/README
@@ -89,7 +89,7 @@ Modules
Although modularisation is supported (and required for the FP emulator),
each module on an ARM2/ARM250/ARM3 machine when is loaded will take
memory up to the next 32k boundary due to the size of the pages.
- Therefore, modularisation on these machines really worth it?
+ Therefore, is modularisation on these machines really worth it?
However, ARM6 and up machines allow modules to take multiples of 4k, and
as such Acorn RiscPCs and other architectures using these processors can
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
index cbbe5587c78d..78bc1c1b04e5 100644
--- a/Documentation/arm/SA1100/Assabet
+++ b/Documentation/arm/SA1100/Assabet
@@ -26,7 +26,7 @@ Installing a bootloader
A couple of bootloaders able to boot Linux on Assabet are available:
-BLOB (http://www.lart.tudelft.nl/lartware/blob/)
+BLOB (http://www.lartmaker.nl/lartware/blob/)
BLOB is a bootloader used within the LART project. Some contributed
patches were merged into BLOB to add support for Assabet.
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART
index 2f73f513e16a..6d412b685598 100644
--- a/Documentation/arm/SA1100/LART
+++ b/Documentation/arm/SA1100/LART
@@ -11,4 +11,4 @@ is under development, with plenty of others in different stages of
planning.
The hardware designs for this board have been released under an open license;
-see the LART page at http://www.lart.tudelft.nl/ for more information.
+see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
index 89aa89d526ac..8c6ee684174c 100644
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt
@@ -10,6 +10,8 @@ Introduction
by the 's3c2410' architecture of ARM Linux. Currently the S3C2410 and
the S3C2440 are supported CPUs.
+ Support for the S3C2400 series is in progress.
+
Configuration
-------------
@@ -32,6 +34,11 @@ Machines
A general purpose development board, see EB2410ITX.txt for further
details
+ Simtec Electronics IM2440D20 (Osiris)
+
+ CPU Module from Simtec Electronics, with a S3C2440A CPU, nand flash
+ and a PCMCIA controller.
+
Samsung SMDK2410
Samsung's own development board, geared for PDA work.
@@ -85,6 +92,26 @@ Adding New Machines
mailing list information.
+I2C
+---
+
+ The hardware I2C core in the CPU is supported in single master
+ mode, and can be configured via platform data.
+
+
+RTC
+---
+
+ Support for the onboard RTC unit, including alarm function.
+
+
+Watchdog
+--------
+
+ The onchip watchdog is available via the standard watchdog
+ interface.
+
+
NAND
----
@@ -121,6 +148,15 @@ Clock Management
various clock units
+Suspend to RAM
+--------------
+
+ For boards that provide support for suspend to RAM, the
+ system can be placed into low power suspend.
+
+ See Suspend.txt for more information.
+
+
Platform Data
-------------
@@ -158,6 +194,7 @@ Platform Data
exported outside arch/arm/mach-s3c2410/, or exported to
modules via EXPORT_SYMBOL() and related functions.
+
Port Contributors
-----------------
@@ -188,8 +225,11 @@ Document Changes
08 Mar 2005 - BJD - Added LCVR to list of people, updated introduction
08 Mar 2005 - BJD - Added section on adding machines
09 Sep 2005 - BJD - Added section on platform data
+ 11 Feb 2006 - BJD - Added I2C, RTC and Watchdog sections
+ 11 Feb 2006 - BJD - Added Osiris machine, and S3C2400 information
+
Document Author
---------------
-Ben Dooks, (c) 2004-2005 Simtec Electronics
+Ben Dooks, (c) 2004-2005,2006 Simtec Electronics
diff --git a/Documentation/arm/Setup b/Documentation/arm/Setup
index 0abd0720d7ed..0cb1e64bde80 100644
--- a/Documentation/arm/Setup
+++ b/Documentation/arm/Setup
@@ -58,7 +58,7 @@ below:
video_y
This describes the character position of cursor on VGA console, and
- is otherwise unused. (should not used for other console types, and
+ is otherwise unused. (should not be used for other console types, and
should not be used for other purposes).
memc_control_reg
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 8e63831971d5..f989a9e839b4 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -132,8 +132,18 @@ Some new queue property settings:
limit. No highmem default.
blk_queue_max_sectors(q, max_sectors)
- Maximum size request you can handle in units of 512 byte
- sectors. 255 default.
+ Sets two variables that limit the size of the request.
+
+ - The request queue's max_sectors, which is a soft size in
+ in units of 512 byte sectors, and could be dynamically varied
+ by the core kernel.
+
+ - The request queue's max_hw_sectors, which is a hard limit
+ and reflects the maximum size request a driver can handle
+ in units of 512 byte sectors.
+
+ The default for both max_sectors and max_hw_sectors is
+ 255. The upper limit of max_sectors is 1024.
blk_queue_max_phys_segments(q, max_segments)
Maximum physical segments you can handle in a request. 128
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 4ae418889b88..53245c429f7d 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -362,6 +362,27 @@ maps this page at its virtual address.
likely that you will need to flush the instruction cache
for copy_to_user_page().
+ void flush_anon_page(struct page *page, unsigned long vmaddr)
+ When the kernel needs to access the contents of an anonymous
+ page, it calls this function (currently only
+ get_user_pages()). Note: flush_dcache_page() deliberately
+ doesn't work for an anonymous page. The default
+ implementation is a nop (and should remain so for all coherent
+ architectures). For incoherent architectures, it should flush
+ the cache of the page at vmaddr in the current user process.
+
+ void flush_kernel_dcache_page(struct page *page)
+ When the kernel needs to modify a user page is has obtained
+ with kmap, it calls this function after all modifications are
+ complete (but before kunmapping it) to bring the underlying
+ page up to date. It is assumed here that the user has no
+ incoherent cached copies (i.e. the original page was obtained
+ from a mechanism like get_user_pages()). The default
+ implementation is a nop and should remain so on all coherent
+ architectures. On incoherent architectures, this should flush
+ the kernel cache for page (using page_address(page)).
+
+
void flush_icache_range(unsigned long start, unsigned long end)
When the kernel stores into addresses that it will execute
out of (eg when loading modules), this function is called.
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
index 57a314b14cf8..ad6e0ba7b38c 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -69,10 +69,11 @@ Unregisters new callback with connector core.
struct cb_id *id - unique connector's user identifier.
-void cn_netlink_send(struct cn_msg *msg, u32 __groups, int gfp_mask);
+int cn_netlink_send(struct cn_msg *msg, u32 __groups, int gfp_mask);
Sends message to the specified groups. It can be safely called from
-any context, but may silently fail under strong memory pressure.
+softirq context, but may silently fail under strong memory pressure.
+If there are no listeners for given group -ESRCH can be returned.
struct cn_msg * - message header(with attached data).
u32 __group - destination group.
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 57a09f99ecb0..1bcf69996c9d 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -97,13 +97,13 @@ at which time hotplug is disabled.
You really dont need to manipulate any of the system cpu maps. They should
be read-only for most use. When setting up per-cpu resources almost always use
-cpu_possible_map/for_each_cpu() to iterate.
+cpu_possible_map/for_each_possible_cpu() to iterate.
Never use anything other than cpumask_t to represent bitmap of CPUs.
#include <linux/cpumask.h>
-for_each_cpu - Iterate over cpu_possible_map
+for_each_possible_cpu - Iterate over cpu_possible_map
for_each_online_cpu - Iterate over cpu_online_map
for_each_present_cpu - Iterate over cpu_present_map
for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 990998ee10b6..159e2a0c3e80 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -4,8 +4,9 @@
Copyright (C) 2004 BULL SA.
Written by Simon.Derr@bull.net
-Portions Copyright (c) 2004 Silicon Graphics, Inc.
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
Modified by Paul Jackson <pj@sgi.com>
+Modified by Christoph Lameter <clameter@sgi.com>
CONTENTS:
=========
@@ -17,7 +18,8 @@ CONTENTS:
1.4 What are exclusive cpusets ?
1.5 What does notify_on_release do ?
1.6 What is memory_pressure ?
- 1.7 How do I use cpusets ?
+ 1.7 What is memory spread ?
+ 1.8 How do I use cpusets ?
2. Usage Examples and Syntax
2.1 Basic Usage
2.2 Adding/removing cpus
@@ -90,7 +92,8 @@ This can be especially valuable on:
These subsets, or "soft partitions" must be able to be dynamically
adjusted, as the job mix changes, without impacting other concurrently
-executing jobs.
+executing jobs. The location of the running jobs pages may also be moved
+when the memory locations are changed.
The kernel cpuset patch provides the minimum essential kernel
mechanisms required to efficiently implement such subsets. It
@@ -102,8 +105,8 @@ memory allocator code.
1.3 How are cpusets implemented ?
---------------------------------
-Cpusets provide a Linux kernel (2.6.7 and above) mechanism to constrain
-which CPUs and Memory Nodes are used by a process or set of processes.
+Cpusets provide a Linux kernel mechanism to constrain which CPUs and
+Memory Nodes are used by a process or set of processes.
The Linux kernel already has a pair of mechanisms to specify on which
CPUs a task may be scheduled (sched_setaffinity) and on which Memory
@@ -315,7 +318,78 @@ the tasks in the cpuset, in units of reclaims attempted per second,
times 1000.
-1.7 How do I use cpusets ?
+1.7 What is memory spread ?
+---------------------------
+There are two boolean flag files per cpuset that control where the
+kernel allocates pages for the file system buffers and related in
+kernel data structures. They are called 'memory_spread_page' and
+'memory_spread_slab'.
+
+If the per-cpuset boolean flag file 'memory_spread_page' is set, then
+the kernel will spread the file system buffers (page cache) evenly
+over all the nodes that the faulting task is allowed to use, instead
+of preferring to put those pages on the node where the task is running.
+
+If the per-cpuset boolean flag file 'memory_spread_slab' is set,
+then the kernel will spread some file system related slab caches,
+such as for inodes and dentries evenly over all the nodes that the
+faulting task is allowed to use, instead of preferring to put those
+pages on the node where the task is running.
+
+The setting of these flags does not affect anonymous data segment or
+stack segment pages of a task.
+
+By default, both kinds of memory spreading are off, and memory
+pages are allocated on the node local to where the task is running,
+except perhaps as modified by the tasks NUMA mempolicy or cpuset
+configuration, so long as sufficient free memory pages are available.
+
+When new cpusets are created, they inherit the memory spread settings
+of their parent.
+
+Setting memory spreading causes allocations for the affected page
+or slab caches to ignore the tasks NUMA mempolicy and be spread
+instead. Tasks using mbind() or set_mempolicy() calls to set NUMA
+mempolicies will not notice any change in these calls as a result of
+their containing tasks memory spread settings. If memory spreading
+is turned off, then the currently specified NUMA mempolicy once again
+applies to memory page allocations.
+
+Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag
+files. By default they contain "0", meaning that the feature is off
+for that cpuset. If a "1" is written to that file, then that turns
+the named feature on.
+
+The implementation is simple.
+
+Setting the flag 'memory_spread_page' turns on a per-process flag
+PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
+joins that cpuset. The page allocation calls for the page cache
+is modified to perform an inline check for this PF_SPREAD_PAGE task
+flag, and if set, a call to a new routine cpuset_mem_spread_node()
+returns the node to prefer for the allocation.
+
+Similarly, setting 'memory_spread_cache' turns on the flag
+PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
+pages from the node returned by cpuset_mem_spread_node().
+
+The cpuset_mem_spread_node() routine is also simple. It uses the
+value of a per-task rotor cpuset_mem_spread_rotor to select the next
+node in the current tasks mems_allowed to prefer for the allocation.
+
+This memory placement policy is also known (in other contexts) as
+round-robin or interleave.
+
+This policy can provide substantial improvements for jobs that need
+to place thread local data on the corresponding node, but that need
+to access large file system data sets that need to be spread across
+the several nodes in the jobs cpuset in order to fit. Without this
+policy, especially for jobs that might have one thread reading in the
+data set, the memory allocation across the nodes in the jobs cpuset
+can become very uneven.
+
+
+1.8 How do I use cpusets ?
--------------------------
In order to minimize the impact of cpusets on critical kernel
@@ -371,22 +445,17 @@ cpusets memory placement policy 'mems' subsequently changes.
If the cpuset flag file 'memory_migrate' is set true, then when
tasks are attached to that cpuset, any pages that task had
allocated to it on nodes in its previous cpuset are migrated
-to the tasks new cpuset. Depending on the implementation,
-this migration may either be done by swapping the page out,
-so that the next time the page is referenced, it will be paged
-into the tasks new cpuset, usually on the node where it was
-referenced, or this migration may be done by directly copying
-the pages from the tasks previous cpuset to the new cpuset,
-where possible to the same node, relative to the new cpuset,
-as the node that held the page, relative to the old cpuset.
+to the tasks new cpuset. The relative placement of the page within
+the cpuset is preserved during these migration operations if possible.
+For example if the page was on the second valid node of the prior cpuset
+then the page will be placed on the second valid node of the new cpuset.
+
Also if 'memory_migrate' is set true, then if that cpusets
'mems' file is modified, pages allocated to tasks in that
cpuset, that were on nodes in the previous setting of 'mems',
-will be moved to nodes in the new setting of 'mems.' Again,
-depending on the implementation, this might be done by swapping,
-or by direct copying. In either case, pages that were not in
-the tasks prior cpuset, or in the cpusets prior 'mems' setting,
-will not be moved.
+will be moved to nodes in the new setting of 'mems.'
+Pages that were not in the tasks prior cpuset, or in the cpusets
+prior 'mems' setting, will not be moved.
There is an exception to the above. If hotplug functionality is used
to remove all the CPUs that are currently assigned to a cpuset,
@@ -434,16 +503,6 @@ and then start a subshell 'sh' in that cpuset:
# The next line should display '/Charlie'
cat /proc/self/cpuset
-In the case that a change of cpuset includes wanting to move already
-allocated memory pages, consider further the work of IWAMOTO
-Toshihiro <iwamoto@valinux.co.jp> for page remapping and memory
-hotremoval, which can be found at:
-
- http://people.valinux.co.jp/~iwamoto/mh.html
-
-The integration of cpusets with such memory migration is not yet
-available.
-
In the future, a C library interface to cpusets will likely be
available. For now, the only way to query or modify cpusets is
via the cpuset file system, using the various cd, mkdir, echo, cat,
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index ff280e2e1613..2b28e9ec4e3a 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -1,5 +1,5 @@
-Export cpu topology info by sysfs. Items (attributes) are similar
+Export cpu topology info via sysfs. Items (attributes) are similar
to /proc/cpuinfo.
1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
@@ -12,7 +12,7 @@ represent the thread siblings to cpu X in the same core;
represent the thread siblings to cpu X in the same physical package;
To implement it in an architecture-neutral way, a new source file,
-driver/base/topology.c, is to export the 5 attributes.
+drivers/base/topology.c, is to export the 4 attributes.
If one architecture wants to support this feature, it just needs to
implement 4 defines, typically in file include/asm-XXX/topology.h.
diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/drivers/edac/edac.txt
index d37191fe5681..70d96a62e5e1 100644
--- a/Documentation/drivers/edac/edac.txt
+++ b/Documentation/drivers/edac/edac.txt
@@ -21,7 +21,7 @@ within the computer system. In the initial release, memory Correctable Errors
Detecting CE events, then harvesting those events and reporting them,
CAN be a predictor of future UE events. With CE events, the system can
-continue to operate, but with less safety. Preventive maintainence and
+continue to operate, but with less safety. Preventive maintenance and
proactive part replacement of memory DIMMs exhibiting CEs can reduce
the likelihood of the dreaded UE events and system 'panics'.
@@ -29,13 +29,13 @@ the likelihood of the dreaded UE events and system 'panics'.
In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices
in order to determine if errors are occurring on data transfers.
The presence of PCI Parity errors must be examined with a grain of salt.
-There are several addin adapters that do NOT follow the PCI specification
+There are several add-in adapters that do NOT follow the PCI specification
with regards to Parity generation and reporting. The specification says
the vendor should tie the parity status bits to 0 if they do not intend
to generate parity. Some vendors do not do this, and thus the parity bit
can "float" giving false positives.
-The PCI Parity EDAC device has the ability to "skip" known flakey
+The PCI Parity EDAC device has the ability to "skip" known flaky
cards during the parity scan. These are set by the parity "blacklist"
interface in the sysfs for PCI Parity. (See the PCI section in the sysfs
section below.) There is also a parity "whitelist" which is used as
@@ -101,7 +101,7 @@ Memory Controller (mc) Model
First a background on the memory controller's model abstracted in EDAC.
Each mc device controls a set of DIMM memory modules. These modules are
-layed out in a Chip-Select Row (csrowX) and Channel table (chX). There can
+laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can
be multiple csrows and two channels.
Memory controllers allow for several csrows, with 8 csrows being a typical value.
@@ -131,7 +131,7 @@ for memory DIMMs:
DIMM_B1
Labels for these slots are usually silk screened on the motherboard. Slots
-labeled 'A' are channel 0 in this example. Slots labled 'B'
+labeled 'A' are channel 0 in this example. Slots labeled 'B'
are channel 1. Notice that there are two csrows possible on a
physical DIMM. These csrows are allocated their csrow assignment
based on the slot into which the memory DIMM is placed. Thus, when 1 DIMM
@@ -140,7 +140,7 @@ is placed in each Channel, the csrows cross both DIMMs.
Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
will have 1 csrow, csrow0. csrow1 will be empty. On the other hand,
-when 2 dual ranked DIMMs are similiaryly placed, then both csrow0 and
+when 2 dual ranked DIMMs are similarly placed, then both csrow0 and
csrow1 will be populated. The pattern repeats itself for csrow2 and
csrow3.
@@ -246,7 +246,7 @@ Module Version read-only attribute file:
'mc_version'
- The EDAC CORE modules's version and compile date are shown here to
+ The EDAC CORE module's version and compile date are shown here to
indicate what EDAC is running.
@@ -423,7 +423,7 @@ Total memory managed by this csrow attribute file:
'size_mb'
This attribute file displays, in count of megabytes, of memory
- that this csrow contatins.
+ that this csrow contains.
Memory Type attribute file:
@@ -557,7 +557,7 @@ On Header Type 00 devices the primary status is looked at
for any parity error regardless of whether Parity is enabled on the
device. (The spec indicates parity is generated in some cases).
On Header Type 01 bridges, the secondary status register is also
-looked at to see if parity ocurred on the bus on the other side of
+looked at to see if parity occurred on the bus on the other side of
the bridge.
@@ -588,7 +588,7 @@ Panic on PCI PARITY Error:
'panic_on_pci_parity'
- This control files enables or disables panic'ing when a parity
+ This control files enables or disables panicking when a parity
error has been detected.
@@ -616,12 +616,12 @@ PCI Device Whitelist:
This control file allows for an explicit list of PCI devices to be
scanned for parity errors. Only devices found on this list will
- be examined. The list is a line of hexadecimel VENDOR and DEVICE
+ be examined. The list is a line of hexadecimal VENDOR and DEVICE
ID tuples:
1022:7450,1434:16a6
- One or more can be inserted, seperated by a comma.
+ One or more can be inserted, separated by a comma.
To write the above list doing the following as one command line:
@@ -639,11 +639,11 @@ PCI Device Blacklist:
This control file allows for a list of PCI devices to be
skipped for scanning.
- The list is a line of hexadecimel VENDOR and DEVICE ID tuples:
+ The list is a line of hexadecimal VENDOR and DEVICE ID tuples:
1022:7450,1434:16a6
- One or more can be inserted, seperated by a comma.
+ One or more can be inserted, separated by a comma.
To write the above list doing the following as one command line:
@@ -651,14 +651,14 @@ PCI Device Blacklist:
> /sys/devices/system/edac/pci/pci_parity_blacklist
- To display what the whitelist current contatins,
+ To display what the whitelist currently contains,
simply 'cat' the same file.
=======================================================================
PCI Vendor and Devices IDs can be obtained with the lspci command. Using
the -n option lspci will display the vendor and device IDs. The system
-adminstrator will have to determine which devices should be scanned or
+administrator will have to determine which devices should be scanned or
skipped.
@@ -669,5 +669,5 @@ Turn OFF a whitelist by an empty echo command:
echo > /sys/devices/system/edac/pci/pci_parity_whitelist
-and any previous blacklist will be utililzed.
+and any previous blacklist will be utilized.
diff --git a/Documentation/dvb/avermedia.txt b/Documentation/dvb/avermedia.txt
index 068070ff13cd..8bab8461a4af 100644
--- a/Documentation/dvb/avermedia.txt
+++ b/Documentation/dvb/avermedia.txt
@@ -1,4 +1,3 @@
-
HOWTO: Get An Avermedia DVB-T working under Linux
______________________________________________
@@ -137,11 +136,8 @@ Getting the card going
To power up the card, load the following modules in the
following order:
- * insmod dvb-core.o
- * modprobe bttv.o
- * insmod bt878.o
- * insmod dvb-bt8xx.o
- * insmod sp887x.o
+ * modprobe bttv (normally loaded automatically)
+ * modprobe dvb-bt8xx (or place dvb-bt8xx in /etc/modules)
Insertion of these modules into the running kernel will
activate the appropriate DVB device nodes. It is then possible
@@ -302,4 +298,4 @@ Further Update
Many thanks to Nigel Pearson for the updates to this document
since the recent revision of the driver.
- January 29th 2004
+ February 14th 2006
diff --git a/Documentation/dvb/bt8xx.txt b/Documentation/dvb/bt8xx.txt
index 52ed462061df..4e7614e606c5 100644
--- a/Documentation/dvb/bt8xx.txt
+++ b/Documentation/dvb/bt8xx.txt
@@ -1,118 +1,78 @@
-How to get the Nebula, PCTV, FusionHDTV Lite and Twinhan DST cards working
-==========================================================================
+How to get the bt8xx cards working
+==================================
-This class of cards has a bt878a as the PCI interface, and
-require the bttv driver.
+1) General information
+======================
-Please pay close attention to the warning about the bttv module
-options below for the DST card.
+This class of cards has a bt878a as the PCI interface, and require the bttv driver
+for accessing the i2c bus and the gpio pins of the bt8xx chipset.
+Please see Documentation/dvb/cards.txt => o Cards based on the Conexant Bt8xx PCI bridge:
-1) General informations
-=======================
-
-These drivers require the bttv driver to provide the means to access
-the i2c bus and the gpio pins of the bt8xx chipset.
-
-Because of this, you need to enable
-"Device drivers" => "Multimedia devices"
- => "Video For Linux" => "BT848 Video For Linux"
-
-Furthermore you need to enable
-"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
- => "DVB for Linux" "DVB Core Support" "BT8xx based PCI cards"
+Compiling kernel please enable:
+a.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "BT848 Video For Linux"
+b.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
+ => "DVB for Linux" "DVB Core Support" "Bt8xx based PCI Cards"
2) Loading Modules
==================
-In general you need to load the bttv driver, which will handle the gpio and
-i2c communication for us, plus the common dvb-bt8xx device driver.
-The frontends for Nebula (nxt6000), Pinnacle PCTV (cx24110), TwinHan (dst),
-FusionHDTV DVB-T Lite (mt352) and FusionHDTV5 Lite (lgdt330x) are loaded
-automatically by the dvb-bt8xx device driver.
-
-3a) Nebula / Pinnacle PCTV / FusionHDTV Lite
----------------------------------------------
-
- $ modprobe bttv (normally bttv is being loaded automatically by kmod)
- $ modprobe dvb-bt8xx
-
-(or just place dvb-bt8xx in /etc/modules for automatic loading)
-
-
-3b) TwinHan and Clones
---------------------------
+In default cases bttv is loaded automatically.
+To load the backend either place dvb-bt8xx in etc/modules, or apply manually:
- $ modprobe bttv card=0x71
- $ modprobe dvb-bt8xx
- $ modprobe dst
+ $ modprobe dvb-bt8xx
-The value 0x71 will override the PCI type detection for dvb-bt8xx,
-which is necessary for TwinHan cards. Omission of this parameter might result
-in a system lockup.
+All frontends will be loaded automatically.
+People running udev please see Documentation/dvb/udev.txt.
-If you're having an older card (blue color PCB) and card=0x71 locks up
-your machine, try using 0x68, too. If that does not work, ask on the
-mailing list.
+In the following cases overriding the PCI type detection for dvb-bt8xx might be necessary:
-The DST module takes a couple of useful parameters.
+2a) Running TwinHan and Clones
+------------------------------
-verbose takes values 0 to 4. These values control the verbosity level,
-and can be used to debug also.
+ $ modprobe bttv card=113
+ $ modprobe dvb-bt8xx
+ $ modprobe dst
-verbose=0 means complete disabling of messages
- 1 only error messages are displayed
- 2 notifications are also displayed
- 3 informational messages are also displayed
- 4 debug setting
+Useful parameters for verbosity level and debugging the dst module:
-dst_addons takes values 0 and 0x20. A value of 0 means it is a FTA card.
-0x20 means it has a Conditional Access slot.
+verbose=0: messages are disabled
+ 1: only error messages are displayed
+ 2: notifications are displayed
+ 3: other useful messages are displayed
+ 4: debug setting
+dst_addons=0: card is a free to air (FTA) card only
+ 0x20: card has a conditional access slot for scrambled channels
-The autodetected values are determined by the cards 'response string'
-which you can see in your logs e.g.
+The autodetected values are determined by the cards' "response string".
+In your logs see f. ex.: dst_get_device_id: Recognize [DSTMCI].
+For bug reports please send in a complete log with verbose=4 activated.
+Please also see Documentation/dvb/ci.txt.
-dst_get_device_id: Recognise [DSTMCI]
-
-If you need to sent in bug reports on the dst, please do send in a complete
-log with the verbose=4 module parameter. For general usage, the default setting
-of verbose=1 is ideal.
-
-
-4) Multiple cards
+2b) Running multiple cards
--------------------------
-If you happen to be running multiple cards, it would be advisable to load
-the bttv module with the card id. This would help to solve any module loading
-problems that you might face.
-
-For example, if you have a Twinhan and Clones card along with a FusionHDTV5 Lite
+Examples of card ID's:
- $ modprobe bttv card=0x71 card=0x87
-
-Here the order of the card id is important and should be the same as that of the
-physical order of the cards. Here card=0x71 represents the Twinhan and clones
-and card=0x87 represents Fusion HDTV5 Lite. These arguments can also be
-specified in decimal, rather than hex:
+Pinnacle PCTV Sat: 94
+Nebula Electronics Digi TV: 104
+pcHDTV HD-2000 TV: 112
+Twinhan DST and clones: 113
+Avermedia AverTV DVB-T 771: 123
+Avermedia AverTV DVB-T 761: 124
+DViCO FusionHDTV DVB-T Lite: 128
+DViCO FusionHDTV 5 Lite: 135
+Notice: The order of the card ID should be uprising:
+Example:
$ modprobe bttv card=113 card=135
+ $ modprobe dvb-bt8xx
-Some examples of card-id's
-
-Pinnacle Sat 0x5e (94)
-Nebula Digi TV 0x68 (104)
-PC HDTV 0x70 (112)
-Twinhan 0x71 (113)
-FusionHDTV DVB-T Lite 0x80 (128)
-FusionHDTV5 Lite 0x87 (135)
-
-For a full list of card-id's, see the V4L Documentation within the kernel
-source: linux/Documentation/video4linux/CARDLIST.bttv
-
-If you have problems with this please do ask on the mailing list.
+For a full list of card ID's please see Documentation/video4linux/CARDLIST.bttv.
+In case of further problems send questions to the mailing list: www.linuxdvb.org.
---
Authors: Richard Walker,
Jamie Honan,
Michael Hunold,
Manu Abraham,
+ Uwe Bugla,
Michael Krufky
diff --git a/Documentation/dvb/get_dvb_firmware b/Documentation/dvb/get_dvb_firmware
index 75c28a174092..15fc8fbef67e 100644
--- a/Documentation/dvb/get_dvb_firmware
+++ b/Documentation/dvb/get_dvb_firmware
@@ -21,8 +21,9 @@
use File::Temp qw/ tempdir /;
use IO::Handle;
-@components = ( "sp8870", "sp887x", "tda10045", "tda10046", "av7110", "dec2000t",
- "dec2540t", "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004",
+@components = ( "sp8870", "sp887x", "tda10045", "tda10046",
+ "tda10046lifeview", "av7110", "dec2000t", "dec2540t",
+ "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004",
"or51211", "or51132_qam", "or51132_vsb", "bluebird");
# Check args
@@ -126,6 +127,24 @@ sub tda10046 {
$outfile;
}
+sub tda10046lifeview {
+ my $sourcefile = "Drv_2.11.02.zip";
+ my $url = "http://www.lifeview.com.tw/drivers/pci_card/FlyDVB-T/$sourcefile";
+ my $hash = "1ea24dee4eea8fe971686981f34fd2e0";
+ my $outfile = "dvb-fe-tda10046.fw";
+ my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
+
+ checkstandard();
+
+ wgetfile($sourcefile, $url);
+ unzip($sourcefile, $tmpdir);
+ extract("$tmpdir/LVHybrid.sys", 0x8b088, 24602, "$tmpdir/fwtmp");
+ verify("$tmpdir/fwtmp", $hash);
+ copy("$tmpdir/fwtmp", $outfile);
+
+ $outfile;
+}
+
sub av7110 {
my $sourcefile = "dvb-ttpci-01.fw-261d";
my $url = "http://www.linuxtv.org/downloads/firmware/$sourcefile";
@@ -227,7 +246,7 @@ sub vp7041 {
}
sub dibusb {
- my $url = "http://www.linuxtv.org/downloads/firmware/dvb-dibusb-5.0.0.11.fw";
+ my $url = "http://www.linuxtv.org/downloads/firmware/dvb-usb-dibusb-5.0.0.11.fw";
my $outfile = "dvb-dibusb-5.0.0.11.fw";
my $hash = "fa490295a527360ca16dcdf3224ca243";
diff --git a/Documentation/dvb/readme.txt b/Documentation/dvb/readme.txt
index f5c50b22de3b..0b0380c91990 100644
--- a/Documentation/dvb/readme.txt
+++ b/Documentation/dvb/readme.txt
@@ -20,11 +20,23 @@ http://linuxtv.org/downloads/
What's inside this directory:
+"avermedia.txt"
+contains detailed information about the
+Avermedia DVB-T cards. See also "bt8xx.txt".
+
+"bt8xx.txt"
+contains detailed information about the
+various bt8xx based "budget" DVB cards.
+
"cards.txt"
contains a list of supported hardware.
+"ci.txt"
+contains detailed information about the
+CI module as part from TwinHan cards and Clones.
+
"contributors.txt"
-is the who-is-who of DVB development
+is the who-is-who of DVB development.
"faq.txt"
contains frequently asked questions and their answers.
@@ -34,19 +46,17 @@ script to download and extract firmware for those devices
that require it.
"ttusb-dec.txt"
-contains detailed informations about the
+contains detailed information about the
TT DEC2000/DEC3000 USB DVB hardware.
-"bt8xx.txt"
-contains detailed installation instructions for the
-various bt8xx based "budget" DVB cards
-(Nebula, Pinnacle PCTV, Twinhan DST)
-
-"README.dibusb"
-contains detailed information about adapters
-based on DiBcom reference design.
-
"udev.txt"
how to get DVB and udev up and running.
+"README.dvb-usb"
+contains detailed information about the DVB USB cards.
+
+"README.flexcop"
+contains detailed information about the
+Technisat- and Flexcop B2C2 drivers.
+
Good luck and have fun!
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 81bc51369f59..59d0c74c79c9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -116,9 +116,13 @@ Who: Harald Welte <laforge@netfilter.org>
---------------------------
-What: EXPORT_SYMBOL(lookup_hash)
-When: January 2006
-Why: Too low-level interface. Use lookup_one_len or lookup_create instead.
+What: remove EXPORT_SYMBOL(kernel_thread)
+When: August 2006
+Files: arch/*/kernel/*_ksyms.c
+Why: kernel_thread is a low-level implementation detail. Drivers should
+ use the <linux/kthread.h> API instead which shields them from
+ implementation details and provides a higherlevel interface that
+ prevents bugs and code duplication
Who: Christoph Hellwig <hch@lst.de>
---------------------------
@@ -151,10 +155,10 @@ Who: Ralf Baechle <ralf@linux-mips.org>
---------------------------
-What: Legacy /proc/pci interface (PCI_LEGACY_PROC)
-When: March 2006
-Why: deprecated since 2.5.53 in favor of lspci(8)
-Who: Adrian Bunk <bunk@stusta.de>
+What: eepro100 network driver
+When: January 2007
+Why: replaced by the e100 driver
+Who: Adrian Bunk <bunk@stusta.de>
---------------------------
@@ -165,6 +169,18 @@ Who: Richard Knutsson <ricknu-0@student.ltu.se> and Greg Kroah-Hartman <gregkh@s
---------------------------
+What: Usage of invalid timevals in setitimer
+When: March 2007
+Why: POSIX requires to validate timevals in the setitimer call. This
+ was never done by Linux. The invalid (e.g. negative timevals) were
+ silently converted to more or less random timeouts and intervals.
+ Until the removal a per boot limited number of warnings is printed
+ and the timevals are sanitized.
+
+Who: Thomas Gleixner <tglx@linutronix.de>
+
+---------------------------
+
What: I2C interface of the it87 driver
When: January 2007
Why: The ISA interface is faster and should be always available. The I2C
@@ -174,6 +190,17 @@ Who: Jean Delvare <khali@linux-fr.org>
---------------------------
+What: remove EXPORT_SYMBOL(tasklist_lock)
+When: August 2006
+Files: kernel/fork.c
+Why: tasklist_lock protects the kernel internal task list. Modules have
+ no business looking at it, and all instances in drivers have been due
+ to use of too-lowlevel APIs. Having this symbol exported prevents
+ moving to more scalable locking schemes for the task list.
+Who: Christoph Hellwig <hch@lst.de>
+
+---------------------------
+
What: mount/umount uevents
When: February 2007
Why: These events are not correct, and do not properly let userspace know
@@ -189,3 +216,33 @@ Why: Board specific code doesn't build anymore since ~2.6.0 and no
users have complained indicating there is no more need for these
boards. This should really be considered a last call.
Who: Ralf Baechle <ralf@linux-mips.org>
+
+---------------------------
+
+What: USB driver API moves to EXPORT_SYMBOL_GPL
+When: Febuary 2008
+Files: include/linux/usb.h, drivers/usb/core/driver.c
+Why: The USB subsystem has changed a lot over time, and it has been
+ possible to create userspace USB drivers using usbfs/libusb/gadgetfs
+ that operate as fast as the USB bus allows. Because of this, the USB
+ subsystem will not be allowing closed source kernel drivers to
+ register with it, after this grace period is over. If anyone needs
+ any help in converting their closed source drivers over to use the
+ userspace filesystems, please contact the
+ linux-usb-devel@lists.sourceforge.net mailing list, and the developers
+ there will be glad to help you out.
+Who: Greg Kroah-Hartman <gregkh@suse.de>
+
+---------------------------
+
+What: find_trylock_page
+When: January 2007
+Why: The interface no longer has any callers left in the kernel. It
+ is an odd interface (compared with other find_*_page functions), in
+ that it does not take a refcount to the page, only the page lock.
+ It should be replaced with find_get_page or find_lock_page if possible.
+ This feature removal can be reevaluated if users of the interface
+ cannot cleanly use something else.
+Who: Nick Piggin <npiggin@suse.de>
+
+---------------------------
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 74052d22d868..66fdc0744fe0 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -1,27 +1,47 @@
00-INDEX
- this file (info on some of the filesystems supported by linux).
+Exporting
+ - explanation of how to make filesystems exportable.
Locking
- info on locking rules as they pertain to Linux VFS.
adfs.txt
- info and mount options for the Acorn Advanced Disc Filing System.
+afs.txt
+ - info and examples for the distributed AFS (Andrew File System) fs.
affs.txt
- info and mount options for the Amiga Fast File System.
+automount-support.txt
+ - information about filesystem automount support.
+befs.txt
+ - information about the BeOS filesystem for Linux.
bfs.txt
- info for the SCO UnixWare Boot Filesystem (BFS).
cifs.txt
- - description of the CIFS filesystem
+ - description of the CIFS filesystem.
coda.txt
- description of the CODA filesystem.
configfs/
- directory containing configfs documentation and example code.
cramfs.txt
- - info on the cram filesystem for small storage (ROMs etc)
+ - info on the cram filesystem for small storage (ROMs etc).
+dentry-locking.txt
+ - info on the RCU-based dcache locking model.
devfs/
- directory containing devfs documentation.
+directory-locking
+ - info about the locking scheme used for directory operations.
dlmfs.txt
- info on the userspace interface to the OCFS2 DLM.
ext2.txt
- info, mount options and specifications for the Ext2 filesystem.
+ext3.txt
+ - info, mount options and specifications for the Ext3 filesystem.
+files.txt
+ - info on file management in the Linux kernel.
+fuse.txt
+ - info on the Filesystem in User SpacE including mount options.
+hfs.txt
+ - info on the Macintosh HFS Filesystem for Linux.
hpfs.txt
- info and mount options for the OS/2 HPFS.
isofs.txt
@@ -32,23 +52,43 @@ ncpfs.txt
- info on Novell Netware(tm) filesystem using NCP protocol.
ntfs.txt
- info and mount options for the NTFS filesystem (Windows NT).
-proc.txt
- - info on Linux's /proc filesystem.
ocfs2.txt
- info and mount options for the OCFS2 clustered filesystem.
+porting
+ - various information on filesystem porting.
+proc.txt
+ - info on Linux's /proc filesystem.
+ramfs-rootfs-initramfs.txt
+ - info on the 'in memory' filesystems ramfs, rootfs and initramfs.
+reiser4.txt
+ - info on the Reiser4 filesystem based on dancing tree algorithms.
+relayfs.txt
+ - info on relayfs, for efficient streaming from kernel to user space.
romfs.txt
- - Description of the ROMFS filesystem.
+ - description of the ROMFS filesystem.
smbfs.txt
- - info on using filesystems with the SMB protocol (Windows 3.11 and NT)
+ - info on using filesystems with the SMB protocol (Win 3.11 and NT).
+spufs.txt
+ - info and mount options for the SPU filesystem used on Cell.
+sysfs-pci.txt
+ - info on accessing PCI device resources through sysfs.
+sysfs.txt
+ - info on sysfs, a ram-based filesystem for exporting kernel objects.
sysv-fs.txt
- info on the SystemV/V7/Xenix/Coherent filesystem.
+tmpfs.txt
+ - info on tmpfs, a filesystem that holds all files in virtual memory.
udf.txt
- info and mount options for the UDF filesystem.
ufs.txt
- info on the ufs filesystem.
+v9fs.txt
+ - v9fs is a Unix implementation of the Plan 9 9p remote fs protocol.
vfat.txt
- info on using the VFAT filesystem used in Windows NT and Windows 95
vfs.txt
- - Overview of the Virtual File System
+ - overview of the Virtual File System
xfs.txt
- info and mount options for the XFS filesystem.
+xip.txt
+ - info on execute-in-place for file mappings.
diff --git a/Documentation/filesystems/v9fs.txt b/Documentation/filesystems/9p.txt
index 24c7a9c41f0d..43b89c214d20 100644
--- a/Documentation/filesystems/v9fs.txt
+++ b/Documentation/filesystems/9p.txt
@@ -1,5 +1,5 @@
- V9FS: 9P2000 for Linux
- ======================
+ v9fs: Plan 9 Resource Sharing for Linux
+ =======================================
ABOUT
=====
@@ -9,18 +9,19 @@ v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol.
This software was originally developed by Ron Minnich <rminnich@lanl.gov>
and Maya Gokhale <maya@lanl.gov>. Additional development by Greg Watson
<gwatson@lanl.gov> and most recently Eric Van Hensbergen
-<ericvh@gmail.com> and Latchesar Ionkov <lucho@ionkov.net>.
+<ericvh@gmail.com>, Latchesar Ionkov <lucho@ionkov.net> and Russ Cox
+<rsc@swtch.com>.
USAGE
=====
For remote file server:
- mount -t 9P 10.10.1.2 /mnt/9
+ mount -t 9p 10.10.1.2 /mnt/9
For Plan 9 From User Space applications (http://swtch.com/plan9)
- mount -t 9P `namespace`/acme /mnt/9 -o proto=unix,name=$USER
+ mount -t 9p `namespace`/acme /mnt/9 -o proto=unix,uname=$USER
OPTIONS
=======
@@ -32,7 +33,7 @@ OPTIONS
fd - used passed file descriptors for connection
(see rfdno and wfdno)
- name=name user name to attempt mount as on the remote server. The
+ uname=name user name to attempt mount as on the remote server. The
server may override or ignore this value. Certain user
names may require authentication.
@@ -42,7 +43,7 @@ OPTIONS
debug=n specifies debug level. The debug level is a bitmask.
0x01 = display verbose error messages
0x02 = developer debug (DEBUG_CURRENT)
- 0x04 = display 9P trace
+ 0x04 = display 9p trace
0x08 = display VFS trace
0x10 = display Marshalling debug
0x20 = display RPC debug
@@ -53,11 +54,11 @@ OPTIONS
wfdno=n the file descriptor for writing with proto=fd
- maxdata=n the number of bytes to use for 9P packet payload (msize)
+ maxdata=n the number of bytes to use for 9p packet payload (msize)
port=n port to connect to on the remote server
- noextend force legacy mode (no 9P2000.u semantics)
+ noextend force legacy mode (no 9p2000.u semantics)
uid attempt to mount as a particular uid
@@ -72,7 +73,7 @@ OPTIONS
RESOURCES
=========
-The Linux version of the 9P server is now maintained under the npfs project
+The Linux version of the 9p server is now maintained under the npfs project
on sourceforge (http://sourceforge.net/projects/npfs).
There are user and developer mailing lists available through the v9fs project
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt
index 424585ff6ea1..758e50401c16 100644
--- a/Documentation/filesystems/isofs.txt
+++ b/Documentation/filesystems/isofs.txt
@@ -9,9 +9,9 @@ when using discs encoded using Microsoft's Joliet extensions.
iocharset=name Character set to use for converting from Unicode to
ASCII. Joliet filenames are stored in Unicode format, but
Unix for the most part doesn't know how to deal with Unicode.
- There is also an option of doing UTF8 translations with the
+ There is also an option of doing UTF-8 translations with the
utf8 option.
- utf8 Encode Unicode names in UTF8 format. Default is no.
+ utf8 Encode Unicode names in UTF-8 format. Default is no.
Mount options unique to the isofs filesystem.
block=512 Set the block size for the disk to 512 bytes
diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt
index 3e992daf99ad..bae128663748 100644
--- a/Documentation/filesystems/jfs.txt
+++ b/Documentation/filesystems/jfs.txt
@@ -6,7 +6,7 @@ The following mount options are supported:
iocharset=name Character set to use for converting from Unicode to
ASCII. The default is to do no conversion. Use
- iocharset=utf8 for UTF8 translations. This requires
+ iocharset=utf8 for UTF-8 translations. This requires
CONFIG_NLS_UTF8 to be set in the kernel .config file.
iocharset=none specifies the default behavior explicitly.
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 251168587899..638cbd3d2b00 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -457,6 +457,11 @@ ChangeLog
Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
+2.1.27:
+ - Implement page migration support so the kernel can move memory used
+ by NTFS files and directories around for management purposes.
+ - Add support for writing to sparse files created with Windows XP SP2.
+ - Many minor improvements and bug fixes.
2.1.26:
- Implement support for sector sizes above 512 bytes (up to the maximum
supported by NTFS which is 4096 bytes).
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 944cf109a6f5..99902ae6804e 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -121,7 +121,7 @@ Table 1-1: Process specific entries in /proc
..............................................................................
File Content
cmdline Command line arguments
- cpu Current and last cpu in wich it was executed (2.4)(smp)
+ cpu Current and last cpu in which it was executed (2.4)(smp)
cwd Link to the current working directory
environ Values of environment variables
exe Link to the executable of this process
@@ -309,13 +309,13 @@ is the same by default:
> cat /proc/irq/0/smp_affinity
ffffffff
-It's a bitmask, in wich you can specify wich CPUs can handle the IRQ, you can
+It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can
set it by doing:
> echo 1 > /proc/irq/prof_cpu_mask
This means that only the first CPU will handle the IRQ, but you can also echo 5
-wich means that only the first and fourth CPU can handle the IRQ.
+which means that only the first and fourth CPU can handle the IRQ.
The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
between all the CPUs which are allowed to handle it. As usual the kernel has
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt
index e5213bc301f7..511b4230c053 100644
--- a/Documentation/filesystems/udf.txt
+++ b/Documentation/filesystems/udf.txt
@@ -26,6 +26,20 @@ The following mount options are supported:
nostrict Unset strict conformance
iocharset= Set the NLS character set
+The uid= and gid= options need a bit more explaining. They will accept a
+decimal numeric value which will be used as the default ID for that mount.
+They will also accept the string "ignore" and "forget". For files on the disk
+that are owned by nobody ( -1 ), they will instead look as if they are owned
+by the default ID. The ignore option causes the default ID to override all
+IDs on the disk, not just -1. The forget option causes all IDs to be written
+to disk as -1, so when the media is later remounted, they will appear to be
+owned by whatever default ID it is mounted with at that time.
+
+For typical desktop use of removable media, you should set the ID to that
+of the interactively logged on user, and also specify both the forget and
+ignore options. This way the interactive user will always see the files
+on the disk as belonging to him.
+
The remaining are for debugging and disaster recovery:
novrs Skip volume sequence recognition
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 5ead20c6c744..2001abbc60e6 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -28,16 +28,16 @@ iocharset=name -- Character set to use for converting between the
know how to deal with Unicode.
By default, FAT_DEFAULT_IOCHARSET setting is used.
- There is also an option of doing UTF8 translations
+ There is also an option of doing UTF-8 translations
with the utf8 option.
NOTE: "iocharset=utf8" is not recommended. If unsure,
you should consider the following option instead.
-utf8=<bool> -- UTF8 is the filesystem safe version of Unicode that
+utf8=<bool> -- UTF-8 is the filesystem safe version of Unicode that
is used by the console. It can be be enabled for the
filesystem with this option. If 'uni_xlate' gets set,
- UTF8 gets disabled.
+ UTF-8 gets disabled.
uni_xlate=<bool> -- Translate unhandled Unicode characters to special
escaped sequences. This would let you backup and
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index e56e842847d3..adaa899e5c90 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -230,10 +230,15 @@ only called from a process context (i.e. not from an interrupt handler
or bottom half).
alloc_inode: this method is called by inode_alloc() to allocate memory
- for struct inode and initialize it.
+ for struct inode and initialize it. If this function is not
+ defined, a simple 'struct inode' is allocated. Normally
+ alloc_inode will be used to allocate a larger structure which
+ contains a 'struct inode' embedded within it.
destroy_inode: this method is called by destroy_inode() to release
- resources allocated for struct inode.
+ resources allocated for struct inode. It is only required if
+ ->alloc_inode was defined and simply undoes anything done by
+ ->alloc_inode.
read_inode: this method is called to read a specific inode from the
mounted filesystem. The i_ino member in the struct inode is
@@ -443,14 +448,81 @@ otherwise noted.
The Address Space Object
========================
-The address space object is used to identify pages in the page cache.
-
+The address space object is used to group and manage pages in the page
+cache. It can be used to keep track of the pages in a file (or
+anything else) and also track the mapping of sections of the file into
+process address spaces.
+
+There are a number of distinct yet related services that an
+address-space can provide. These include communicating memory
+pressure, page lookup by address, and keeping track of pages tagged as
+Dirty or Writeback.
+
+The first can be used independently to the others. The VM can try to
+either write dirty pages in order to clean them, or release clean
+pages in order to reuse them. To do this it can call the ->writepage
+method on dirty pages, and ->releasepage on clean pages with
+PagePrivate set. Clean pages without PagePrivate and with no external
+references will be released without notice being given to the
+address_space.
+
+To achieve this functionality, pages need to be placed on an LRU with
+lru_cache_add and mark_page_active needs to be called whenever the
+page is used.
+
+Pages are normally kept in a radix tree index by ->index. This tree
+maintains information about the PG_Dirty and PG_Writeback status of
+each page, so that pages with either of these flags can be found
+quickly.
+
+The Dirty tag is primarily used by mpage_writepages - the default
+->writepages method. It uses the tag to find dirty pages to call
+->writepage on. If mpage_writepages is not used (i.e. the address
+provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is
+almost unused. write_inode_now and sync_inode do use it (through
+__sync_single_inode) to check if ->writepages has been successful in
+writing out the whole address_space.
+
+The Writeback tag is used by filemap*wait* and sync_page* functions,
+via wait_on_page_writeback_range, to wait for all writeback to
+complete. While waiting ->sync_page (if defined) will be called on
+each page that is found to require writeback.
+
+An address_space handler may attach extra information to a page,
+typically using the 'private' field in the 'struct page'. If such
+information is attached, the PG_Private flag should be set. This will
+cause various VM routines to make extra calls into the address_space
+handler to deal with that data.
+
+An address space acts as an intermediate between storage and
+application. Data is read into the address space a whole page at a
+time, and provided to the application either by copying of the page,
+or by memory-mapping the page.
+Data is written into the address space by the application, and then
+written-back to storage typically in whole pages, however the
+address_space has finer control of write sizes.
+
+The read process essentially only requires 'readpage'. The write
+process is more complicated and uses prepare_write/commit_write or
+set_page_dirty to write data into the address_space, and writepage,
+sync_page, and writepages to writeback data to storage.
+
+Adding and removing pages to/from an address_space is protected by the
+inode's i_mutex.
+
+When data is written to a page, the PG_Dirty flag should be set. It
+typically remains set until writepage asks for it to be written. This
+should clear PG_Dirty and set PG_Writeback. It can be actually
+written at any point after PG_Dirty is clear. Once it is known to be
+safe, PG_Writeback is cleared.
+
+Writeback makes use of a writeback_control structure...
struct address_space_operations
-------------------------------
This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem. As of kernel 2.6.13, the following members are defined:
+your filesystem. As of kernel 2.6.16, the following members are defined:
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -469,47 +541,148 @@ struct address_space_operations {
loff_t offset, unsigned long nr_segs);
struct page* (*get_xip_page)(struct address_space *, sector_t,
int);
+ /* migrate the contents of a page to the specified target */
+ int (*migratepage) (struct page *, struct page *);
};
- writepage: called by the VM write a dirty page to backing store.
+ writepage: called by the VM to write a dirty page to backing store.
+ This may happen for data integrity reasons (i.e. 'sync'), or
+ to free up memory (flush). The difference can be seen in
+ wbc->sync_mode.
+ The PG_Dirty flag has been cleared and PageLocked is true.
+ writepage should start writeout, should set PG_Writeback,
+ and should make sure the page is unlocked, either synchronously
+ or asynchronously when the write operation completes.
+
+ If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
+ try too hard if there are problems, and may choose to write out
+ other pages from the mapping if that is easier (e.g. due to
+ internal dependencies). If it chooses not to start writeout, it
+ should return AOP_WRITEPAGE_ACTIVATE so that the VM will not keep
+ calling ->writepage on that page.
+
+ See the file "Locking" for more details.
readpage: called by the VM to read a page from backing store.
+ The page will be Locked when readpage is called, and should be
+ unlocked and marked uptodate once the read completes.
+ If ->readpage discovers that it needs to unlock the page for
+ some reason, it can do so, and then return AOP_TRUNCATED_PAGE.
+ In this case, the page will be relocated, relocked and if
+ that all succeeds, ->readpage will be called again.
sync_page: called by the VM to notify the backing store to perform all
queued I/O operations for a page. I/O operations for other pages
associated with this address_space object may also be performed.
+ This function is optional and is called only for pages with
+ PG_Writeback set while waiting for the writeback to complete.
+
writepages: called by the VM to write out pages associated with the
- address_space object.
+ address_space object. If wbc->sync_mode is WBC_SYNC_ALL, then
+ the writeback_control will specify a range of pages that must be
+ written out. If it is WBC_SYNC_NONE, then a nr_to_write is given
+ and that many pages should be written if possible.
+ If no ->writepages is given, then mpage_writepages is used
+ instead. This will choose pages from the address space that are
+ tagged as DIRTY and will pass them to ->writepage.
set_page_dirty: called by the VM to set a page dirty.
+ This is particularly needed if an address space attaches
+ private data to a page, and that data needs to be updated when
+ a page is dirtied. This is called, for example, when a memory
+ mapped page gets modified.
+ If defined, it should set the PageDirty flag, and the
+ PAGECACHE_TAG_DIRTY tag in the radix tree.
readpages: called by the VM to read pages associated with the address_space
- object.
+ object. This is essentially just a vector version of
+ readpage. Instead of just one page, several pages are
+ requested.
+ readpages is only used for read-ahead, so read errors are
+ ignored. If anything goes wrong, feel free to give up.
prepare_write: called by the generic write path in VM to set up a write
- request for a page.
-
- commit_write: called by the generic write path in VM to write page to
- its backing store.
+ request for a page. This indicates to the address space that
+ the given range of bytes is about to be written. The
+ address_space should check that the write will be able to
+ complete, by allocating space if necessary and doing any other
+ internal housekeeping. If the write will update parts of
+ any basic-blocks on storage, then those blocks should be
+ pre-read (if they haven't been read already) so that the
+ updated blocks can be written out properly.
+ The page will be locked. If prepare_write wants to unlock the
+ page it, like readpage, may do so and return
+ AOP_TRUNCATED_PAGE.
+ In this case the prepare_write will be retried one the lock is
+ regained.
+
+ commit_write: If prepare_write succeeds, new data will be copied
+ into the page and then commit_write will be called. It will
+ typically update the size of the file (if appropriate) and
+ mark the inode as dirty, and do any other related housekeeping
+ operations. It should avoid returning an error if possible -
+ errors should have been handled by prepare_write.
bmap: called by the VFS to map a logical block offset within object to
- physical block number. This method is use by for the legacy FIBMAP
- ioctl. Other uses are discouraged.
-
- invalidatepage: called by the VM on truncate to disassociate a page from its
- address_space mapping.
-
- releasepage: called by the VFS to release filesystem specific metadata from
- a page.
-
- direct_IO: called by the VM for direct I/O writes and reads.
+ physical block number. This method is used by the FIBMAP
+ ioctl and for working with swap-files. To be able to swap to
+ a file, the file must have a stable mapping to a block
+ device. The swap system does not go through the filesystem
+ but instead uses bmap to find out where the blocks in the file
+ are and uses those addresses directly.
+
+
+ invalidatepage: If a page has PagePrivate set, then invalidatepage
+ will be called when part or all of the page is to be removed
+ from the address space. This generally corresponds to either a
+ truncation or a complete invalidation of the address space
+ (in the latter case 'offset' will always be 0).
+ Any private data associated with the page should be updated
+ to reflect this truncation. If offset is 0, then
+ the private data should be released, because the page
+ must be able to be completely discarded. This may be done by
+ calling the ->releasepage function, but in this case the
+ release MUST succeed.
+
+ releasepage: releasepage is called on PagePrivate pages to indicate
+ that the page should be freed if possible. ->releasepage
+ should remove any private data from the page and clear the
+ PagePrivate flag. It may also remove the page from the
+ address_space. If this fails for some reason, it may indicate
+ failure with a 0 return value.
+ This is used in two distinct though related cases. The first
+ is when the VM finds a clean page with no active users and
+ wants to make it a free page. If ->releasepage succeeds, the
+ page will be removed from the address_space and become free.
+
+ The second case if when a request has been made to invalidate
+ some or all pages in an address_space. This can happen
+ through the fadvice(POSIX_FADV_DONTNEED) system call or by the
+ filesystem explicitly requesting it as nfs and 9fs do (when
+ they believe the cache may be out of date with storage) by
+ calling invalidate_inode_pages2().
+ If the filesystem makes such a call, and needs to be certain
+ that all pages are invalidated, then its releasepage will
+ need to ensure this. Possibly it can clear the PageUptodate
+ bit if it cannot free private data yet.
+
+ direct_IO: called by the generic read/write routines to perform
+ direct_IO - that is IO requests which bypass the page cache
+ and transfer data directly between the storage and the
+ application's address space.
get_xip_page: called by the VM to translate a block number to a page.
The page is valid until the corresponding filesystem is unmounted.
Filesystems that want to use execute-in-place (XIP) need to implement
it. An example implementation can be found in fs/ext2/xip.c.
+ migrate_page: This is used to compact the physical memory usage.
+ If the VM wants to relocate a page (maybe off a memory card
+ that is signalling imminent failure) it will pass a new page
+ and an old page to this function. migrate_page should
+ transfer any private data across and update any references
+ that it has to the page.
The File Object
===============
diff --git a/Documentation/firmware_class/firmware_sample_driver.c b/Documentation/firmware_class/firmware_sample_driver.c
index d3ad2c24490a..ad3edaba4533 100644
--- a/Documentation/firmware_class/firmware_sample_driver.c
+++ b/Documentation/firmware_class/firmware_sample_driver.c
@@ -23,7 +23,6 @@ char __init inkernel_firmware[] = "let's say that this is firmware\n";
#endif
static struct device ghost_device = {
- .name = "Ghost Device",
.bus_id = "ghost0",
};
@@ -92,7 +91,7 @@ static void sample_probe_async(void)
{
/* Let's say that I can't sleep */
int error;
- error = request_firmware_nowait (THIS_MODULE,
+ error = request_firmware_nowait (THIS_MODULE, FW_ACTION_NOHOTPLUG,
"sample_driver_fw", &ghost_device,
"my device pointer",
sample_probe_async_cont);
diff --git a/Documentation/firmware_class/firmware_sample_firmware_class.c b/Documentation/firmware_class/firmware_sample_firmware_class.c
index 57b956aecbc5..9e1b0e4051cd 100644
--- a/Documentation/firmware_class/firmware_sample_firmware_class.c
+++ b/Documentation/firmware_class/firmware_sample_firmware_class.c
@@ -172,7 +172,6 @@ static void fw_remove_class_device(struct class_device *class_dev)
static struct class_device *class_dev;
static struct device my_device = {
- .name = "Sample Device",
.bus_id = "my_dev0",
};
diff --git a/Documentation/hwmon/w83627hf b/Documentation/hwmon/w83627hf
index bbeaba680443..792231921241 100644
--- a/Documentation/hwmon/w83627hf
+++ b/Documentation/hwmon/w83627hf
@@ -18,6 +18,10 @@ Supported chips:
Prefix: 'w83637hf'
Addresses scanned: ISA address retrieved from Super I/O registers
Datasheet: http://www.winbond.com/PDF/sheet/w83637hf.pdf
+ * Winbond W83687THF
+ Prefix: 'w83687thf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Provided by Winbond on request
Authors:
Frodo Looijaard <frodol@dds.nl>,
diff --git a/Documentation/hwmon/w83781d b/Documentation/hwmon/w83781d
index e5459333ba68..b1e9f80098ee 100644
--- a/Documentation/hwmon/w83781d
+++ b/Documentation/hwmon/w83781d
@@ -36,6 +36,11 @@ Module parameters
Use 'init=0' to bypass initializing the chip.
Try this if your computer crashes when you load the module.
+* reset int
+ (default 0)
+ The driver used to reset the chip on load, but does no more. Use
+ 'reset=1' to restore the old behavior. Report if you need to do this.
+
force_subclients=bus,caddr,saddr,saddr
This is used to force the i2c addresses for subclients of
a certain chip. Typical usage is `force_subclients=0,0x2d,0x4a,0x4b'
@@ -123,6 +128,25 @@ When an alarm goes off, you can be warned by a beeping signal through
your computer speaker. It is possible to enable all beeping globally,
or only the beeping for some alarms.
+Individual alarm and beep bits:
+
+0x000001: in0
+0x000002: in1
+0x000004: in2
+0x000008: in3
+0x000010: temp1
+0x000020: temp2 (+temp3 on W83781D)
+0x000040: fan1
+0x000080: fan2
+0x000100: in4
+0x000200: in5
+0x000400: in6
+0x000800: fan3
+0x001000: chassis
+0x002000: temp3 (W83782D and W83627HF only)
+0x010000: in7 (W83782D and W83627HF only)
+0x020000: in8 (W83782D and W83627HF only)
+
If an alarm triggers, it will remain triggered until the hardware register
is read at least once. This means that the cause for the alarm may
already have disappeared! Note that in the current implementation, all
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
index 856b4b8b962c..a1c8f581afed 100644
--- a/Documentation/i2c/busses/i2c-piix4
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -4,7 +4,7 @@ Supported adapters:
* Intel 82371AB PIIX4 and PIIX4E
* Intel 82443MX (440MX)
Datasheet: Publicly available at the Intel website
- * ServerWorks OSB4, CSB5 and CSB6 southbridges
+ * ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges
Datasheet: Only available via NDA from ServerWorks
* Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
Datasheet: Publicly available at the SMSC website http://www.smsc.com
diff --git a/Documentation/i2c/busses/scx200_acb b/Documentation/i2c/busses/scx200_acb
index 08c8cd1df60c..f50e69981ec6 100644
--- a/Documentation/i2c/busses/scx200_acb
+++ b/Documentation/i2c/busses/scx200_acb
@@ -6,9 +6,10 @@ Module Parameters
-----------------
* base: int
- Base addresses for the ACCESS.bus controllers
+ Base addresses for the ACCESS.bus controllers on SCx200 and SC1100 devices
Description
-----------
-Enable the use of the ACCESS.bus controllers of a SCx200 processor.
+Enable the use of the ACCESS.bus controller on the Geode SCx200 and
+SC1100 processors and the CS5535 and CS5536 Geode companion devices.
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index aa7ba00ec082..171a44ebd939 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -78,8 +78,6 @@ Code Seq# Include File Comments
'#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem
'1' 00-1F <linux/timepps.h> PPS kit from Ulrich Windl
<ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
-'6' 00-10 <asm-i386/processor.h> Intel IA32 microcode update driver
- <mailto:tigran@veritas.com>
'8' all SNP8023 advanced NIC card
<mailto:mcr@solidum.com>
'A' 00-1F linux/apm_bios.h
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 443230b43e09..a9c00facdf40 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -17,6 +17,7 @@ This document describes the Linux kernel Makefiles.
--- 3.8 Command line dependency
--- 3.9 Dependency tracking
--- 3.10 Special Rules
+ --- 3.11 $(CC) support functions
=== 4 Host Program support
--- 4.1 Simple Host Program
@@ -38,7 +39,6 @@ This document describes the Linux kernel Makefiles.
--- 6.6 Commands useful for building a boot image
--- 6.7 Custom kbuild commands
--- 6.8 Preprocessing linker scripts
- --- 6.9 $(CC) support functions
=== 7 Kbuild Variables
=== 8 Makefile language
@@ -106,9 +106,9 @@ This document is aimed towards normal developers and arch developers.
Most Makefiles within the kernel are kbuild Makefiles that use the
kbuild infrastructure. This chapter introduce the syntax used in the
kbuild makefiles.
-The preferred name for the kbuild files is 'Kbuild' but 'Makefile' will
-continue to be supported. All new developmen is expected to use the
-Kbuild filename.
+The preferred name for the kbuild files are 'Makefile' but 'Kbuild' can
+be used and if both a 'Makefile' and a 'Kbuild' file exists then the 'Kbuild'
+file will be used.
Section 3.1 "Goal definitions" is a quick intro, further chapters provide
more details, with real examples.
@@ -385,6 +385,102 @@ more details, with real examples.
to prerequisites are referenced with $(src) (because they are not
generated files).
+--- 3.11 $(CC) support functions
+
+ The kernel may be build with several different versions of
+ $(CC), each supporting a unique set of features and options.
+ kbuild provide basic support to check for valid options for $(CC).
+ $(CC) is useally the gcc compiler, but other alternatives are
+ available.
+
+ as-option
+ as-option is used to check if $(CC) when used to compile
+ assembler (*.S) files supports the given option. An optional
+ second option may be specified if first option are not supported.
+
+ Example:
+ #arch/sh/Makefile
+ cflags-y += $(call as-option,-Wa$(comma)-isa=$(isa-y),)
+
+ In the above example cflags-y will be assinged the the option
+ -Wa$(comma)-isa=$(isa-y) if it is supported by $(CC).
+ The second argument is optional, and if supplied will be used
+ if first argument is not supported.
+
+ cc-option
+ cc-option is used to check if $(CC) support a given option, and not
+ supported to use an optional second option.
+
+ Example:
+ #arch/i386/Makefile
+ cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
+
+ In the above example cflags-y will be assigned the option
+ -march=pentium-mmx if supported by $(CC), otherwise -march-i586.
+ The second argument to cc-option is optional, and if omitted
+ cflags-y will be assigned no value if first option is not supported.
+
+ cc-option-yn
+ cc-option-yn is used to check if gcc supports a given option
+ and return 'y' if supported, otherwise 'n'.
+
+ Example:
+ #arch/ppc/Makefile
+ biarch := $(call cc-option-yn, -m32)
+ aflags-$(biarch) += -a32
+ cflags-$(biarch) += -m32
+
+ In the above example $(biarch) is set to y if $(CC) supports the -m32
+ option. When $(biarch) equals to y the expanded variables $(aflags-y)
+ and $(cflags-y) will be assigned the values -a32 and -m32.
+
+ cc-option-align
+ gcc version >= 3.0 shifted type of options used to speify
+ alignment of functions, loops etc. $(cc-option-align) whrn used
+ as prefix to the align options will select the right prefix:
+ gcc < 3.00
+ cc-option-align = -malign
+ gcc >= 3.00
+ cc-option-align = -falign
+
+ Example:
+ CFLAGS += $(cc-option-align)-functions=4
+
+ In the above example the option -falign-functions=4 is used for
+ gcc >= 3.00. For gcc < 3.00 -malign-functions=4 is used.
+
+ cc-version
+ cc-version return a numerical version of the $(CC) compiler version.
+ The format is <major><minor> where both are two digits. So for example
+ gcc 3.41 would return 0341.
+ cc-version is useful when a specific $(CC) version is faulty in one
+ area, for example the -mregparm=3 were broken in some gcc version
+ even though the option was accepted by gcc.
+
+ Example:
+ #arch/i386/Makefile
+ cflags-y += $(shell \
+ if [ $(call cc-version) -ge 0300 ] ; then \
+ echo "-mregparm=3"; fi ;)
+
+ In the above example -mregparm=3 is only used for gcc version greater
+ than or equal to gcc 3.0.
+
+ cc-ifversion
+ cc-ifversion test the version of $(CC) and equals last argument if
+ version expression is true.
+
+ Example:
+ #fs/reiserfs/Makefile
+ EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0402, -O1)
+
+ In this example EXTRA_CFLAGS will be assigned the value -O1 if the
+ $(CC) version is less than 4.2.
+ cc-ifversion takes all the shell operators:
+ -eq, -ne, -lt, -le, -gt, and -ge
+ The third parameter may be a text as in this example, but it may also
+ be an expanded variable or a macro.
+
=== 4 Host Program support
@@ -973,74 +1069,6 @@ When kbuild executes the following steps are followed (roughly):
architecture specific files.
---- 6.9 $(CC) support functions
-
- The kernel may be build with several different versions of
- $(CC), each supporting a unique set of features and options.
- kbuild provide basic support to check for valid options for $(CC).
- $(CC) is useally the gcc compiler, but other alternatives are
- available.
-
- cc-option
- cc-option is used to check if $(CC) support a given option, and not
- supported to use an optional second option.
-
- Example:
- #arch/i386/Makefile
- cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
-
- In the above example cflags-y will be assigned the option
- -march=pentium-mmx if supported by $(CC), otherwise -march-i586.
- The second argument to cc-option is optional, and if omitted
- cflags-y will be assigned no value if first option is not supported.
-
- cc-option-yn
- cc-option-yn is used to check if gcc supports a given option
- and return 'y' if supported, otherwise 'n'.
-
- Example:
- #arch/ppc/Makefile
- biarch := $(call cc-option-yn, -m32)
- aflags-$(biarch) += -a32
- cflags-$(biarch) += -m32
-
- In the above example $(biarch) is set to y if $(CC) supports the -m32
- option. When $(biarch) equals to y the expanded variables $(aflags-y)
- and $(cflags-y) will be assigned the values -a32 and -m32.
-
- cc-option-align
- gcc version >= 3.0 shifted type of options used to speify
- alignment of functions, loops etc. $(cc-option-align) whrn used
- as prefix to the align options will select the right prefix:
- gcc < 3.00
- cc-option-align = -malign
- gcc >= 3.00
- cc-option-align = -falign
-
- Example:
- CFLAGS += $(cc-option-align)-functions=4
-
- In the above example the option -falign-functions=4 is used for
- gcc >= 3.00. For gcc < 3.00 -malign-functions=4 is used.
-
- cc-version
- cc-version return a numerical version of the $(CC) compiler version.
- The format is <major><minor> where both are two digits. So for example
- gcc 3.41 would return 0341.
- cc-version is useful when a specific $(CC) version is faulty in one
- area, for example the -mregparm=3 were broken in some gcc version
- even though the option was accepted by gcc.
-
- Example:
- #arch/i386/Makefile
- cflags-y += $(shell \
- if [ $(call cc-version) -ge 0300 ] ; then \
- echo "-mregparm=3"; fi ;)
-
- In the above example -mregparm=3 is only used for gcc version greater
- than or equal to gcc 3.0.
-
-
=== 7 Kbuild Variables
The top Makefile exports the following variables:
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index 7e77f93634ea..fcccf2432f98 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -13,6 +13,7 @@ In this document you will find information about:
--- 2.2 Available targets
--- 2.3 Available options
--- 2.4 Preparing the kernel tree for module build
+ --- 2.5 Building separate files for a module
=== 3. Example commands
=== 4. Creating a kbuild file for an external module
=== 5. Include files
@@ -22,7 +23,10 @@ In this document you will find information about:
=== 6. Module installation
--- 6.1 INSTALL_MOD_PATH
--- 6.2 INSTALL_MOD_DIR
- === 7. Module versioning
+ === 7. Module versioning & Module.symvers
+ --- 7.1 Symbols fron the kernel (vmlinux + modules)
+ --- 7.2 Symbols and external modules
+ --- 7.3 Symbols from another external module
=== 8. Tips & Tricks
--- 8.1 Testing for CONFIG_FOO_BAR
@@ -88,7 +92,8 @@ when building an external module.
make -C $KDIR M=$PWD modules_install
Install the external module(s).
Installation default is in /lib/modules/<kernel-version>/extra,
- but may be prefixed with INSTALL_MOD_PATH - see separate chapter.
+ but may be prefixed with INSTALL_MOD_PATH - see separate
+ chapter.
make -C $KDIR M=$PWD clean
Remove all generated files for the module - the kernel
@@ -131,6 +136,16 @@ when building an external module.
Therefore a full kernel build needs to be executed to make
module versioning work.
+--- 2.5 Building separate files for a module
+ It is possible to build single files which is part of a module.
+ This works equal for the kernel, a module and even for external
+ modules.
+ Examples (module foo.ko, consist of bar.o, baz.o):
+ make -C $KDIR M=`pwd` bar.lst
+ make -C $KDIR M=`pwd` bar.o
+ make -C $KDIR M=`pwd` foo.ko
+ make -C $KDIR M=`pwd` /
+
=== 3. Example commands
@@ -422,7 +437,7 @@ External modules are installed in the directory:
=> Install dir: /lib/modules/$(KERNELRELEASE)/gandalf
-=== 7. Module versioning
+=== 7. Module versioning & Module.symvers
Module versioning is enabled by the CONFIG_MODVERSIONS tag.
@@ -432,11 +447,80 @@ when a module is loaded/used then the CRC values contained in the kernel are
compared with similar values in the module. If they are not equal then the
kernel refuses to load the module.
-During a kernel build a file named Module.symvers will be generated. This
-file includes the symbol version of all symbols within the kernel. If the
-Module.symvers file is saved from the last full kernel compile one does not
-have to do a full kernel compile to build a module version's compatible module.
+Module.symvers contains a list of all exported symbols from a kernel build.
+
+--- 7.1 Symbols fron the kernel (vmlinux + modules)
+
+ During a kernel build a file named Module.symvers will be generated.
+ Module.symvers contains all exported symbols from the kernel and
+ compiled modules. For each symbols the corresponding CRC value
+ is stored too.
+
+ The syntax of the Module.symvers file is:
+ <CRC> <Symbol> <module>
+ Sample:
+ 0x2d036834 scsi_remove_host drivers/scsi/scsi_mod
+ For a kernel build without CONFIG_MODVERSIONING enabled the crc
+ would read: 0x00000000
+
+ Module.symvers serve two purposes.
+ 1) It list all exported symbols both from vmlinux and all modules
+ 2) It list CRC if CONFIG_MODVERSION is enabled
+
+--- 7.2 Symbols and external modules
+
+ When building an external module the build system needs access to
+ the symbols from the kernel to check if all external symbols are
+ defined. This is done in the MODPOST step and to obtain all
+ symbols modpost reads Module.symvers from the kernel.
+ If a Module.symvers file is present in the directory where
+ the external module is being build this file will be read too.
+ During the MODPOST step a new Module.symvers file will be written
+ containing all exported symbols that was not defined in the kernel.
+
+--- 7.3 Symbols from another external module
+
+ Sometimes one external module uses exported symbols from another
+ external module. Kbuild needs to have full knowledge on all symbols
+ to avoid spitting out warnings about undefined symbols.
+ Two solutions exist to let kbuild know all symbols of more than
+ one external module.
+ The method with a top-level kbuild file is recommended but may be
+ impractical in certain situations.
+
+ Use a top-level Kbuild file
+ If you have two modules: 'foo', 'bar' and 'foo' needs symbols
+ from 'bar' then one can use a common top-level kbuild file so
+ both modules are compiled in same build.
+
+ Consider following directory layout:
+ ./foo/ <= contains the foo module
+ ./bar/ <= contains the bar module
+ The top-level Kbuild file would then look like:
+
+ #./Kbuild: (this file may also be named Makefile)
+ obj-y := foo/ bar/
+
+ Executing:
+ make -C $KDIR M=`pwd`
+
+ will then do the expected and compile both modules with full
+ knowledge on symbols from both modules.
+
+ Use an extra Module.symvers file
+ When an external module is build a Module.symvers file is
+ generated containing all exported symbols which are not
+ defined in the kernel.
+ To get access to symbols from module 'bar' one can copy the
+ Module.symvers file from the compilation of the 'bar' module
+ to the directory where the 'foo' module is build.
+ During the module build kbuild will read the Module.symvers
+ file in the directory of the external module and when the
+ build is finished a new Module.symvers file is created
+ containing the sum of all symbols defined and not part of the
+ kernel.
+
=== 8. Tips & Tricks
--- 8.1 Testing for CONFIG_FOO_BAR
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fc99075e0af4..f8cb55c30b0f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ restrictions referred to are that the relevant option is valid if:
MCA MCA bus support is enabled.
MDA MDA console support is enabled.
MOUSE Appropriate mouse support is enabled.
+ MSI Message Signaled Interrupts (PCI).
MTD MTD support is enabled.
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
@@ -366,12 +367,17 @@ running once the system is up.
tty<n> Use the virtual console device <n>.
ttyS<n>[,options]
+ ttyUSB0[,options]
Use the specified serial port. The options are of
- the form "bbbbpn", where "bbbb" is the baud rate,
- "p" is parity ("n", "o", or "e"), and "n" is bits.
- Default is "9600n8".
+ the form "bbbbpnf", where "bbbb" is the baud rate,
+ "p" is parity ("n", "o", or "e"), "n" is number of
+ bits, and "f" is flow control ("r" for RTS or
+ omit it). Default is "9600n8".
- See also Documentation/serial-console.txt.
+ See Documentation/serial-console.txt for more
+ information. See
+ Documentation/networking/netconsole.txt for an
+ alternative.
uart,io,<addr>[,options]
uart,mmio,<addr>[,options]
@@ -1008,7 +1014,9 @@ running once the system is up.
noexec=on: enable non-executable mappings (default)
noexec=off: disable nn-executable mappings
- nofxsr [BUGS=IA-32]
+ nofxsr [BUGS=IA-32] Disables x86 floating point extended
+ register save and restore. The kernel will only save
+ legacy floating-point registers on task switch.
nohlt [BUGS=ARM]
@@ -1053,6 +1061,8 @@ running once the system is up.
nosbagart [IA-64]
+ nosep [BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support.
+
nosmp [SMP] Tells an SMP kernel to act as a UP kernel.
nosync [HW,M68K] Disables sync negotiation for all devices.
@@ -1122,6 +1132,11 @@ running once the system is up.
pas16= [HW,SCSI]
See header of drivers/scsi/pas16.c.
+ pause_on_oops=
+ Halt all CPUs after the first oops has been printed for
+ the specified number of seconds. This is to be used if
+ your oopses keep scrolling off the screen.
+
pcbit= [HW,ISDN]
pcd. [PARIDE]
@@ -1143,6 +1158,9 @@ running once the system is up.
Mechanism 2.
nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI
Configuration
+ nomsi [MSI] If the PCI_MSI kernel config parameter is
+ enabled, this kernel boot option can be used to
+ disable the use of MSI interrupts system-wide.
nosort [IA-32] Don't sort PCI devices according to
order given by the PCI BIOS. This sorting is
done to get a device order compatible with
diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt
new file mode 100644
index 000000000000..8c35c0426110
--- /dev/null
+++ b/Documentation/leds-class.txt
@@ -0,0 +1,71 @@
+LED handling under Linux
+========================
+
+If you're reading this and thinking about keyboard leds, these are
+handled by the input subsystem and the led class is *not* needed.
+
+In its simplest form, the LED class just allows control of LEDs from
+userspace. LEDs appear in /sys/class/leds/. The brightness file will
+set the brightness of the LED (taking a value 0-255). Most LEDs don't
+have hardware brightness support so will just be turned on for non-zero
+brightness settings.
+
+The class also introduces the optional concept of an LED trigger. A trigger
+is a kernel based source of led events. Triggers can either be simple or
+complex. A simple trigger isn't configurable and is designed to slot into
+existing subsystems with minimal additional code. Examples are the ide-disk,
+nand-disk and sharpsl-charge triggers. With led triggers disabled, the code
+optimises away.
+
+Complex triggers whilst available to all LEDs have LED specific
+parameters and work on a per LED basis. The timer trigger is an example.
+
+You can change triggers in a similar manner to the way an IO scheduler
+is chosen (via /sys/class/leds/<device>/trigger). Trigger specific
+parameters can appear in /sys/class/leds/<device> once a given trigger is
+selected.
+
+
+Design Philosophy
+=================
+
+The underlying design philosophy is simplicity. LEDs are simple devices
+and the aim is to keep a small amount of code giving as much functionality
+as possible. Please keep this in mind when suggesting enhancements.
+
+
+LED Device Naming
+=================
+
+Is currently of the form:
+
+"devicename:colour"
+
+There have been calls for LED properties such as colour to be exported as
+individual led class attributes. As a solution which doesn't incur as much
+overhead, I suggest these become part of the device name. The naming scheme
+above leaves scope for further attributes should they be needed.
+
+
+Known Issues
+============
+
+The LED Trigger core cannot be a module as the simple trigger functions
+would cause nightmare dependency issues. I see this as a minor issue
+compared to the benefits the simple trigger functionality brings. The
+rest of the LED subsystem can be modular.
+
+Some leds can be programmed to flash in hardware. As this isn't a generic
+LED device property, this should be exported as a device specific sysfs
+attribute rather than part of the class if this functionality is required.
+
+
+Future Development
+==================
+
+At the moment, a trigger can't be created specifically for a single LED.
+There are a number of cases where a trigger might only be mappable to a
+particular LED (ACPI?). The addition of triggers provided by the LED driver
+should cover this option and be possible to add without breaking the
+current interface.
+
diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha
index bf802ffc98ad..ef484a719bb9 100644
--- a/Documentation/m68k/README.buddha
+++ b/Documentation/m68k/README.buddha
@@ -29,7 +29,7 @@ address is written to $4a, then the whole Byte is written to
$48, while it doesn't matter how often you're writing to $4a
as long as $48 is not touched. After $48 has been written,
the whole card disappears from $e8 and is mapped to the new
-address just written. Make shure $4a is written before $48,
+address just written. Make sure $4a is written before $48,
otherwise your chance is only 1:16 to find the board :-).
The local memory-map is even active when mapped to $e8:
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
new file mode 100644
index 000000000000..f8550310a6d5
--- /dev/null
+++ b/Documentation/memory-barriers.txt
@@ -0,0 +1,1913 @@
+ ============================
+ LINUX KERNEL MEMORY BARRIERS
+ ============================
+
+By: David Howells <dhowells@redhat.com>
+
+Contents:
+
+ (*) Abstract memory access model.
+
+ - Device operations.
+ - Guarantees.
+
+ (*) What are memory barriers?
+
+ - Varieties of memory barrier.
+ - What may not be assumed about memory barriers?
+ - Data dependency barriers.
+ - Control dependencies.
+ - SMP barrier pairing.
+ - Examples of memory barrier sequences.
+
+ (*) Explicit kernel barriers.
+
+ - Compiler barrier.
+ - The CPU memory barriers.
+ - MMIO write barrier.
+
+ (*) Implicit kernel memory barriers.
+
+ - Locking functions.
+ - Interrupt disabling functions.
+ - Miscellaneous functions.
+
+ (*) Inter-CPU locking barrier effects.
+
+ - Locks vs memory accesses.
+ - Locks vs I/O accesses.
+
+ (*) Where are memory barriers needed?
+
+ - Interprocessor interaction.
+ - Atomic operations.
+ - Accessing devices.
+ - Interrupts.
+
+ (*) Kernel I/O barrier effects.
+
+ (*) Assumed minimum execution ordering model.
+
+ (*) The effects of the cpu cache.
+
+ - Cache coherency.
+ - Cache coherency vs DMA.
+ - Cache coherency vs MMIO.
+
+ (*) The things CPUs get up to.
+
+ - And then there's the Alpha.
+
+ (*) References.
+
+
+============================
+ABSTRACT MEMORY ACCESS MODEL
+============================
+
+Consider the following abstract model of the system:
+
+ : :
+ : :
+ : :
+ +-------+ : +--------+ : +-------+
+ | | : | | : | |
+ | | : | | : | |
+ | CPU 1 |<----->| Memory |<----->| CPU 2 |
+ | | : | | : | |
+ | | : | | : | |
+ +-------+ : +--------+ : +-------+
+ ^ : ^ : ^
+ | : | : |
+ | : | : |
+ | : v : |
+ | : +--------+ : |
+ | : | | : |
+ | : | | : |
+ +---------->| Device |<----------+
+ : | | :
+ : | | :
+ : +--------+ :
+ : :
+
+Each CPU executes a program that generates memory access operations. In the
+abstract CPU, memory operation ordering is very relaxed, and a CPU may actually
+perform the memory operations in any order it likes, provided program causality
+appears to be maintained. Similarly, the compiler may also arrange the
+instructions it emits in any order it likes, provided it doesn't affect the
+apparent operation of the program.
+
+So in the above diagram, the effects of the memory operations performed by a
+CPU are perceived by the rest of the system as the operations cross the
+interface between the CPU and rest of the system (the dotted lines).
+
+
+For example, consider the following sequence of events:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1; B == 2 }
+ A = 3; x = A;
+ B = 4; y = B;
+
+The set of accesses as seen by the memory system in the middle can be arranged
+in 24 different combinations:
+
+ STORE A=3, STORE B=4, x=LOAD A->3, y=LOAD B->4
+ STORE A=3, STORE B=4, y=LOAD B->4, x=LOAD A->3
+ STORE A=3, x=LOAD A->3, STORE B=4, y=LOAD B->4
+ STORE A=3, x=LOAD A->3, y=LOAD B->2, STORE B=4
+ STORE A=3, y=LOAD B->2, STORE B=4, x=LOAD A->3
+ STORE A=3, y=LOAD B->2, x=LOAD A->3, STORE B=4
+ STORE B=4, STORE A=3, x=LOAD A->3, y=LOAD B->4
+ STORE B=4, ...
+ ...
+
+and can thus result in four different combinations of values:
+
+ x == 1, y == 2
+ x == 1, y == 4
+ x == 3, y == 2
+ x == 3, y == 4
+
+
+Furthermore, the stores committed by a CPU to the memory system may not be
+perceived by the loads made by another CPU in the same order as the stores were
+committed.
+
+
+As a further example, consider this sequence of events:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4; Q = P;
+ P = &B D = *Q;
+
+There is an obvious data dependency here, as the value loaded into D depends on
+the address retrieved from P by CPU 2. At the end of the sequence, any of the
+following results are possible:
+
+ (Q == &A) and (D == 1)
+ (Q == &B) and (D == 2)
+ (Q == &B) and (D == 4)
+
+Note that CPU 2 will never try and load C into D because the CPU will load P
+into Q before issuing the load of *Q.
+
+
+DEVICE OPERATIONS
+-----------------
+
+Some devices present their control interfaces as collections of memory
+locations, but the order in which the control registers are accessed is very
+important. For instance, imagine an ethernet card with a set of internal
+registers that are accessed through an address port register (A) and a data
+port register (D). To read internal register 5, the following code might then
+be used:
+
+ *A = 5;
+ x = *D;
+
+but this might show up as either of the following two sequences:
+
+ STORE *A = 5, x = LOAD *D
+ x = LOAD *D, STORE *A = 5
+
+the second of which will almost certainly result in a malfunction, since it set
+the address _after_ attempting to read the register.
+
+
+GUARANTEES
+----------
+
+There are some minimal guarantees that may be expected of a CPU:
+
+ (*) On any given CPU, dependent memory accesses will be issued in order, with
+ respect to itself. This means that for:
+
+ Q = P; D = *Q;
+
+ the CPU will issue the following memory operations:
+
+ Q = LOAD P, D = LOAD *Q
+
+ and always in that order.
+
+ (*) Overlapping loads and stores within a particular CPU will appear to be
+ ordered within that CPU. This means that for:
+
+ a = *X; *X = b;
+
+ the CPU will only issue the following sequence of memory operations:
+
+ a = LOAD *X, STORE *X = b
+
+ And for:
+
+ *X = c; d = *X;
+
+ the CPU will only issue:
+
+ STORE *X = c, d = LOAD *X
+
+ (Loads and stores overlap if they are targetted at overlapping pieces of
+ memory).
+
+And there are a number of things that _must_ or _must_not_ be assumed:
+
+ (*) It _must_not_ be assumed that independent loads and stores will be issued
+ in the order given. This means that for:
+
+ X = *A; Y = *B; *D = Z;
+
+ we may get any of the following sequences:
+
+ X = LOAD *A, Y = LOAD *B, STORE *D = Z
+ X = LOAD *A, STORE *D = Z, Y = LOAD *B
+ Y = LOAD *B, X = LOAD *A, STORE *D = Z
+ Y = LOAD *B, STORE *D = Z, X = LOAD *A
+ STORE *D = Z, X = LOAD *A, Y = LOAD *B
+ STORE *D = Z, Y = LOAD *B, X = LOAD *A
+
+ (*) It _must_ be assumed that overlapping memory accesses may be merged or
+ discarded. This means that for:
+
+ X = *A; Y = *(A + 4);
+
+ we may get any one of the following sequences:
+
+ X = LOAD *A; Y = LOAD *(A + 4);
+ Y = LOAD *(A + 4); X = LOAD *A;
+ {X, Y} = LOAD {*A, *(A + 4) };
+
+ And for:
+
+ *A = X; Y = *A;
+
+ we may get either of:
+
+ STORE *A = X; Y = LOAD *A;
+ STORE *A = Y;
+
+
+=========================
+WHAT ARE MEMORY BARRIERS?
+=========================
+
+As can be seen above, independent memory operations are effectively performed
+in random order, but this can be a problem for CPU-CPU interaction and for I/O.
+What is required is some way of intervening to instruct the compiler and the
+CPU to restrict the order.
+
+Memory barriers are such interventions. They impose a perceived partial
+ordering between the memory operations specified on either side of the barrier.
+They request that the sequence of memory events generated appears to other
+parts of the system as if the barrier is effective on that CPU.
+
+
+VARIETIES OF MEMORY BARRIER
+---------------------------
+
+Memory barriers come in four basic varieties:
+
+ (1) Write (or store) memory barriers.
+
+ A write memory barrier gives a guarantee that all the STORE operations
+ specified before the barrier will appear to happen before all the STORE
+ operations specified after the barrier with respect to the other
+ components of the system.
+
+ A write barrier is a partial ordering on stores only; it is not required
+ to have any effect on loads.
+
+ A CPU can be viewed as as commiting a sequence of store operations to the
+ memory system as time progresses. All stores before a write barrier will
+ occur in the sequence _before_ all the stores after the write barrier.
+
+ [!] Note that write barriers should normally be paired with read or data
+ dependency barriers; see the "SMP barrier pairing" subsection.
+
+
+ (2) Data dependency barriers.
+
+ A data dependency barrier is a weaker form of read barrier. In the case
+ where two loads are performed such that the second depends on the result
+ of the first (eg: the first load retrieves the address to which the second
+ load will be directed), a data dependency barrier would be required to
+ make sure that the target of the second load is updated before the address
+ obtained by the first load is accessed.
+
+ A data dependency barrier is a partial ordering on interdependent loads
+ only; it is not required to have any effect on stores, independent loads
+ or overlapping loads.
+
+ As mentioned in (1), the other CPUs in the system can be viewed as
+ committing sequences of stores to the memory system that the CPU being
+ considered can then perceive. A data dependency barrier issued by the CPU
+ under consideration guarantees that for any load preceding it, if that
+ load touches one of a sequence of stores from another CPU, then by the
+ time the barrier completes, the effects of all the stores prior to that
+ touched by the load will be perceptible to any loads issued after the data
+ dependency barrier.
+
+ See the "Examples of memory barrier sequences" subsection for diagrams
+ showing the ordering constraints.
+
+ [!] Note that the first load really has to have a _data_ dependency and
+ not a control dependency. If the address for the second load is dependent
+ on the first load, but the dependency is through a conditional rather than
+ actually loading the address itself, then it's a _control_ dependency and
+ a full read barrier or better is required. See the "Control dependencies"
+ subsection for more information.
+
+ [!] Note that data dependency barriers should normally be paired with
+ write barriers; see the "SMP barrier pairing" subsection.
+
+
+ (3) Read (or load) memory barriers.
+
+ A read barrier is a data dependency barrier plus a guarantee that all the
+ LOAD operations specified before the barrier will appear to happen before
+ all the LOAD operations specified after the barrier with respect to the
+ other components of the system.
+
+ A read barrier is a partial ordering on loads only; it is not required to
+ have any effect on stores.
+
+ Read memory barriers imply data dependency barriers, and so can substitute
+ for them.
+
+ [!] Note that read barriers should normally be paired with write barriers;
+ see the "SMP barrier pairing" subsection.
+
+
+ (4) General memory barriers.
+
+ A general memory barrier is a combination of both a read memory barrier
+ and a write memory barrier. It is a partial ordering over both loads and
+ stores.
+
+ General memory barriers imply both read and write memory barriers, and so
+ can substitute for either.
+
+
+And a couple of implicit varieties:
+
+ (5) LOCK operations.
+
+ This acts as a one-way permeable barrier. It guarantees that all memory
+ operations after the LOCK operation will appear to happen after the LOCK
+ operation with respect to the other components of the system.
+
+ Memory operations that occur before a LOCK operation may appear to happen
+ after it completes.
+
+ A LOCK operation should almost always be paired with an UNLOCK operation.
+
+
+ (6) UNLOCK operations.
+
+ This also acts as a one-way permeable barrier. It guarantees that all
+ memory operations before the UNLOCK operation will appear to happen before
+ the UNLOCK operation with respect to the other components of the system.
+
+ Memory operations that occur after an UNLOCK operation may appear to
+ happen before it completes.
+
+ LOCK and UNLOCK operations are guaranteed to appear with respect to each
+ other strictly in the order specified.
+
+ The use of LOCK and UNLOCK operations generally precludes the need for
+ other sorts of memory barrier (but note the exceptions mentioned in the
+ subsection "MMIO write barrier").
+
+
+Memory barriers are only required where there's a possibility of interaction
+between two CPUs or between a CPU and a device. If it can be guaranteed that
+there won't be any such interaction in any particular piece of code, then
+memory barriers are unnecessary in that piece of code.
+
+
+Note that these are the _minimum_ guarantees. Different architectures may give
+more substantial guarantees, but they may _not_ be relied upon outside of arch
+specific code.
+
+
+WHAT MAY NOT BE ASSUMED ABOUT MEMORY BARRIERS?
+----------------------------------------------
+
+There are certain things that the Linux kernel memory barriers do not guarantee:
+
+ (*) There is no guarantee that any of the memory accesses specified before a
+ memory barrier will be _complete_ by the completion of a memory barrier
+ instruction; the barrier can be considered to draw a line in that CPU's
+ access queue that accesses of the appropriate type may not cross.
+
+ (*) There is no guarantee that issuing a memory barrier on one CPU will have
+ any direct effect on another CPU or any other hardware in the system. The
+ indirect effect will be the order in which the second CPU sees the effects
+ of the first CPU's accesses occur, but see the next point:
+
+ (*) There is no guarantee that the a CPU will see the correct order of effects
+ from a second CPU's accesses, even _if_ the second CPU uses a memory
+ barrier, unless the first CPU _also_ uses a matching memory barrier (see
+ the subsection on "SMP Barrier Pairing").
+
+ (*) There is no guarantee that some intervening piece of off-the-CPU
+ hardware[*] will not reorder the memory accesses. CPU cache coherency
+ mechanisms should propagate the indirect effects of a memory barrier
+ between CPUs, but might not do so in order.
+
+ [*] For information on bus mastering DMA and coherency please read:
+
+ Documentation/pci.txt
+ Documentation/DMA-mapping.txt
+ Documentation/DMA-API.txt
+
+
+DATA DEPENDENCY BARRIERS
+------------------------
+
+The usage requirements of data dependency barriers are a little subtle, and
+it's not always obvious that they're needed. To illustrate, consider the
+following sequence of events:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4;
+ <write barrier>
+ P = &B
+ Q = P;
+ D = *Q;
+
+There's a clear data dependency here, and it would seem that by the end of the
+sequence, Q must be either &A or &B, and that:
+
+ (Q == &A) implies (D == 1)
+ (Q == &B) implies (D == 4)
+
+But! CPU 2's perception of P may be updated _before_ its perception of B, thus
+leading to the following situation:
+
+ (Q == &B) and (D == 2) ????
+
+Whilst this may seem like a failure of coherency or causality maintenance, it
+isn't, and this behaviour can be observed on certain real CPUs (such as the DEC
+Alpha).
+
+To deal with this, a data dependency barrier must be inserted between the
+address load and the data load:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4;
+ <write barrier>
+ P = &B
+ Q = P;
+ <data dependency barrier>
+ D = *Q;
+
+This enforces the occurrence of one of the two implications, and prevents the
+third possibility from arising.
+
+[!] Note that this extremely counterintuitive situation arises most easily on
+machines with split caches, so that, for example, one cache bank processes
+even-numbered cache lines and the other bank processes odd-numbered cache
+lines. The pointer P might be stored in an odd-numbered cache line, and the
+variable B might be stored in an even-numbered cache line. Then, if the
+even-numbered bank of the reading CPU's cache is extremely busy while the
+odd-numbered bank is idle, one can see the new value of the pointer P (&B),
+but the old value of the variable B (1).
+
+
+Another example of where data dependency barriers might by required is where a
+number is read from memory and then used to calculate the index for an array
+access:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
+ M[1] = 4;
+ <write barrier>
+ P = 1
+ Q = P;
+ <data dependency barrier>
+ D = M[Q];
+
+
+The data dependency barrier is very important to the RCU system, for example.
+See rcu_dereference() in include/linux/rcupdate.h. This permits the current
+target of an RCU'd pointer to be replaced with a new modified target, without
+the replacement target appearing to be incompletely initialised.
+
+See also the subsection on "Cache Coherency" for a more thorough example.
+
+
+CONTROL DEPENDENCIES
+--------------------
+
+A control dependency requires a full read memory barrier, not simply a data
+dependency barrier to make it work correctly. Consider the following bit of
+code:
+
+ q = &a;
+ if (p)
+ q = &b;
+ <data dependency barrier>
+ x = *q;
+
+This will not have the desired effect because there is no actual data
+dependency, but rather a control dependency that the CPU may short-circuit by
+attempting to predict the outcome in advance. In such a case what's actually
+required is:
+
+ q = &a;
+ if (p)
+ q = &b;
+ <read barrier>
+ x = *q;
+
+
+SMP BARRIER PAIRING
+-------------------
+
+When dealing with CPU-CPU interactions, certain types of memory barrier should
+always be paired. A lack of appropriate pairing is almost certainly an error.
+
+A write barrier should always be paired with a data dependency barrier or read
+barrier, though a general barrier would also be viable. Similarly a read
+barrier or a data dependency barrier should always be paired with at least an
+write barrier, though, again, a general barrier is viable:
+
+ CPU 1 CPU 2
+ =============== ===============
+ a = 1;
+ <write barrier>
+ b = 2; x = a;
+ <read barrier>
+ y = b;
+
+Or:
+
+ CPU 1 CPU 2
+ =============== ===============================
+ a = 1;
+ <write barrier>
+ b = &a; x = b;
+ <data dependency barrier>
+ y = *x;
+
+Basically, the read barrier always has to be there, even though it can be of
+the "weaker" type.
+
+
+EXAMPLES OF MEMORY BARRIER SEQUENCES
+------------------------------------
+
+Firstly, write barriers act as a partial orderings on store operations.
+Consider the following sequence of events:
+
+ CPU 1
+ =======================
+ STORE A = 1
+ STORE B = 2
+ STORE C = 3
+ <write barrier>
+ STORE D = 4
+ STORE E = 5
+
+This sequence of events is committed to the memory coherence system in an order
+that the rest of the system might perceive as the unordered set of { STORE A,
+STORE B, STORE C } all occuring before the unordered set of { STORE D, STORE E
+}:
+
+ +-------+ : :
+ | | +------+
+ | |------>| C=3 | } /\
+ | | : +------+ }----- \ -----> Events perceptible
+ | | : | A=1 | } \/ to rest of system
+ | | : +------+ }
+ | CPU 1 | : | B=2 | }
+ | | +------+ }
+ | | wwwwwwwwwwwwwwww } <--- At this point the write barrier
+ | | +------+ } requires all stores prior to the
+ | | : | E=5 | } barrier to be committed before
+ | | : +------+ } further stores may be take place.
+ | |------>| D=4 | }
+ | | +------+
+ +-------+ : :
+ |
+ | Sequence in which stores committed to memory system
+ | by CPU 1
+ V
+
+
+Secondly, data dependency barriers act as a partial orderings on data-dependent
+loads. Consider the following sequence of events:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ STORE A = 1
+ STORE B = 2
+ <write barrier>
+ STORE C = &B LOAD X
+ STORE D = 4 LOAD C (gets &B)
+ LOAD *C (reads B)
+
+Without intervention, CPU 2 may perceive the events on CPU 1 in some
+effectively random order, despite the write barrier issued by CPU 1:
+
+ +-------+ : : : :
+ | | +------+ +-------+ | Sequence of update
+ | |------>| B=2 |----- --->| Y->8 | | of perception on
+ | | : +------+ \ +-------+ | CPU 2
+ | CPU 1 | : | A=1 | \ --->| C->&Y | V
+ | | +------+ | +-------+
+ | | wwwwwwwwwwwwwwww | : :
+ | | +------+ | : :
+ | | : | C=&B |--- | : : +-------+
+ | | : +------+ \ | +-------+ | |
+ | |------>| D=4 | ----------->| C->&B |------>| |
+ | | +------+ | +-------+ | |
+ +-------+ : : | : : | |
+ | : : | |
+ | : : | CPU 2 |
+ | +-------+ | |
+ Apparently incorrect ---> | | B->7 |------>| |
+ perception of B (!) | +-------+ | |
+ | : : | |
+ | +-------+ | |
+ The load of X holds ---> \ | X->9 |------>| |
+ up the maintenance \ +-------+ | |
+ of coherence of B ----->| B->2 | +-------+
+ +-------+
+ : :
+
+
+In the above example, CPU 2 perceives that B is 7, despite the load of *C
+(which would be B) coming after the the LOAD of C.
+
+If, however, a data dependency barrier were to be placed between the load of C
+and the load of *C (ie: B) on CPU 2, then the following will occur:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| B=2 |----- --->| Y->8 |
+ | | : +------+ \ +-------+
+ | CPU 1 | : | A=1 | \ --->| C->&Y |
+ | | +------+ | +-------+
+ | | wwwwwwwwwwwwwwww | : :
+ | | +------+ | : :
+ | | : | C=&B |--- | : : +-------+
+ | | : +------+ \ | +-------+ | |
+ | |------>| D=4 | ----------->| C->&B |------>| |
+ | | +------+ | +-------+ | |
+ +-------+ : : | : : | |
+ | : : | |
+ | : : | CPU 2 |
+ | +-------+ | |
+ \ | X->9 |------>| |
+ \ +-------+ | |
+ ----->| B->2 | | |
+ +-------+ | |
+ Makes sure all effects ---> ddddddddddddddddd | |
+ prior to the store of C +-------+ | |
+ are perceptible to | B->2 |------>| |
+ successive loads +-------+ | |
+ : : +-------+
+
+
+And thirdly, a read barrier acts as a partial order on loads. Consider the
+following sequence of events:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ STORE A=1
+ STORE B=2
+ STORE C=3
+ <write barrier>
+ STORE D=4
+ STORE E=5
+ LOAD A
+ LOAD B
+ LOAD C
+ LOAD D
+ LOAD E
+
+Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
+some effectively random order, despite the write barrier issued by CPU 1:
+
+ +-------+ : :
+ | | +------+
+ | |------>| C=3 | }
+ | | : +------+ }
+ | | : | A=1 | }
+ | | : +------+ }
+ | CPU 1 | : | B=2 | }---
+ | | +------+ } \
+ | | wwwwwwwwwwwww} \
+ | | +------+ } \ : : +-------+
+ | | : | E=5 | } \ +-------+ | |
+ | | : +------+ } \ { | C->3 |------>| |
+ | |------>| D=4 | } \ { +-------+ : | |
+ | | +------+ \ { | E->5 | : | |
+ +-------+ : : \ { +-------+ : | |
+ Transfer -->{ | A->1 | : | CPU 2 |
+ from CPU 1 { +-------+ : | |
+ to CPU 2 { | D->4 | : | |
+ { +-------+ : | |
+ { | B->2 |------>| |
+ +-------+ | |
+ : : +-------+
+
+
+If, however, a read barrier were to be placed between the load of C and the
+load of D on CPU 2, then the partial ordering imposed by CPU 1 will be
+perceived correctly by CPU 2.
+
+ +-------+ : :
+ | | +------+
+ | |------>| C=3 | }
+ | | : +------+ }
+ | | : | A=1 | }---
+ | | : +------+ } \
+ | CPU 1 | : | B=2 | } \
+ | | +------+ \
+ | | wwwwwwwwwwwwwwww \
+ | | +------+ \ : : +-------+
+ | | : | E=5 | } \ +-------+ | |
+ | | : +------+ }--- \ { | C->3 |------>| |
+ | |------>| D=4 | } \ \ { +-------+ : | |
+ | | +------+ \ -->{ | B->2 | : | |
+ +-------+ : : \ { +-------+ : | |
+ \ { | A->1 | : | CPU 2 |
+ \ +-------+ | |
+ At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
+ barrier causes all effects \ +-------+ | |
+ prior to the storage of C \ { | E->5 | : | |
+ to be perceptible to CPU 2 -->{ +-------+ : | |
+ { | D->4 |------>| |
+ +-------+ | |
+ : : +-------+
+
+
+========================
+EXPLICIT KERNEL BARRIERS
+========================
+
+The Linux kernel has a variety of different barriers that act at different
+levels:
+
+ (*) Compiler barrier.
+
+ (*) CPU memory barriers.
+
+ (*) MMIO write barrier.
+
+
+COMPILER BARRIER
+----------------
+
+The Linux kernel has an explicit compiler barrier function that prevents the
+compiler from moving the memory accesses either side of it to the other side:
+
+ barrier();
+
+This a general barrier - lesser varieties of compiler barrier do not exist.
+
+The compiler barrier has no direct effect on the CPU, which may then reorder
+things however it wishes.
+
+
+CPU MEMORY BARRIERS
+-------------------
+
+The Linux kernel has eight basic CPU memory barriers:
+
+ TYPE MANDATORY SMP CONDITIONAL
+ =============== ======================= ===========================
+ GENERAL mb() smp_mb()
+ WRITE wmb() smp_wmb()
+ READ rmb() smp_rmb()
+ DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends()
+
+
+All CPU memory barriers unconditionally imply compiler barriers.
+
+SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
+systems because it is assumed that a CPU will be appear to be self-consistent,
+and will order overlapping accesses correctly with respect to itself.
+
+[!] Note that SMP memory barriers _must_ be used to control the ordering of
+references to shared memory on SMP systems, though the use of locking instead
+is sufficient.
+
+Mandatory barriers should not be used to control SMP effects, since mandatory
+barriers unnecessarily impose overhead on UP systems. They may, however, be
+used to control MMIO effects on accesses through relaxed memory I/O windows.
+These are required even on non-SMP systems as they affect the order in which
+memory operations appear to a device by prohibiting both the compiler and the
+CPU from reordering them.
+
+
+There are some more advanced barrier functions:
+
+ (*) set_mb(var, value)
+ (*) set_wmb(var, value)
+
+ These assign the value to the variable and then insert at least a write
+ barrier after it, depending on the function. They aren't guaranteed to
+ insert anything more than a compiler barrier in a UP compilation.
+
+
+ (*) smp_mb__before_atomic_dec();
+ (*) smp_mb__after_atomic_dec();
+ (*) smp_mb__before_atomic_inc();
+ (*) smp_mb__after_atomic_inc();
+
+ These are for use with atomic add, subtract, increment and decrement
+ functions, especially when used for reference counting. These functions
+ do not imply memory barriers.
+
+ As an example, consider a piece of code that marks an object as being dead
+ and then decrements the object's reference count:
+
+ obj->dead = 1;
+ smp_mb__before_atomic_dec();
+ atomic_dec(&obj->ref_count);
+
+ This makes sure that the death mark on the object is perceived to be set
+ *before* the reference counter is decremented.
+
+ See Documentation/atomic_ops.txt for more information. See the "Atomic
+ operations" subsection for information on where to use these.
+
+
+ (*) smp_mb__before_clear_bit(void);
+ (*) smp_mb__after_clear_bit(void);
+
+ These are for use similar to the atomic inc/dec barriers. These are
+ typically used for bitwise unlocking operations, so care must be taken as
+ there are no implicit memory barriers here either.
+
+ Consider implementing an unlock operation of some nature by clearing a
+ locking bit. The clear_bit() would then need to be barriered like this:
+
+ smp_mb__before_clear_bit();
+ clear_bit( ... );
+
+ This prevents memory operations before the clear leaking to after it. See
+ the subsection on "Locking Functions" with reference to UNLOCK operation
+ implications.
+
+ See Documentation/atomic_ops.txt for more information. See the "Atomic
+ operations" subsection for information on where to use these.
+
+
+MMIO WRITE BARRIER
+------------------
+
+The Linux kernel also has a special barrier for use with memory-mapped I/O
+writes:
+
+ mmiowb();
+
+This is a variation on the mandatory write barrier that causes writes to weakly
+ordered I/O regions to be partially ordered. Its effects may go beyond the
+CPU->Hardware interface and actually affect the hardware at some level.
+
+See the subsection "Locks vs I/O accesses" for more information.
+
+
+===============================
+IMPLICIT KERNEL MEMORY BARRIERS
+===============================
+
+Some of the other functions in the linux kernel imply memory barriers, amongst
+which are locking, scheduling and memory allocation functions.
+
+This specification is a _minimum_ guarantee; any particular architecture may
+provide more substantial guarantees, but these may not be relied upon outside
+of arch specific code.
+
+
+LOCKING FUNCTIONS
+-----------------
+
+The Linux kernel has a number of locking constructs:
+
+ (*) spin locks
+ (*) R/W spin locks
+ (*) mutexes
+ (*) semaphores
+ (*) R/W semaphores
+ (*) RCU
+
+In all cases there are variants on "LOCK" operations and "UNLOCK" operations
+for each construct. These operations all imply certain barriers:
+
+ (1) LOCK operation implication:
+
+ Memory operations issued after the LOCK will be completed after the LOCK
+ operation has completed.
+
+ Memory operations issued before the LOCK may be completed after the LOCK
+ operation has completed.
+
+ (2) UNLOCK operation implication:
+
+ Memory operations issued before the UNLOCK will be completed before the
+ UNLOCK operation has completed.
+
+ Memory operations issued after the UNLOCK may be completed before the
+ UNLOCK operation has completed.
+
+ (3) LOCK vs LOCK implication:
+
+ All LOCK operations issued before another LOCK operation will be completed
+ before that LOCK operation.
+
+ (4) LOCK vs UNLOCK implication:
+
+ All LOCK operations issued before an UNLOCK operation will be completed
+ before the UNLOCK operation.
+
+ All UNLOCK operations issued before a LOCK operation will be completed
+ before the LOCK operation.
+
+ (5) Failed conditional LOCK implication:
+
+ Certain variants of the LOCK operation may fail, either due to being
+ unable to get the lock immediately, or due to receiving an unblocked
+ signal whilst asleep waiting for the lock to become available. Failed
+ locks do not imply any sort of barrier.
+
+Therefore, from (1), (2) and (4) an UNLOCK followed by an unconditional LOCK is
+equivalent to a full barrier, but a LOCK followed by an UNLOCK is not.
+
+[!] Note: one of the consequence of LOCKs and UNLOCKs being only one-way
+ barriers is that the effects instructions outside of a critical section may
+ seep into the inside of the critical section.
+
+Locks and semaphores may not provide any guarantee of ordering on UP compiled
+systems, and so cannot be counted on in such a situation to actually achieve
+anything at all - especially with respect to I/O accesses - unless combined
+with interrupt disabling operations.
+
+See also the section on "Inter-CPU locking barrier effects".
+
+
+As an example, consider the following:
+
+ *A = a;
+ *B = b;
+ LOCK
+ *C = c;
+ *D = d;
+ UNLOCK
+ *E = e;
+ *F = f;
+
+The following sequence of events is acceptable:
+
+ LOCK, {*F,*A}, *E, {*C,*D}, *B, UNLOCK
+
+ [+] Note that {*F,*A} indicates a combined access.
+
+But none of the following are:
+
+ {*F,*A}, *B, LOCK, *C, *D, UNLOCK, *E
+ *A, *B, *C, LOCK, *D, UNLOCK, *E, *F
+ *A, *B, LOCK, *C, UNLOCK, *D, *E, *F
+ *B, LOCK, *C, *D, UNLOCK, {*F,*A}, *E
+
+
+
+INTERRUPT DISABLING FUNCTIONS
+-----------------------------
+
+Functions that disable interrupts (LOCK equivalent) and enable interrupts
+(UNLOCK equivalent) will act as compiler barriers only. So if memory or I/O
+barriers are required in such a situation, they must be provided from some
+other means.
+
+
+MISCELLANEOUS FUNCTIONS
+-----------------------
+
+Other functions that imply barriers:
+
+ (*) schedule() and similar imply full memory barriers.
+
+ (*) Memory allocation and release functions imply full memory barriers.
+
+
+=================================
+INTER-CPU LOCKING BARRIER EFFECTS
+=================================
+
+On SMP systems locking primitives give a more substantial form of barrier: one
+that does affect memory access ordering on other CPUs, within the context of
+conflict on any particular lock.
+
+
+LOCKS VS MEMORY ACCESSES
+------------------------
+
+Consider the following: the system has a pair of spinlocks (N) and (Q), and
+three CPUs; then should the following sequence of events occur:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ *A = a; *E = e;
+ LOCK M LOCK Q
+ *B = b; *F = f;
+ *C = c; *G = g;
+ UNLOCK M UNLOCK Q
+ *D = d; *H = h;
+
+Then there is no guarantee as to what order CPU #3 will see the accesses to *A
+through *H occur in, other than the constraints imposed by the separate locks
+on the separate CPUs. It might, for example, see:
+
+ *E, LOCK M, LOCK Q, *G, *C, *F, *A, *B, UNLOCK Q, *D, *H, UNLOCK M
+
+But it won't see any of:
+
+ *B, *C or *D preceding LOCK M
+ *A, *B or *C following UNLOCK M
+ *F, *G or *H preceding LOCK Q
+ *E, *F or *G following UNLOCK Q
+
+
+However, if the following occurs:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ *A = a;
+ LOCK M [1]
+ *B = b;
+ *C = c;
+ UNLOCK M [1]
+ *D = d; *E = e;
+ LOCK M [2]
+ *F = f;
+ *G = g;
+ UNLOCK M [2]
+ *H = h;
+
+CPU #3 might see:
+
+ *E, LOCK M [1], *C, *B, *A, UNLOCK M [1],
+ LOCK M [2], *H, *F, *G, UNLOCK M [2], *D
+
+But assuming CPU #1 gets the lock first, it won't see any of:
+
+ *B, *C, *D, *F, *G or *H preceding LOCK M [1]
+ *A, *B or *C following UNLOCK M [1]
+ *F, *G or *H preceding LOCK M [2]
+ *A, *B, *C, *E, *F or *G following UNLOCK M [2]
+
+
+LOCKS VS I/O ACCESSES
+---------------------
+
+Under certain circumstances (especially involving NUMA), I/O accesses within
+two spinlocked sections on two different CPUs may be seen as interleaved by the
+PCI bridge, because the PCI bridge does not necessarily participate in the
+cache-coherence protocol, and is therefore incapable of issuing the required
+read memory barriers.
+
+For example:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ writel(1, DATA);
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ writel(5, DATA);
+ spin_unlock(Q);
+
+may be seen by the PCI bridge as follows:
+
+ STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
+
+which would probably cause the hardware to malfunction.
+
+
+What is necessary here is to intervene with an mmiowb() before dropping the
+spinlock, for example:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ writel(1, DATA);
+ mmiowb();
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ writel(5, DATA);
+ mmiowb();
+ spin_unlock(Q);
+
+this will ensure that the two stores issued on CPU #1 appear at the PCI bridge
+before either of the stores issued on CPU #2.
+
+
+Furthermore, following a store by a load to the same device obviates the need
+for an mmiowb(), because the load forces the store to complete before the load
+is performed:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ a = readl(DATA);
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ b = readl(DATA);
+ spin_unlock(Q);
+
+
+See Documentation/DocBook/deviceiobook.tmpl for more information.
+
+
+=================================
+WHERE ARE MEMORY BARRIERS NEEDED?
+=================================
+
+Under normal operation, memory operation reordering is generally not going to
+be a problem as a single-threaded linear piece of code will still appear to
+work correctly, even if it's in an SMP kernel. There are, however, three
+circumstances in which reordering definitely _could_ be a problem:
+
+ (*) Interprocessor interaction.
+
+ (*) Atomic operations.
+
+ (*) Accessing devices (I/O).
+
+ (*) Interrupts.
+
+
+INTERPROCESSOR INTERACTION
+--------------------------
+
+When there's a system with more than one processor, more than one CPU in the
+system may be working on the same data set at the same time. This can cause
+synchronisation problems, and the usual way of dealing with them is to use
+locks. Locks, however, are quite expensive, and so it may be preferable to
+operate without the use of a lock if at all possible. In such a case
+operations that affect both CPUs may have to be carefully ordered to prevent
+a malfunction.
+
+Consider, for example, the R/W semaphore slow path. Here a waiting process is
+queued on the semaphore, by virtue of it having a piece of its stack linked to
+the semaphore's list of waiting processes:
+
+ struct rw_semaphore {
+ ...
+ spinlock_t lock;
+ struct list_head waiters;
+ };
+
+ struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ };
+
+To wake up a particular waiter, the up_read() or up_write() functions have to:
+
+ (1) read the next pointer from this waiter's record to know as to where the
+ next waiter record is;
+
+ (4) read the pointer to the waiter's task structure;
+
+ (3) clear the task pointer to tell the waiter it has been given the semaphore;
+
+ (4) call wake_up_process() on the task; and
+
+ (5) release the reference held on the waiter's task struct.
+
+In otherwords, it has to perform this sequence of events:
+
+ LOAD waiter->list.next;
+ LOAD waiter->task;
+ STORE waiter->task;
+ CALL wakeup
+ RELEASE task
+
+and if any of these steps occur out of order, then the whole thing may
+malfunction.
+
+Once it has queued itself and dropped the semaphore lock, the waiter does not
+get the lock again; it instead just waits for its task pointer to be cleared
+before proceeding. Since the record is on the waiter's stack, this means that
+if the task pointer is cleared _before_ the next pointer in the list is read,
+another CPU might start processing the waiter and might clobber the waiter's
+stack before the up*() function has a chance to read the next pointer.
+
+Consider then what might happen to the above sequence of events:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ down_xxx()
+ Queue waiter
+ Sleep
+ up_yyy()
+ LOAD waiter->task;
+ STORE waiter->task;
+ Woken up by other event
+ <preempt>
+ Resume processing
+ down_xxx() returns
+ call foo()
+ foo() clobbers *waiter
+ </preempt>
+ LOAD waiter->list.next;
+ --- OOPS ---
+
+This could be dealt with using the semaphore lock, but then the down_xxx()
+function has to needlessly get the spinlock again after being woken up.
+
+The way to deal with this is to insert a general SMP memory barrier:
+
+ LOAD waiter->list.next;
+ LOAD waiter->task;
+ smp_mb();
+ STORE waiter->task;
+ CALL wakeup
+ RELEASE task
+
+In this case, the barrier makes a guarantee that all memory accesses before the
+barrier will appear to happen before all the memory accesses after the barrier
+with respect to the other CPUs on the system. It does _not_ guarantee that all
+the memory accesses before the barrier will be complete by the time the barrier
+instruction itself is complete.
+
+On a UP system - where this wouldn't be a problem - the smp_mb() is just a
+compiler barrier, thus making sure the compiler emits the instructions in the
+right order without actually intervening in the CPU. Since there there's only
+one CPU, that CPU's dependency ordering logic will take care of everything
+else.
+
+
+ATOMIC OPERATIONS
+-----------------
+
+Though they are technically interprocessor interaction considerations, atomic
+operations are noted specially as they do _not_ generally imply memory
+barriers. The possible offenders include:
+
+ xchg();
+ cmpxchg();
+ test_and_set_bit();
+ test_and_clear_bit();
+ test_and_change_bit();
+ atomic_cmpxchg();
+ atomic_inc_return();
+ atomic_dec_return();
+ atomic_add_return();
+ atomic_sub_return();
+ atomic_inc_and_test();
+ atomic_dec_and_test();
+ atomic_sub_and_test();
+ atomic_add_negative();
+ atomic_add_unless();
+
+These may be used for such things as implementing LOCK operations or controlling
+the lifetime of objects by decreasing their reference counts. In such cases
+they need preceding memory barriers.
+
+The following may also be possible offenders as they may be used as UNLOCK
+operations.
+
+ set_bit();
+ clear_bit();
+ change_bit();
+ atomic_set();
+
+
+The following are a little tricky:
+
+ atomic_add();
+ atomic_sub();
+ atomic_inc();
+ atomic_dec();
+
+If they're used for statistics generation, then they probably don't need memory
+barriers, unless there's a coupling between statistical data.
+
+If they're used for reference counting on an object to control its lifetime,
+they probably don't need memory barriers because either the reference count
+will be adjusted inside a locked section, or the caller will already hold
+sufficient references to make the lock, and thus a memory barrier unnecessary.
+
+If they're used for constructing a lock of some description, then they probably
+do need memory barriers as a lock primitive generally has to do things in a
+specific order.
+
+
+Basically, each usage case has to be carefully considered as to whether memory
+barriers are needed or not. The simplest rule is probably: if the atomic
+operation is protected by a lock, then it does not require a barrier unless
+there's another operation within the critical section with respect to which an
+ordering must be maintained.
+
+See Documentation/atomic_ops.txt for more information.
+
+
+ACCESSING DEVICES
+-----------------
+
+Many devices can be memory mapped, and so appear to the CPU as if they're just
+a set of memory locations. To control such a device, the driver usually has to
+make the right memory accesses in exactly the right order.
+
+However, having a clever CPU or a clever compiler creates a potential problem
+in that the carefully sequenced accesses in the driver code won't reach the
+device in the requisite order if the CPU or the compiler thinks it is more
+efficient to reorder, combine or merge accesses - something that would cause
+the device to malfunction.
+
+Inside of the Linux kernel, I/O should be done through the appropriate accessor
+routines - such as inb() or writel() - which know how to make such accesses
+appropriately sequential. Whilst this, for the most part, renders the explicit
+use of memory barriers unnecessary, there are a couple of situations where they
+might be needed:
+
+ (1) On some systems, I/O stores are not strongly ordered across all CPUs, and
+ so for _all_ general drivers locks should be used and mmiowb() must be
+ issued prior to unlocking the critical section.
+
+ (2) If the accessor functions are used to refer to an I/O memory window with
+ relaxed memory access properties, then _mandatory_ memory barriers are
+ required to enforce ordering.
+
+See Documentation/DocBook/deviceiobook.tmpl for more information.
+
+
+INTERRUPTS
+----------
+
+A driver may be interrupted by its own interrupt service routine, and thus the
+two parts of the driver may interfere with each other's attempts to control or
+access the device.
+
+This may be alleviated - at least in part - by disabling local interrupts (a
+form of locking), such that the critical operations are all contained within
+the interrupt-disabled section in the driver. Whilst the driver's interrupt
+routine is executing, the driver's core may not run on the same CPU, and its
+interrupt is not permitted to happen again until the current interrupt has been
+handled, thus the interrupt handler does not need to lock against that.
+
+However, consider a driver that was talking to an ethernet card that sports an
+address register and a data register. If that driver's core talks to the card
+under interrupt-disablement and then the driver's interrupt handler is invoked:
+
+ LOCAL IRQ DISABLE
+ writew(ADDR, 3);
+ writew(DATA, y);
+ LOCAL IRQ ENABLE
+ <interrupt>
+ writew(ADDR, 4);
+ q = readw(DATA);
+ </interrupt>
+
+The store to the data register might happen after the second store to the
+address register if ordering rules are sufficiently relaxed:
+
+ STORE *ADDR = 3, STORE *ADDR = 4, STORE *DATA = y, q = LOAD *DATA
+
+
+If ordering rules are relaxed, it must be assumed that accesses done inside an
+interrupt disabled section may leak outside of it and may interleave with
+accesses performed in an interrupt - and vice versa - unless implicit or
+explicit barriers are used.
+
+Normally this won't be a problem because the I/O accesses done inside such
+sections will include synchronous load operations on strictly ordered I/O
+registers that form implicit I/O barriers. If this isn't sufficient then an
+mmiowb() may need to be used explicitly.
+
+
+A similar situation may occur between an interrupt routine and two routines
+running on separate CPUs that communicate with each other. If such a case is
+likely, then interrupt-disabling locks should be used to guarantee ordering.
+
+
+==========================
+KERNEL I/O BARRIER EFFECTS
+==========================
+
+When accessing I/O memory, drivers should use the appropriate accessor
+functions:
+
+ (*) inX(), outX():
+
+ These are intended to talk to I/O space rather than memory space, but
+ that's primarily a CPU-specific concept. The i386 and x86_64 processors do
+ indeed have special I/O space access cycles and instructions, but many
+ CPUs don't have such a concept.
+
+ The PCI bus, amongst others, defines an I/O space concept - which on such
+ CPUs as i386 and x86_64 cpus readily maps to the CPU's concept of I/O
+ space. However, it may also mapped as a virtual I/O space in the CPU's
+ memory map, particularly on those CPUs that don't support alternate
+ I/O spaces.
+
+ Accesses to this space may be fully synchronous (as on i386), but
+ intermediary bridges (such as the PCI host bridge) may not fully honour
+ that.
+
+ They are guaranteed to be fully ordered with respect to each other.
+
+ They are not guaranteed to be fully ordered with respect to other types of
+ memory and I/O operation.
+
+ (*) readX(), writeX():
+
+ Whether these are guaranteed to be fully ordered and uncombined with
+ respect to each other on the issuing CPU depends on the characteristics
+ defined for the memory window through which they're accessing. On later
+ i386 architecture machines, for example, this is controlled by way of the
+ MTRR registers.
+
+ Ordinarily, these will be guaranteed to be fully ordered and uncombined,,
+ provided they're not accessing a prefetchable device.
+
+ However, intermediary hardware (such as a PCI bridge) may indulge in
+ deferral if it so wishes; to flush a store, a load from the same location
+ is preferred[*], but a load from the same device or from configuration
+ space should suffice for PCI.
+
+ [*] NOTE! attempting to load from the same location as was written to may
+ cause a malfunction - consider the 16550 Rx/Tx serial registers for
+ example.
+
+ Used with prefetchable I/O memory, an mmiowb() barrier may be required to
+ force stores to be ordered.
+
+ Please refer to the PCI specification for more information on interactions
+ between PCI transactions.
+
+ (*) readX_relaxed()
+
+ These are similar to readX(), but are not guaranteed to be ordered in any
+ way. Be aware that there is no I/O read barrier available.
+
+ (*) ioreadX(), iowriteX()
+
+ These will perform as appropriate for the type of access they're actually
+ doing, be it inX()/outX() or readX()/writeX().
+
+
+========================================
+ASSUMED MINIMUM EXECUTION ORDERING MODEL
+========================================
+
+It has to be assumed that the conceptual CPU is weakly-ordered but that it will
+maintain the appearance of program causality with respect to itself. Some CPUs
+(such as i386 or x86_64) are more constrained than others (such as powerpc or
+frv), and so the most relaxed case (namely DEC Alpha) must be assumed outside
+of arch-specific code.
+
+This means that it must be considered that the CPU will execute its instruction
+stream in any order it feels like - or even in parallel - provided that if an
+instruction in the stream depends on the an earlier instruction, then that
+earlier instruction must be sufficiently complete[*] before the later
+instruction may proceed; in other words: provided that the appearance of
+causality is maintained.
+
+ [*] Some instructions have more than one effect - such as changing the
+ condition codes, changing registers or changing memory - and different
+ instructions may depend on different effects.
+
+A CPU may also discard any instruction sequence that winds up having no
+ultimate effect. For example, if two adjacent instructions both load an
+immediate value into the same register, the first may be discarded.
+
+
+Similarly, it has to be assumed that compiler might reorder the instruction
+stream in any way it sees fit, again provided the appearance of causality is
+maintained.
+
+
+============================
+THE EFFECTS OF THE CPU CACHE
+============================
+
+The way cached memory operations are perceived across the system is affected to
+a certain extent by the caches that lie between CPUs and memory, and by the
+memory coherence system that maintains the consistency of state in the system.
+
+As far as the way a CPU interacts with another part of the system through the
+caches goes, the memory system has to include the CPU's caches, and memory
+barriers for the most part act at the interface between the CPU and its cache
+(memory barriers logically act on the dotted line in the following diagram):
+
+ <--- CPU ---> : <----------- Memory ----------->
+ :
+ +--------+ +--------+ : +--------+ +-----------+
+ | | | | : | | | | +--------+
+ | CPU | | Memory | : | CPU | | | | |
+ | Core |--->| Access |----->| Cache |<-->| | | |
+ | | | Queue | : | | | |--->| Memory |
+ | | | | : | | | | | |
+ +--------+ +--------+ : +--------+ | | | |
+ : | Cache | +--------+
+ : | Coherency |
+ : | Mechanism | +--------+
+ +--------+ +--------+ : +--------+ | | | |
+ | | | | : | | | | | |
+ | CPU | | Memory | : | CPU | | |--->| Device |
+ | Core |--->| Access |----->| Cache |<-->| | | |
+ | | | Queue | : | | | | | |
+ | | | | : | | | | +--------+
+ +--------+ +--------+ : +--------+ +-----------+
+ :
+ :
+
+Although any particular load or store may not actually appear outside of the
+CPU that issued it since it may have been satisfied within the CPU's own cache,
+it will still appear as if the full memory access had taken place as far as the
+other CPUs are concerned since the cache coherency mechanisms will migrate the
+cacheline over to the accessing CPU and propagate the effects upon conflict.
+
+The CPU core may execute instructions in any order it deems fit, provided the
+expected program causality appears to be maintained. Some of the instructions
+generate load and store operations which then go into the queue of memory
+accesses to be performed. The core may place these in the queue in any order
+it wishes, and continue execution until it is forced to wait for an instruction
+to complete.
+
+What memory barriers are concerned with is controlling the order in which
+accesses cross from the CPU side of things to the memory side of things, and
+the order in which the effects are perceived to happen by the other observers
+in the system.
+
+[!] Memory barriers are _not_ needed within a given CPU, as CPUs always see
+their own loads and stores as if they had happened in program order.
+
+[!] MMIO or other device accesses may bypass the cache system. This depends on
+the properties of the memory window through which devices are accessed and/or
+the use of any special device communication instructions the CPU may have.
+
+
+CACHE COHERENCY
+---------------
+
+Life isn't quite as simple as it may appear above, however: for while the
+caches are expected to be coherent, there's no guarantee that that coherency
+will be ordered. This means that whilst changes made on one CPU will
+eventually become visible on all CPUs, there's no guarantee that they will
+become apparent in the same order on those other CPUs.
+
+
+Consider dealing with a system that has pair of CPUs (1 & 2), each of which has
+a pair of parallel data caches (CPU 1 has A/B, and CPU 2 has C/D):
+
+ :
+ : +--------+
+ : +---------+ | |
+ +--------+ : +--->| Cache A |<------->| |
+ | | : | +---------+ | |
+ | CPU 1 |<---+ | |
+ | | : | +---------+ | |
+ +--------+ : +--->| Cache B |<------->| |
+ : +---------+ | |
+ : | Memory |
+ : +---------+ | System |
+ +--------+ : +--->| Cache C |<------->| |
+ | | : | +---------+ | |
+ | CPU 2 |<---+ | |
+ | | : | +---------+ | |
+ +--------+ : +--->| Cache D |<------->| |
+ : +---------+ | |
+ : +--------+
+ :
+
+Imagine the system has the following properties:
+
+ (*) an odd-numbered cache line may be in cache A, cache C or it may still be
+ resident in memory;
+
+ (*) an even-numbered cache line may be in cache B, cache D or it may still be
+ resident in memory;
+
+ (*) whilst the CPU core is interrogating one cache, the other cache may be
+ making use of the bus to access the rest of the system - perhaps to
+ displace a dirty cacheline or to do a speculative load;
+
+ (*) each cache has a queue of operations that need to be applied to that cache
+ to maintain coherency with the rest of the system;
+
+ (*) the coherency queue is not flushed by normal loads to lines already
+ present in the cache, even though the contents of the queue may
+ potentially effect those loads.
+
+Imagine, then, that two writes are made on the first CPU, with a write barrier
+between them to guarantee that they will appear to reach that CPU's caches in
+the requisite order:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb(); Make sure change to v visible before
+ change to p
+ <A:modify v=2> v is now in cache A exclusively
+ p = &v;
+ <B:modify p=&v> p is now in cache B exclusively
+
+The write memory barrier forces the other CPUs in the system to perceive that
+the local CPU's caches have apparently been updated in the correct order. But
+now imagine that the second CPU that wants to read those values:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ ...
+ q = p;
+ x = *q;
+
+The above pair of reads may then fail to happen in expected order, as the
+cacheline holding p may get updated in one of the second CPU's caches whilst
+the update to the cacheline holding v is delayed in the other of the second
+CPU's caches by some other cache event:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb();
+ <A:modify v=2> <C:busy>
+ <C:queue v=2>
+ p = &b; q = p;
+ <D:request p>
+ <B:modify p=&v> <D:commit p=&v>
+ <D:read p>
+ x = *q;
+ <C:read *q> Reads from v before v updated in cache
+ <C:unbusy>
+ <C:commit v=2>
+
+Basically, whilst both cachelines will be updated on CPU 2 eventually, there's
+no guarantee that, without intervention, the order of update will be the same
+as that committed on CPU 1.
+
+
+To intervene, we need to interpolate a data dependency barrier or a read
+barrier between the loads. This will force the cache to commit its coherency
+queue before processing any further requests:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb();
+ <A:modify v=2> <C:busy>
+ <C:queue v=2>
+ p = &b; q = p;
+ <D:request p>
+ <B:modify p=&v> <D:commit p=&v>
+ <D:read p>
+ smp_read_barrier_depends()
+ <C:unbusy>
+ <C:commit v=2>
+ x = *q;
+ <C:read *q> Reads from v after v updated in cache
+
+
+This sort of problem can be encountered on DEC Alpha processors as they have a
+split cache that improves performance by making better use of the data bus.
+Whilst most CPUs do imply a data dependency barrier on the read when a memory
+access depends on a read, not all do, so it may not be relied on.
+
+Other CPUs may also have split caches, but must coordinate between the various
+cachelets for normal memory accesss. The semantics of the Alpha removes the
+need for coordination in absence of memory barriers.
+
+
+CACHE COHERENCY VS DMA
+----------------------
+
+Not all systems maintain cache coherency with respect to devices doing DMA. In
+such cases, a device attempting DMA may obtain stale data from RAM because
+dirty cache lines may be resident in the caches of various CPUs, and may not
+have been written back to RAM yet. To deal with this, the appropriate part of
+the kernel must flush the overlapping bits of cache on each CPU (and maybe
+invalidate them as well).
+
+In addition, the data DMA'd to RAM by a device may be overwritten by dirty
+cache lines being written back to RAM from a CPU's cache after the device has
+installed its own data, or cache lines simply present in a CPUs cache may
+simply obscure the fact that RAM has been updated, until at such time as the
+cacheline is discarded from the CPU's cache and reloaded. To deal with this,
+the appropriate part of the kernel must invalidate the overlapping bits of the
+cache on each CPU.
+
+See Documentation/cachetlb.txt for more information on cache management.
+
+
+CACHE COHERENCY VS MMIO
+-----------------------
+
+Memory mapped I/O usually takes place through memory locations that are part of
+a window in the CPU's memory space that have different properties assigned than
+the usual RAM directed window.
+
+Amongst these properties is usually the fact that such accesses bypass the
+caching entirely and go directly to the device buses. This means MMIO accesses
+may, in effect, overtake accesses to cached memory that were emitted earlier.
+A memory barrier isn't sufficient in such a case, but rather the cache must be
+flushed between the cached memory write and the MMIO access if the two are in
+any way dependent.
+
+
+=========================
+THE THINGS CPUS GET UP TO
+=========================
+
+A programmer might take it for granted that the CPU will perform memory
+operations in exactly the order specified, so that if a CPU is, for example,
+given the following piece of code to execute:
+
+ a = *A;
+ *B = b;
+ c = *C;
+ d = *D;
+ *E = e;
+
+They would then expect that the CPU will complete the memory operation for each
+instruction before moving on to the next one, leading to a definite sequence of
+operations as seen by external observers in the system:
+
+ LOAD *A, STORE *B, LOAD *C, LOAD *D, STORE *E.
+
+
+Reality is, of course, much messier. With many CPUs and compilers, the above
+assumption doesn't hold because:
+
+ (*) loads are more likely to need to be completed immediately to permit
+ execution progress, whereas stores can often be deferred without a
+ problem;
+
+ (*) loads may be done speculatively, and the result discarded should it prove
+ to have been unnecessary;
+
+ (*) loads may be done speculatively, leading to the result having being
+ fetched at the wrong time in the expected sequence of events;
+
+ (*) the order of the memory accesses may be rearranged to promote better use
+ of the CPU buses and caches;
+
+ (*) loads and stores may be combined to improve performance when talking to
+ memory or I/O hardware that can do batched accesses of adjacent locations,
+ thus cutting down on transaction setup costs (memory and PCI devices may
+ both be able to do this); and
+
+ (*) the CPU's data cache may affect the ordering, and whilst cache-coherency
+ mechanisms may alleviate this - once the store has actually hit the cache
+ - there's no guarantee that the coherency management will be propagated in
+ order to other CPUs.
+
+So what another CPU, say, might actually observe from the above piece of code
+is:
+
+ LOAD *A, ..., LOAD {*C,*D}, STORE *E, STORE *B
+
+ (Where "LOAD {*C,*D}" is a combined load)
+
+
+However, it is guaranteed that a CPU will be self-consistent: it will see its
+_own_ accesses appear to be correctly ordered, without the need for a memory
+barrier. For instance with the following code:
+
+ U = *A;
+ *A = V;
+ *A = W;
+ X = *A;
+ *A = Y;
+ Z = *A;
+
+and assuming no intervention by an external influence, it can be assumed that
+the final result will appear to be:
+
+ U == the original value of *A
+ X == W
+ Z == Y
+ *A == Y
+
+The code above may cause the CPU to generate the full sequence of memory
+accesses:
+
+ U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
+
+in that order, but, without intervention, the sequence may have almost any
+combination of elements combined or discarded, provided the program's view of
+the world remains consistent.
+
+The compiler may also combine, discard or defer elements of the sequence before
+the CPU even sees them.
+
+For instance:
+
+ *A = V;
+ *A = W;
+
+may be reduced to:
+
+ *A = W;
+
+since, without a write barrier, it can be assumed that the effect of the
+storage of V to *A is lost. Similarly:
+
+ *A = Y;
+ Z = *A;
+
+may, without a memory barrier, be reduced to:
+
+ *A = Y;
+ Z = Y;
+
+and the LOAD operation never appear outside of the CPU.
+
+
+AND THEN THERE'S THE ALPHA
+--------------------------
+
+The DEC Alpha CPU is one of the most relaxed CPUs there is. Not only that,
+some versions of the Alpha CPU have a split data cache, permitting them to have
+two semantically related cache lines updating at separate times. This is where
+the data dependency barrier really becomes necessary as this synchronises both
+caches with the memory coherence system, thus making it seem like pointer
+changes vs new data occur in the right order.
+
+The Alpha defines the Linux's kernel's memory barrier model.
+
+See the subsection on "Cache Coherency" above.
+
+
+==========
+REFERENCES
+==========
+
+Alpha AXP Architecture Reference Manual, Second Edition (Sites & Witek,
+Digital Press)
+ Chapter 5.2: Physical Address Space Characteristics
+ Chapter 5.4: Caches and Write Buffers
+ Chapter 5.5: Data Sharing
+ Chapter 5.6: Read/Write Ordering
+
+AMD64 Architecture Programmer's Manual Volume 2: System Programming
+ Chapter 7.1: Memory-Access Ordering
+ Chapter 7.4: Buffering and Combining Memory Writes
+
+IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+System Programming Guide
+ Chapter 7.1: Locked Atomic Operations
+ Chapter 7.2: Memory Ordering
+ Chapter 7.4: Serializing Instructions
+
+The SPARC Architecture Manual, Version 9
+ Chapter 8: Memory Models
+ Appendix D: Formal Specification of the Memory Models
+ Appendix J: Programming with the Memory Models
+
+UltraSPARC Programmer Reference Manual
+ Chapter 5: Memory Accesses and Cacheability
+ Chapter 15: Sparc-V9 Memory Models
+
+UltraSPARC III Cu User's Manual
+ Chapter 9: Memory Models
+
+UltraSPARC IIIi Processor User's Manual
+ Chapter 8: Memory Models
+
+UltraSPARC Architecture 2005
+ Chapter 9: Memory
+ Appendix D: Formal Specifications of the Memory Models
+
+UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
+ Chapter 8: Memory Models
+ Appendix F: Caches and Cache Coherency
+
+Solaris Internals, Core Kernel Architecture, p63-68:
+ Chapter 3.3: Hardware Considerations for Locks and
+ Synchronization
+
+Unix Systems for Modern Architectures, Symmetric Multiprocessing and Caching
+for Kernel Programmers:
+ Chapter 13: Other Memory Models
+
+Intel Itanium Architecture Software Developer's Manual: Volume 1:
+ Section 2.6: Speculation
+ Section 4.4: Memory Access
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 5b01d5cc4e95..b1181ce232d9 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -92,8 +92,6 @@ routing.txt
- the new routing mechanism
shaper.txt
- info on the module that can shape/limit transmitted traffic.
-sis900.txt
- - SiS 900/7016 Fast Ethernet device driver info.
sk98lin.txt
- Marvell Yukon Chipset / SysKonnect SK-98xx compliant Gigabit
Ethernet Adapter family driver info
diff --git a/Documentation/networking/README.ipw2100 b/Documentation/networking/README.ipw2100
index 3ab40379d1cf..f3fcaa41f774 100644
--- a/Documentation/networking/README.ipw2100
+++ b/Documentation/networking/README.ipw2100
@@ -3,18 +3,18 @@ Intel(R) PRO/Wireless 2100 Driver for Linux in support of:
Intel(R) PRO/Wireless 2100 Network Connection
-Copyright (C) 2003-2005, Intel Corporation
+Copyright (C) 2003-2006, Intel Corporation
README.ipw2100
-Version: 1.1.3
-Date : October 17, 2005
+Version: git-1.1.5
+Date : January 25, 2006
Index
-----------------------------------------------
0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
1. Introduction
-2. Release 1.1.3 Current Features
+2. Release git-1.1.5 Current Features
3. Command Line Parameters
4. Sysfs Helper Files
5. Radio Kill Switch
@@ -89,7 +89,7 @@ potential fixes and patches, as well as links to the development mailing list
for the driver project.
-2. Release 1.1.3 Current Supported Features
+2. Release git-1.1.5 Current Supported Features
-----------------------------------------------
- Managed (BSS) and Ad-Hoc (IBSS)
- WEP (shared key and open)
@@ -270,7 +270,7 @@ For installation support on the ipw2100 1.1.0 driver on Linux kernels
9. License
-----------------------------------------------
- Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved.
+ Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License (version 2) as
diff --git a/Documentation/networking/README.ipw2200 b/Documentation/networking/README.ipw2200
index c6492d3839fa..acb30c5dcff3 100644
--- a/Documentation/networking/README.ipw2200
+++ b/Documentation/networking/README.ipw2200
@@ -10,7 +10,7 @@ both hardware adapters listed above. In this document the Intel(R)
PRO/Wireless 2915ABG Driver for Linux will be used to reference the
unified driver.
-Copyright (C) 2004-2005, Intel Corporation
+Copyright (C) 2004-2006, Intel Corporation
README.ipw2200
@@ -26,9 +26,11 @@ Index
1.2. Module parameters
1.3. Wireless Extension Private Methods
1.4. Sysfs Helper Files
+1.5. Supported channels
2. Ad-Hoc Networking
3. Interacting with Wireless Tools
3.1. iwconfig mode
+3.2. iwconfig sens
4. About the Version Numbers
5. Firmware installation
6. Support
@@ -314,6 +316,35 @@ For the device level files, see /sys/bus/pci/drivers/ipw2200:
running ifconfig and is therefore disabled by default.
+1.5. Supported channels
+-----------------------------------------------
+
+Upon loading the Intel(R) PRO/Wireless 2915ABG Driver for Linux, a
+message stating the detected geography code and the number of 802.11
+channels supported by the card will be displayed in the log.
+
+The geography code corresponds to a regulatory domain as shown in the
+table below.
+
+ Supported channels
+Code Geography 802.11bg 802.11a
+
+--- Restricted 11 0
+ZZF Custom US/Canada 11 8
+ZZD Rest of World 13 0
+ZZA Custom USA & Europe & High 11 13
+ZZB Custom NA & Europe 11 13
+ZZC Custom Japan 11 4
+ZZM Custom 11 0
+ZZE Europe 13 19
+ZZJ Custom Japan 14 4
+ZZR Rest of World 14 0
+ZZH High Band 13 4
+ZZG Custom Europe 13 4
+ZZK Europe 13 24
+ZZL Europe 11 13
+
+
2. Ad-Hoc Networking
-----------------------------------------------
@@ -353,6 +384,15 @@ When configuring the mode of the adapter, all run-time configured parameters
are reset to the value used when the module was loaded. This includes
channels, rates, ESSID, etc.
+3.2 iwconfig sens
+-----------------------------------------------
+
+The 'iwconfig ethX sens XX' command will not set the signal sensitivity
+threshold, as described in iwconfig documentation, but rather the number
+of consecutive missed beacons that will trigger handover, i.e. roaming
+to another access point. At the same time, it will set the disassociation
+threshold to 3 times the given value.
+
4. About the Version Numbers
-----------------------------------------------
@@ -408,7 +448,7 @@ For general information and support, go to:
7. License
-----------------------------------------------
- Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved.
+ Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License version 2 as
diff --git a/Documentation/networking/TODO b/Documentation/networking/TODO
deleted file mode 100644
index 66d36ff14bae..000000000000
--- a/Documentation/networking/TODO
+++ /dev/null
@@ -1,18 +0,0 @@
-To-do items for network drivers
--------------------------------
-
-* Move ethernet crc routine to generic code
-
-* (for 2.5) Integrate Jamal Hadi Salim's netdev Rx polling API change
-
-* Audit all net drivers to make sure magic packet / wake-on-lan /
- similar features are disabled in the driver by default.
-
-* Audit all net drivers to make sure the module always prints out a
- version string when loaded as a module, but only prints a version
- string when built into the kernel if a device is detected.
-
-* Add ETHTOOL_GDRVINFO ioctl support to all ethernet drivers.
-
-* dmfe PCI DMA is totally wrong and only works on x86
-
diff --git a/Documentation/networking/bcm43xx.txt b/Documentation/networking/bcm43xx.txt
new file mode 100644
index 000000000000..28541d2bee1e
--- /dev/null
+++ b/Documentation/networking/bcm43xx.txt
@@ -0,0 +1,36 @@
+
+ BCM43xx Linux Driver Project
+ ============================
+
+About this software
+-------------------
+
+The goal of this project is to develop a linux driver for Broadcom
+BCM43xx chips, based on the specification at
+http://bcm-specs.sipsolutions.net/
+
+The project page is http://bcm43xx.berlios.de/
+
+
+Requirements
+------------
+
+1) Linux Kernel 2.6.16 or later
+ http://www.kernel.org/
+
+ You may want to configure your kernel with:
+
+ CONFIG_DEBUG_FS (optional):
+ -> Kernel hacking
+ -> Debug Filesystem
+
+2) SoftMAC IEEE 802.11 Networking Stack extension and patched ieee80211
+ modules:
+ http://softmac.sipsolutions.net/
+
+3) Firmware Files
+
+ Please try fwcutter. Fwcutter can extract the firmware from various
+ binary driver files. It supports driver files from Windows, MacOS and
+ Linux. You can get fwcutter from http://bcm43xx.berlios.de/.
+ Also, fwcutter comes with a README file for further instructions.
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index 4ef9f7cd5dc3..944aa55e79f8 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
@@ -1,16 +1,17 @@
Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
==============================================================
-November 17, 2004
-
+November 15, 2005
Contents
========
- In This Release
- Identifying Your Adapter
+- Building and Installation
- Driver Configuration Parameters
- Additional Configurations
+- Known Issues
- Support
@@ -18,18 +19,30 @@ In This Release
===============
This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of
-Adapters, version 3.3.x. This driver supports 2.4.x and 2.6.x kernels.
+Adapters. This driver includes support for Itanium(R)2-based systems.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel PRO/100 adapter.
+
+The following features are now available in supported kernels:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
Identifying Your Adapter
========================
-For more information on how to identify your adapter, go to the Adapter &
+For more information on how to identify your adapter, go to the Adapter &
Driver ID Guide at:
http://support.intel.com/support/network/adapter/pro100/21397.htm
-For the latest Intel network drivers for Linux, refer to the following
-website. In the search field, enter your adapter name or type, or use the
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
networking link on the left to search for your adapter:
http://downloadfinder.intel.com/scripts-df/support_intel.asp
@@ -40,73 +53,75 @@ Driver Configuration Parameters
The default value for each parameter is generally the recommended setting,
unless otherwise noted.
-Rx Descriptors: Number of receive descriptors. A receive descriptor is a data
- structure that describes a receive buffer and its attributes to the network
- controller. The data in the descriptor is used by the controller to write
- data from the controller to host memory. In the 3.0.x driver the valid
- range for this parameter is 64-256. The default value is 64. This parameter
- can be changed using the command
-
+Rx Descriptors: Number of receive descriptors. A receive descriptor is a data
+ structure that describes a receive buffer and its attributes to the network
+ controller. The data in the descriptor is used by the controller to write
+ data from the controller to host memory. In the 3.x.x driver the valid range
+ for this parameter is 64-256. The default value is 64. This parameter can be
+ changed using the command:
+
ethtool -G eth? rx n, where n is the number of desired rx descriptors.
-Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a
- data structure that describes a transmit buffer and its attributes to the
- network controller. The data in the descriptor is used by the controller to
- read data from the host memory to the controller. In the 3.0.x driver the
- valid range for this parameter is 64-256. The default value is 64. This
- parameter can be changed using the command
+Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data
+ structure that describes a transmit buffer and its attributes to the network
+ controller. The data in the descriptor is used by the controller to read
+ data from the host memory to the controller. In the 3.x.x driver the valid
+ range for this parameter is 64-256. The default value is 64. This parameter
+ can be changed using the command:
ethtool -G eth? tx n, where n is the number of desired tx descriptors.
-Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by
- default. Ethtool can be used as follows to force speed/duplex.
+Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by
+ default. Ethtool can be used as follows to force speed/duplex.
ethtool -s eth? autoneg off speed {10|100} duplex {full|half}
NOTE: setting the speed/duplex to incorrect values will cause the link to
fail.
-Event Log Message Level: The driver uses the message level flag to log events
- to syslog. The message level can be set at driver load time. It can also be
- set using the command
+Event Log Message Level: The driver uses the message level flag to log events
+ to syslog. The message level can be set at driver load time. It can also be
+ set using the command:
ethtool -s eth? msglvl n
+
Additional Configurations
=========================
Configuring the Driver on Different Distributions
-------------------------------------------------
- Configuring a network driver to load properly when the system is started is
- distribution dependent. Typically, the configuration process involves adding
- an alias line to /etc/modules.conf as well as editing other system startup
- scripts and/or configuration files. Many popular Linux distributions ship
- with tools to make these changes for you. To learn the proper way to
- configure a network device for your system, refer to your distribution
- documentation. If during this process you are asked for the driver or module
- name, the name for the Linux Base Driver for the Intel PRO/100 Family of
- Adapters is e100.
+ Configuring a network driver to load properly when the system is started is
+ distribution dependent. Typically, the configuration process involves adding
+ an alias line to /etc/modules.conf or /etc/modprobe.conf as well as editing
+ other system startup scripts and/or configuration files. Many popular Linux
+ distributions ship with tools to make these changes for you. To learn the
+ proper way to configure a network device for your system, refer to your
+ distribution documentation. If during this process you are asked for the
+ driver or module name, the name for the Linux Base Driver for the Intel
+ PRO/100 Family of Adapters is e100.
- As an example, if you install the e100 driver for two PRO/100 adapters
- (eth0 and eth1), add the following to modules.conf:
+ As an example, if you install the e100 driver for two PRO/100 adapters
+ (eth0 and eth1), add the following to modules.conf or modprobe.conf:
alias eth0 e100
alias eth1 e100
Viewing Link Messages
---------------------
- In order to see link messages and other Intel driver information on your
- console, you must set the dmesg level up to six. This can be done by
- entering the following on the command line before loading the e100 driver:
+ In order to see link messages and other Intel driver information on your
+ console, you must set the dmesg level up to six. This can be done by
+ entering the following on the command line before loading the e100 driver:
dmesg -n 8
- If you wish to see all messages issued by the driver, including debug
+ If you wish to see all messages issued by the driver, including debug
messages, set the dmesg level to eight.
NOTE: This setting is not saved across reboots.
+
Ethtool
-------
@@ -114,29 +129,27 @@ Additional Configurations
diagnostics, as well as displaying statistical information. Ethtool
version 1.6 or later is required for this functionality.
- The latest release of ethtool can be found at:
- http://sf.net/projects/gkernel.
+ The latest release of ethtool can be found from
+ http://sourceforge.net/projects/gkernel.
- NOTE: This driver uses mii support from the kernel. As a result, when
- there is no link, ethtool will report speed/duplex to be 10/half.
+ NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
+ for a more complete ethtool feature set can be enabled by upgrading
+ ethtool to ethtool-1.8.1.
- NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
- for a more complete ethtool feature set can be enabled by upgrading
- ethtool to ethtool-1.8.1.
Enabling Wake on LAN* (WoL)
---------------------------
- WoL is provided through the Ethtool* utility. Ethtool is included with Red
- Hat* 8.0. For other Linux distributions, download and install Ethtool from
- the following website: http://sourceforge.net/projects/gkernel.
+ WoL is provided through the Ethtool* utility. Ethtool is included with Red
+ Hat* 8.0. For other Linux distributions, download and install Ethtool from
+ the following website: http://sourceforge.net/projects/gkernel.
- For instructions on enabling WoL with Ethtool, refer to the Ethtool man
- page.
+ For instructions on enabling WoL with Ethtool, refer to the Ethtool man page.
WoL will be enabled on the system during the next shut down or reboot. For
- this driver version, in order to enable WoL, the e100 driver must be
+ this driver version, in order to enable WoL, the e100 driver must be
loaded when shutting down or rebooting the system.
+
NAPI
----
@@ -144,6 +157,25 @@ Additional Configurations
See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
+ Multiple Interfaces on Same Ethernet Broadcast Network
+ ------------------------------------------------------
+
+ Due to the default ARP behavior on Linux, it is not possible to have
+ one system on two IP networks in the same Ethernet broadcast domain
+ (non-partitioned switch) behave as expected. All Ethernet interfaces
+ will respond to IP traffic for any IP address assigned to the system.
+ This results in unbalanced receive traffic.
+
+ If you have multiple interfaces in a server, either turn on ARP
+ filtering by
+
+ (1) entering: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+ (this only works if your kernel's version is higher than 2.4.5), or
+
+ (2) installing the interfaces in separate broadcast domains (either
+ in different switches or in a switch partitioned to VLANs).
+
+
Support
=======
@@ -151,20 +183,24 @@ For general information, go to the Intel support website at:
http://support.intel.com
+ or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related to
-the issue to linux.nics@intel.com.
+kernel with a supported adapter, email the specific information related to the
+issue to e1000-devel@lists.sourceforge.net.
License
=======
-This software program is released under the terms of a license agreement
-between you ('Licensee') and Intel. Do not use or load this software or any
-associated materials (collectively, the 'Software') until you have carefully
-read the full terms and conditions of the LICENSE located in this software
-package. By loading or using the Software, you agree to the terms of this
-Agreement. If you do not agree with the terms of this Agreement, do not
-install or use the Software.
+This software program is released under the terms of a license agreement
+between you ('Licensee') and Intel. Do not use or load this software or any
+associated materials (collectively, the 'Software') until you have carefully
+read the full terms and conditions of the file COPYING located in this software
+package. By loading or using the Software, you agree to the terms of this
+Agreement. If you do not agree with the terms of this Agreement, do not install
+or use the Software.
* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt
index 2ebd4058d46d..71fe15af356c 100644
--- a/Documentation/networking/e1000.txt
+++ b/Documentation/networking/e1000.txt
@@ -1,7 +1,7 @@
Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters
===============================================================
-November 17, 2004
+November 15, 2005
Contents
@@ -20,254 +20,316 @@ In This Release
===============
This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family
-of Adapters, version 5.x.x.
+of Adapters. This driver includes support for Itanium(R)2-based systems.
-For questions related to hardware requirements, refer to the documentation
-supplied with your Intel PRO/1000 adapter. All hardware requirements listed
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel PRO/1000 adapter. All hardware requirements listed
apply to use with Linux.
-Native VLANs are now available with supported kernels.
+The following features are now available in supported kernels:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
+The driver information previously displayed in the /proc filesystem is not
+supported in this release. Alternatively, you can use ethtool (version 1.6
+or later), lspci, and ifconfig to obtain the same information.
+
+Instructions on updating ethtool can be found in the section "Additional
+Configurations" later in this document.
+
Identifying Your Adapter
========================
-For more information on how to identify your adapter, go to the Adapter &
+For more information on how to identify your adapter, go to the Adapter &
Driver ID Guide at:
http://support.intel.com/support/network/adapter/pro100/21397.htm
-For the latest Intel network drivers for Linux, refer to the following
-website. In the search field, enter your adapter name or type, or use the
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
networking link on the left to search for your adapter:
http://downloadfinder.intel.com/scripts-df/support_intel.asp
-Command Line Parameters
-=======================
-If the driver is built as a module, the following optional parameters are
-used by entering them on the command line with the modprobe or insmod command
-using this syntax:
+Command Line Parameters =======================
+
+If the driver is built as a module, the following optional parameters
+are used by entering them on the command line with the modprobe or insmod
+command using this syntax:
modprobe e1000 [<option>=<VAL1>,<VAL2>,...]
- insmod e1000 [<option>=<VAL1>,<VAL2>,...]
+ insmod e1000 [<option>=<VAL1>,<VAL2>,...]
For example, with two PRO/1000 PCI adapters, entering:
insmod e1000 TxDescriptors=80,128
-loads the e1000 driver with 80 TX descriptors for the first adapter and 128 TX
-descriptors for the second adapter.
+loads the e1000 driver with 80 TX descriptors for the first adapter and 128
+TX descriptors for the second adapter.
The default value for each parameter is generally the recommended setting,
-unless otherwise noted. Also, if the driver is statically built into the
-kernel, the driver is loaded with the default values for all the parameters.
-Ethtool can be used to change some of the parameters at runtime.
+unless otherwise noted.
+
+NOTES: For more information about the AutoNeg, Duplex, and Speed
+ parameters, see the "Speed and Duplex Configuration" section in
+ this document.
- NOTES: For more information about the AutoNeg, Duplex, and Speed
- parameters, see the "Speed and Duplex Configuration" section in
- this document.
+ For more information about the InterruptThrottleRate,
+ RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay
+ parameters, see the application note at:
+ http://www.intel.com/design/network/applnots/ap450.htm
- For more information about the InterruptThrottleRate, RxIntDelay,
- TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay parameters, see the
- application note at:
- http://www.intel.com/design/network/applnots/ap450.htm
+ A descriptor describes a data buffer and attributes related to
+ the data buffer. This information is accessed by the hardware.
- A descriptor describes a data buffer and attributes related to the
- data buffer. This information is accessed by the hardware.
-AutoNeg (adapters using copper connections only)
-Valid Range: 0x01-0x0F, 0x20-0x2F
+AutoNeg
+-------
+(Supported only on adapters with copper connections)
+Valid Range: 0x01-0x0F, 0x20-0x2F
Default Value: 0x2F
- This parameter is a bit mask that specifies which speed and duplex
- settings the board advertises. When this parameter is used, the Speed and
- Duplex parameters must not be specified.
- NOTE: Refer to the Speed and Duplex section of this readme for more
- information on the AutoNeg parameter.
-
-Duplex (adapters using copper connections only)
-Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full)
+
+This parameter is a bit mask that specifies which speed and duplex
+settings the board advertises. When this parameter is used, the Speed
+and Duplex parameters must not be specified.
+
+NOTE: Refer to the Speed and Duplex section of this readme for more
+ information on the AutoNeg parameter.
+
+
+Duplex
+------
+(Supported only on adapters with copper connections)
+Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full)
Default Value: 0
- Defines the direction in which data is allowed to flow. Can be either one
- or two-directional. If both Duplex and the link partner are set to auto-
- negotiate, the board auto-detects the correct duplex. If the link partner
- is forced (either full or half), Duplex defaults to half-duplex.
+
+Defines the direction in which data is allowed to flow. Can be either
+one or two-directional. If both Duplex and the link partner are set to
+auto-negotiate, the board auto-detects the correct duplex. If the link
+partner is forced (either full or half), Duplex defaults to half-duplex.
+
FlowControl
-Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
-Default: Read flow control settings from the EEPROM
- This parameter controls the automatic generation(Tx) and response(Rx) to
- Ethernet PAUSE frames.
+----------
+Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+Default Value: Reads flow control settings from the EEPROM
+
+This parameter controls the automatic generation(Tx) and response(Rx)
+to Ethernet PAUSE frames.
+
InterruptThrottleRate
-Valid Range: 100-100000 (0=off, 1=dynamic)
+---------------------
+(not supported on Intel 82542, 82543 or 82544-based adapters)
+Valid Range: 100-100000 (0=off, 1=dynamic)
Default Value: 8000
- This value represents the maximum number of interrupts per second the
- controller generates. InterruptThrottleRate is another setting used in
- interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust
- InterruptThrottleRate based on the current traffic load.
-Un-supported Adapters: InterruptThrottleRate is NOT supported by 82542, 82543
- or 82544-based adapters.
-
- NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
- RxAbsIntDelay parameters. In other words, minimizing the receive
- and/or transmit absolute delays does not force the controller to
- generate more interrupts than what the Interrupt Throttle Rate
- allows.
- CAUTION: If you are using the Intel PRO/1000 CT Network Connection
- (controller 82547), setting InterruptThrottleRate to a value
- greater than 75,000, may hang (stop transmitting) adapters under
- certain network conditions. If this occurs a NETDEV WATCHDOG
- message is logged in the system event log. In addition, the
- controller is automatically reset, restoring the network
- connection. To eliminate the potential for the hang, ensure
- that InterruptThrottleRate is set no greater than 75,000 and is
- not set to 0.
- NOTE: When e1000 is loaded with default settings and multiple adapters are
- in use simultaneously, the CPU utilization may increase non-linearly.
- In order to limit the CPU utilization without impacting the overall
- throughput, we recommend that you load the driver as follows:
-
- insmod e1000.o InterruptThrottleRate=3000,3000,3000
-
- This sets the InterruptThrottleRate to 3000 interrupts/sec for the
- first, second, and third instances of the driver. The range of 2000 to
- 3000 interrupts per second works on a majority of systems and is a
- good starting point, but the optimal value will be platform-specific.
- If CPU utilization is not a concern, use RX_POLLING (NAPI) and default
- driver settings.
+
+This value represents the maximum number of interrupts per second the
+controller generates. InterruptThrottleRate is another setting used in
+interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust
+InterruptThrottleRate based on the current traffic load.
+
+NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
+ RxAbsIntDelay parameters. In other words, minimizing the receive
+ and/or transmit absolute delays does not force the controller to
+ generate more interrupts than what the Interrupt Throttle Rate
+ allows.
+
+CAUTION: If you are using the Intel PRO/1000 CT Network Connection
+ (controller 82547), setting InterruptThrottleRate to a value
+ greater than 75,000, may hang (stop transmitting) adapters
+ under certain network conditions. If this occurs a NETDEV
+ WATCHDOG message is logged in the system event log. In
+ addition, the controller is automatically reset, restoring
+ the network connection. To eliminate the potential for the
+ hang, ensure that InterruptThrottleRate is set no greater
+ than 75,000 and is not set to 0.
+
+NOTE: When e1000 is loaded with default settings and multiple adapters
+ are in use simultaneously, the CPU utilization may increase non-
+ linearly. In order to limit the CPU utilization without impacting
+ the overall throughput, we recommend that you load the driver as
+ follows:
+
+ insmod e1000.o InterruptThrottleRate=3000,3000,3000
+
+ This sets the InterruptThrottleRate to 3000 interrupts/sec for
+ the first, second, and third instances of the driver. The range
+ of 2000 to 3000 interrupts per second works on a majority of
+ systems and is a good starting point, but the optimal value will
+ be platform-specific. If CPU utilization is not a concern, use
+ RX_POLLING (NAPI) and default driver settings.
+
RxDescriptors
-Valid Range: 80-256 for 82542 and 82543-based adapters
- 80-4096 for all other supported adapters
+-------------
+Valid Range: 80-256 for 82542 and 82543-based adapters
+ 80-4096 for all other supported adapters
Default Value: 256
- This value is the number of receive descriptors allocated by the driver.
- Increasing this value allows the driver to buffer more incoming packets.
- Each descriptor is 16 bytes. A receive buffer is allocated for each
- descriptor and can either be 2048 or 4096 bytes long, depending on the MTU
- setting. An incoming packet can span one or more receive descriptors.
- The maximum MTU size is 16110.
+This value specifies the number of receive descriptors allocated by the
+driver. Increasing this value allows the driver to buffer more incoming
+packets. Each descriptor is 16 bytes. A receive buffer is also
+allocated for each descriptor and is 2048.
- NOTE: MTU designates the frame size. It only needs to be set for Jumbo
- Frames.
- NOTE: Depending on the available system resources, the request for a
- higher number of receive descriptors may be denied. In this case,
- use a lower number.
RxIntDelay
-Valid Range: 0-65535 (0=off)
+----------
+Valid Range: 0-65535 (0=off)
Default Value: 0
- This value delays the generation of receive interrupts in units of 1.024
- microseconds. Receive interrupt reduction can improve CPU efficiency if
- properly tuned for specific network traffic. Increasing this value adds
- extra latency to frame reception and can end up decreasing the throughput
- of TCP traffic. If the system is reporting dropped receives, this value
- may be set too high, causing the driver to run out of available receive
- descriptors.
-
- CAUTION: When setting RxIntDelay to a value other than 0, adapters may
- hang (stop transmitting) under certain network conditions. If
- this occurs a NETDEV WATCHDOG message is logged in the system
- event log. In addition, the controller is automatically reset,
- restoring the network connection. To eliminate the potential for
- the hang ensure that RxIntDelay is set to 0.
-
-RxAbsIntDelay (82540, 82545 and later adapters only)
-Valid Range: 0-65535 (0=off)
+
+This value delays the generation of receive interrupts in units of 1.024
+microseconds. Receive interrupt reduction can improve CPU efficiency if
+properly tuned for specific network traffic. Increasing this value adds
+extra latency to frame reception and can end up decreasing the throughput
+of TCP traffic. If the system is reporting dropped receives, this value
+may be set too high, causing the driver to run out of available receive
+descriptors.
+
+CAUTION: When setting RxIntDelay to a value other than 0, adapters may
+ hang (stop transmitting) under certain network conditions. If
+ this occurs a NETDEV WATCHDOG message is logged in the system
+ event log. In addition, the controller is automatically reset,
+ restoring the network connection. To eliminate the potential
+ for the hang ensure that RxIntDelay is set to 0.
+
+
+RxAbsIntDelay
+-------------
+(This parameter is supported only on 82540, 82545 and later adapters.)
+Valid Range: 0-65535 (0=off)
Default Value: 128
- This value, in units of 1.024 microseconds, limits the delay in which a
- receive interrupt is generated. Useful only if RxIntDelay is non-zero,
- this value ensures that an interrupt is generated after the initial
- packet is received within the set amount of time. Proper tuning,
- along with RxIntDelay, may improve traffic throughput in specific network
- conditions.
-
-Speed (adapters using copper connections only)
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+receive interrupt is generated. Useful only if RxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is received within the set amount of time. Proper tuning,
+along with RxIntDelay, may improve traffic throughput in specific network
+conditions.
+
+
+Speed
+-----
+(This parameter is supported only on adapters with copper connections.)
Valid Settings: 0, 10, 100, 1000
-Default Value: 0 (auto-negotiate at all supported speeds)
- Speed forces the line speed to the specified value in megabits per second
- (Mbps). If this parameter is not specified or is set to 0 and the link
- partner is set to auto-negotiate, the board will auto-detect the correct
- speed. Duplex should also be set when Speed is set to either 10 or 100.
+Default Value: 0 (auto-negotiate at all supported speeds)
+
+Speed forces the line speed to the specified value in megabits per second
+(Mbps). If this parameter is not specified or is set to 0 and the link
+partner is set to auto-negotiate, the board will auto-detect the correct
+speed. Duplex should also be set when Speed is set to either 10 or 100.
+
TxDescriptors
-Valid Range: 80-256 for 82542 and 82543-based adapters
- 80-4096 for all other supported adapters
+-------------
+Valid Range: 80-256 for 82542 and 82543-based adapters
+ 80-4096 for all other supported adapters
Default Value: 256
- This value is the number of transmit descriptors allocated by the driver.
- Increasing this value allows the driver to queue more transmits. Each
- descriptor is 16 bytes.
- NOTE: Depending on the available system resources, the request for a
- higher number of transmit descriptors may be denied. In this case,
- use a lower number.
+This value is the number of transmit descriptors allocated by the driver.
+Increasing this value allows the driver to queue more transmits. Each
+descriptor is 16 bytes.
+
+NOTE: Depending on the available system resources, the request for a
+ higher number of transmit descriptors may be denied. In this case,
+ use a lower number.
+
TxIntDelay
-Valid Range: 0-65535 (0=off)
+----------
+Valid Range: 0-65535 (0=off)
Default Value: 64
- This value delays the generation of transmit interrupts in units of
- 1.024 microseconds. Transmit interrupt reduction can improve CPU
- efficiency if properly tuned for specific network traffic. If the
- system is reporting dropped transmits, this value may be set too high
- causing the driver to run out of available transmit descriptors.
-
-TxAbsIntDelay (82540, 82545 and later adapters only)
-Valid Range: 0-65535 (0=off)
+
+This value delays the generation of transmit interrupts in units of
+1.024 microseconds. Transmit interrupt reduction can improve CPU
+efficiency if properly tuned for specific network traffic. If the
+system is reporting dropped transmits, this value may be set too high
+causing the driver to run out of available transmit descriptors.
+
+
+TxAbsIntDelay
+-------------
+(This parameter is supported only on 82540, 82545 and later adapters.)
+Valid Range: 0-65535 (0=off)
Default Value: 64
- This value, in units of 1.024 microseconds, limits the delay in which a
- transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
- this value ensures that an interrupt is generated after the initial
- packet is sent on the wire within the set amount of time. Proper tuning,
- along with TxIntDelay, may improve traffic throughput in specific
- network conditions.
-
-XsumRX (not available on the 82542-based adapter)
-Valid Range: 0-1
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is sent on the wire within the set amount of time. Proper tuning,
+along with TxIntDelay, may improve traffic throughput in specific
+network conditions.
+
+XsumRX
+------
+(This parameter is NOT supported on the 82542-based adapter.)
+Valid Range: 0-1
Default Value: 1
- A value of '1' indicates that the driver should enable IP checksum
- offload for received packets (both UDP and TCP) to the adapter hardware.
+
+A value of '1' indicates that the driver should enable IP checksum
+offload for received packets (both UDP and TCP) to the adapter hardware.
+
Speed and Duplex Configuration
==============================
-Three keywords are used to control the speed and duplex configuration. These
-keywords are Speed, Duplex, and AutoNeg.
+Three keywords are used to control the speed and duplex configuration.
+These keywords are Speed, Duplex, and AutoNeg.
-If the board uses a fiber interface, these keywords are ignored, and the
+If the board uses a fiber interface, these keywords are ignored, and the
fiber interface board only links at 1000 Mbps full-duplex.
For copper-based boards, the keywords interact as follows:
- The default operation is auto-negotiate. The board advertises all supported
- speed and duplex combinations, and it links at the highest common speed and
- duplex mode IF the link partner is set to auto-negotiate.
+ The default operation is auto-negotiate. The board advertises all
+ supported speed and duplex combinations, and it links at the highest
+ common speed and duplex mode IF the link partner is set to auto-negotiate.
- If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps is
- advertised (The 1000BaseT spec requires auto-negotiation.)
+ If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps
+ is advertised (The 1000BaseT spec requires auto-negotiation.)
If Speed = 10 or 100, then both Speed and Duplex should be set. Auto-
- negotiation is disabled, and the AutoNeg parameter is ignored. Partner SHOULD
- also be forced.
+ negotiation is disabled, and the AutoNeg parameter is ignored. Partner
+ SHOULD also be forced.
+
+The AutoNeg parameter is used when more control is required over the
+auto-negotiation process. It should be used when you wish to control which
+speed and duplex combinations are advertised during the auto-negotiation
+process.
+
+The parameter may be specified as either a decimal or hexidecimal value as
+determined by the bitmap below.
-The AutoNeg parameter is used when more control is required over the auto-
-negotiation process. When this parameter is used, Speed and Duplex parameters
-must not be specified. The following table describes supported values for the
-AutoNeg parameter:
+Bit position 7 6 5 4 3 2 1 0
+Decimal Value 128 64 32 16 8 4 2 1
+Hex value 80 40 20 10 8 4 2 1
+Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10
+Duplex Full Full Half Full Half
-Speed (Mbps) 1000 100 100 10 10
-Duplex Full Full Half Full Half
-Value (in base 16) 0x20 0x08 0x04 0x02 0x01
+Some examples of using AutoNeg:
-Example: insmod e1000 AutoNeg=0x03, loads e1000 and specifies (10 full duplex,
-10 half duplex) for negotiation with the peer.
+ modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half)
+ modprobe e1000 AutoNeg=1 (Same as above)
+ modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full)
+ modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full)
+ modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half)
+ modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100
+ Half)
+ modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full)
+ modprobe e1000 AutoNeg=32 (Same as above)
-Note that setting AutoNeg does not guarantee that the board will link at the
-highest specified speed or duplex mode, but the board will link at the
-highest possible speed/duplex of the link partner IF the link partner is also
-set to auto-negotiate. If the link partner is forced speed/duplex, the
-adapter MUST be forced to the same speed/duplex.
+Note that when this parameter is used, Speed and Duplex must not be specified.
+
+If the link partner is forced to a specific speed and duplex, then this
+parameter should not be used. Instead, use the Speed and Duplex parameters
+previously mentioned to force the adapter to the same speed and duplex.
Additional Configurations
@@ -276,19 +338,19 @@ Additional Configurations
Configuring the Driver on Different Distributions
-------------------------------------------------
- Configuring a network driver to load properly when the system is started is
- distribution dependent. Typically, the configuration process involves adding
- an alias line to /etc/modules.conf as well as editing other system startup
- scripts and/or configuration files. Many popular Linux distributions ship
- with tools to make these changes for you. To learn the proper way to
- configure a network device for your system, refer to your distribution
- documentation. If during this process you are asked for the driver or module
- name, the name for the Linux Base Driver for the Intel PRO/1000 Family of
- Adapters is e1000.
+ Configuring a network driver to load properly when the system is started
+ is distribution dependent. Typically, the configuration process involves
+ adding an alias line to /etc/modules.conf or /etc/modprobe.conf as well
+ as editing other system startup scripts and/or configuration files. Many
+ popular Linux distributions ship with tools to make these changes for you.
+ To learn the proper way to configure a network device for your system,
+ refer to your distribution documentation. If during this process you are
+ asked for the driver or module name, the name for the Linux Base Driver
+ for the Intel PRO/1000 Family of Adapters is e1000.
- As an example, if you install the e1000 driver for two PRO/1000 adapters
- (eth0 and eth1) and set the speed and duplex to 10full and 100half, add the
- following to modules.conf:
+ As an example, if you install the e1000 driver for two PRO/1000 adapters
+ (eth0 and eth1) and set the speed and duplex to 10full and 100half, add
+ the following to modules.conf or or modprobe.conf:
alias eth0 e1000
alias eth1 e1000
@@ -297,9 +359,9 @@ Additional Configurations
Viewing Link Messages
---------------------
- Link messages will not be displayed to the console if the distribution is
- restricting system messages. In order to see network driver link messages on
- your console, set dmesg to eight by entering the following:
+ Link messages will not be displayed to the console if the distribution is
+ restricting system messages. In order to see network driver link messages
+ on your console, set dmesg to eight by entering the following:
dmesg -n 8
@@ -308,22 +370,42 @@ Additional Configurations
Jumbo Frames
------------
- The driver supports Jumbo Frames for all adapters except 82542-based
- adapters. Jumbo Frames support is enabled by changing the MTU to a value
- larger than the default of 1500. Use the ifconfig command to increase the
- MTU size. For example:
+ The driver supports Jumbo Frames for all adapters except 82542 and
+ 82573-based adapters. Jumbo Frames support is enabled by changing the
+ MTU to a value larger than the default of 1500. Use the ifconfig command
+ to increase the MTU size. For example:
+
+ ifconfig eth<x> mtu 9000 up
+
+ This setting is not saved across reboots. It can be made permanent if
+ you add:
+
+ MTU=9000
- ifconfig ethx mtu 9000 up
+ to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>. This example
+ applies to the Red Hat distributions; other distributions may store this
+ setting in a different location.
- The maximum MTU setting for Jumbo Frames is 16110. This value coincides
- with the maximum Jumbo Frames size of 16128.
+ Notes:
- NOTE: Jumbo Frames are supported at 1000 Mbps only. Using Jumbo Frames at
- 10 or 100 Mbps may result in poor performance or loss of link.
+ - To enable Jumbo Frames, increase the MTU size on the interface beyond
+ 1500.
+ - The maximum MTU setting for Jumbo Frames is 16110. This value coincides
+ with the maximum Jumbo Frames size of 16128.
+ - Using Jumbo Frames at 10 or 100 Mbps may result in poor performance or
+ loss of link.
+ - Some Intel gigabit adapters that support Jumbo Frames have a frame size
+ limit of 9238 bytes, with a corresponding MTU size limit of 9216 bytes.
+ The adapters with this limitation are based on the Intel 82571EB and
+ 82572EI controllers, which correspond to these product names:
+ Intel® PRO/1000 PT Dual Port Server Adapter
+ Intel® PRO/1000 PF Dual Port Server Adapter
+ Intel® PRO/1000 PT Server Adapter
+ Intel® PRO/1000 PT Desktop Adapter
+ Intel® PRO/1000 PF Server Adapter
+ - The Intel PRO/1000 PM Network Connection does not support jumbo frames.
- NOTE: MTU designates the frame size. To enable Jumbo Frames, increase the
- MTU size on the interface beyond 1500.
Ethtool
-------
@@ -333,32 +415,41 @@ Additional Configurations
version 1.6 or later is required for this functionality.
The latest release of ethtool can be found from
- http://sf.net/projects/gkernel.
+ http://sourceforge.net/projects/gkernel.
- NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
- for a more complete ethtool feature set can be enabled by upgrading
- ethtool to ethtool-1.8.1.
+ NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
+ for a more complete ethtool feature set can be enabled by upgrading
+ ethtool to ethtool-1.8.1.
Enabling Wake on LAN* (WoL)
---------------------------
WoL is configured through the Ethtool* utility. Ethtool is included with
- all versions of Red Hat after Red Hat 7.2. For other Linux distributions,
- download and install Ethtool from the following website:
+ all versions of Red Hat after Red Hat 7.2. For other Linux distributions,
+ download and install Ethtool from the following website:
http://sourceforge.net/projects/gkernel.
- For instructions on enabling WoL with Ethtool, refer to the website listed
+ For instructions on enabling WoL with Ethtool, refer to the website listed
above.
- WoL will be enabled on the system during the next shut down or reboot.
- For this driver version, in order to enable WoL, the e1000 driver must be
+ WoL will be enabled on the system during the next shut down or reboot.
+ For this driver version, in order to enable WoL, the e1000 driver must be
loaded when shutting down or rebooting the system.
NAPI
----
NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled
- or disabled based on the configuration of the kernel.
+ or disabled based on the configuration of the kernel. To override
+ the default, use the following compile-time flags.
+
+ To enable NAPI, compile the driver module, passing in a configuration option:
+
+ make CFLAGS_EXTRA=-DE1000_NAPI install
+
+ To disable NAPI, compile the driver module, passing in a configuration option:
+
+ make CFLAGS_EXTRA=-DE1000_NO_NAPI install
See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
@@ -369,10 +460,85 @@ Known Issues
Jumbo Frames System Requirement
-------------------------------
- Memory allocation failures have been observed on Linux systems with 64 MB
- of RAM or less that are running Jumbo Frames. If you are using Jumbo Frames,
- your system may require more than the advertised minimum requirement of 64 MB
- of system memory.
+ Memory allocation failures have been observed on Linux systems with 64 MB
+ of RAM or less that are running Jumbo Frames. If you are using Jumbo
+ Frames, your system may require more than the advertised minimum
+ requirement of 64 MB of system memory.
+
+ Performance Degradation with Jumbo Frames
+ -----------------------------------------
+
+ Degradation in throughput performance may be observed in some Jumbo frames
+ environments. If this is observed, increasing the application's socket
+ buffer size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values
+ may help. See the specific application manual and
+ /usr/src/linux*/Documentation/
+ networking/ip-sysctl.txt for more details.
+
+ Jumbo frames on Foundry BigIron 8000 switch
+ -------------------------------------------
+ There is a known issue using Jumbo frames when connected to a Foundry
+ BigIron 8000 switch. This is a 3rd party limitation. If you experience
+ loss of packets, lower the MTU size.
+
+ Multiple Interfaces on Same Ethernet Broadcast Network
+ ------------------------------------------------------
+
+ Due to the default ARP behavior on Linux, it is not possible to have
+ one system on two IP networks in the same Ethernet broadcast domain
+ (non-partitioned switch) behave as expected. All Ethernet interfaces
+ will respond to IP traffic for any IP address assigned to the system.
+ This results in unbalanced receive traffic.
+
+ If you have multiple interfaces in a server, either turn on ARP
+ filtering by entering:
+
+ echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+ (this only works if your kernel's version is higher than 2.4.5),
+
+ NOTE: This setting is not saved across reboots. The configuration
+ change can be made permanent by adding the line:
+ net.ipv4.conf.all.arp_filter = 1
+ to the file /etc/sysctl.conf
+
+ or,
+
+ install the interfaces in separate broadcast domains (either in
+ different switches or in a switch partitioned to VLANs).
+
+ 82541/82547 can't link or are slow to link with some link partners
+ -----------------------------------------------------------------
+
+ There is a known compatibility issue with 82541/82547 and some
+ low-end switches where the link will not be established, or will
+ be slow to establish. In particular, these switches are known to
+ be incompatible with 82541/82547:
+
+ Planex FXG-08TE
+ I-O Data ETG-SH8
+
+ To workaround this issue, the driver can be compiled with an override
+ of the PHY's master/slave setting. Forcing master or forcing slave
+ mode will improve time-to-link.
+
+ # make EXTRA_CFLAGS=-DE1000_MASTER_SLAVE=<n>
+
+ Where <n> is:
+
+ 0 = Hardware default
+ 1 = Master mode
+ 2 = Slave mode
+ 3 = Auto master/slave
+
+ Disable rx flow control with ethtool
+ ------------------------------------
+
+ In order to disable receive flow control using ethtool, you must turn
+ off auto-negotiation on the same command line.
+
+ For example:
+
+ ethtool -A eth? autoneg off rx off
Support
@@ -382,20 +548,24 @@ For general information, go to the Intel support website at:
http://support.intel.com
+ or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related to
-the issue to linux.nics@intel.com.
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sourceforge.net
License
=======
-This software program is released under the terms of a license agreement
-between you ('Licensee') and Intel. Do not use or load this software or any
-associated materials (collectively, the 'Software') until you have carefully
-read the full terms and conditions of the LICENSE located in this software
-package. By loading or using the Software, you agree to the terms of this
-Agreement. If you do not agree with the terms of this Agreement, do not
+This software program is released under the terms of a license agreement
+between you ('Licensee') and Intel. Do not use or load this software or any
+associated materials (collectively, the 'Software') until you have carefully
+read the full terms and conditions of the file COPYING located in this software
+package. By loading or using the Software, you agree to the terms of this
+Agreement. If you do not agree with the terms of this Agreement, do not
install or use the Software.
* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c
index 545447ac503a..a12059886755 100644
--- a/Documentation/networking/ifenslave.c
+++ b/Documentation/networking/ifenslave.c
@@ -87,7 +87,7 @@
* would fail and generate an error message in the system log.
* - For opt_c: slave should not be set to the master's setting
* while it is running. It was already set during enslave. To
- * simplify things, it is now handeled separately.
+ * simplify things, it is now handled separately.
*
* - 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Code cleanup and style changes
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 26364d06ae92..f12007b80a46 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -355,6 +355,13 @@ somaxconn - INTEGER
Defaults to 128. See also tcp_max_syn_backlog for additional tuning
for TCP sockets.
+tcp_workaround_signed_windows - BOOLEAN
+ If set, assume no receipt of a window scaling option means the
+ remote TCP is broken and treats the window as a signed quantity.
+ If unset, assume the remote TCP is not broken even if we do
+ not receive a window scaling option from them.
+ Default: 0
+
IP Variables:
ip_local_port_range - 2 INTEGERS
@@ -619,6 +626,11 @@ arp_ignore - INTEGER
The max value from conf/{all,interface}/arp_ignore is used
when ARP request is received on the {interface}
+arp_accept - BOOLEAN
+ Define behavior when gratuitous arp replies are received:
+ 0 - drop gratuitous arp frames
+ 1 - accept gratuitous arp frames
+
app_solicit - INTEGER
The maximum number of probes to send to the user space ARP daemon
via netlink before dropping back to multicast probes (see
@@ -717,6 +729,33 @@ accept_ra - BOOLEAN
Functional default: enabled if local forwarding is disabled.
disabled if local forwarding is enabled.
+accept_ra_defrtr - BOOLEAN
+ Learn default router in Router Advertisement.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
+accept_ra_pinfo - BOOLEAN
+ Learn Prefix Inforamtion in Router Advertisement.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
+accept_ra_rt_info_max_plen - INTEGER
+ Maximum prefix length of Route Information in RA.
+
+ Route Information w/ prefix larger than or equal to this
+ variable shall be ignored.
+
+ Functional default: 0 if accept_ra_rtr_pref is enabled.
+ -1 if accept_ra_rtr_pref is disabled.
+
+accept_ra_rtr_pref - BOOLEAN
+ Accept Router Preference in RA.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
accept_redirects - BOOLEAN
Accept Redirects.
@@ -727,8 +766,8 @@ autoconf - BOOLEAN
Autoconfigure addresses using Prefix Information in Router
Advertisements.
- Functional default: enabled if accept_ra is enabled.
- disabled if accept_ra is disabled.
+ Functional default: enabled if accept_ra_pinfo is enabled.
+ disabled if accept_ra_pinfo is disabled.
dad_transmits - INTEGER
The amount of Duplicate Address Detection probes to send.
@@ -771,6 +810,12 @@ mtu - INTEGER
Default Maximum Transfer Unit
Default: 1280 (IPv6 required minimum)
+router_probe_interval - INTEGER
+ Minimum interval (in seconds) between Router Probing described
+ in RFC4191.
+
+ Default: 60
+
router_solicitation_delay - INTEGER
Number of seconds to wait after interface is brought up
before sending Router Solicitations.
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 8d4cf78258e4..4fc8e9874320 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -40,7 +40,7 @@ network interface card supports some sort of interrupt load mitigation or
+ How to use CONFIG_PACKET_MMAP
--------------------------------------------------------------------------------
-From the user standpoint, you should use the higher level libpcap library, wich
+From the user standpoint, you should use the higher level libpcap library, which
is a de facto standard, portable across nearly all operating systems
including Win32.
@@ -217,8 +217,8 @@ called pg_vec, its size limits the number of blocks that can be allocated.
kmalloc allocates any number of bytes of phisically contiguous memory from
a pool of pre-determined sizes. This pool of memory is mantained by the slab
-allocator wich is at the end the responsible for doing the allocation and
-hence wich imposes the maximum memory that kmalloc can allocate.
+allocator which is at the end the responsible for doing the allocation and
+hence which imposes the maximum memory that kmalloc can allocate.
In a 2.4/2.6 kernel and the i386 architecture, the limit is 131072 bytes. The
predetermined sizes that kmalloc uses can be checked in the "size-<bytes>"
@@ -254,7 +254,7 @@ and, the number of frames be
<block number> * <block size> / <frame size>
-Suposse the following parameters, wich apply for 2.6 kernel and an
+Suposse the following parameters, which apply for 2.6 kernel and an
i386 architecture:
<size-max> = 131072 bytes
@@ -360,7 +360,7 @@ TP_STATUS_LOSING : indicates there were packet drops from last time
statistics where checked with getsockopt() and
the PACKET_STATISTICS option.
-TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets wich
+TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which
it's checksum will be done in hardware. So while
reading the packet we should not try to check the
checksum.
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index cc4b4d04129c..278771c9ad99 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -109,6 +109,22 @@ Examples:
cycle through the port range.
pgset "udp_dst_max 9" set UDP destination port max.
+ pgset "mpls 0001000a,0002000a,0000000a" set MPLS labels (in this example
+ outer label=16,middle label=32,
+ inner label=0 (IPv4 NULL)) Note that
+ there must be no spaces between the
+ arguments. Leading zeros are required.
+ Do not set the bottom of stack bit,
+ thats done automatically. If you do
+ set the bottom of stack bit, that
+ indicates that you want to randomly
+ generate that address and the flag
+ MPLS_RND will be turned on. You
+ can have any mix of random and fixed
+ labels in the label stack.
+
+ pgset "mpls 0" turn off mpls (or any invalid argument works too!)
+
pgset stop aborts injection. Also, ^C aborts generator.
@@ -167,6 +183,8 @@ pkt_size
min_pkt_size
max_pkt_size
+mpls
+
udp_src_min
udp_src_max
@@ -211,4 +229,4 @@ Grant Grundler for testing on IA-64 and parisc, Harald Welte, Lennert Buytenhek
Stephen Hemminger, Andi Kleen, Dave Miller and many others.
-Good luck with the linux net-development. \ No newline at end of file
+Good luck with the linux net-development.
diff --git a/Documentation/networking/ray_cs.txt b/Documentation/networking/ray_cs.txt
index 5427f8c7df95..145d27a52395 100644
--- a/Documentation/networking/ray_cs.txt
+++ b/Documentation/networking/ray_cs.txt
@@ -25,7 +25,7 @@ the essid= string parameter is available via the kernel command line.
This will change after the method of sorting out parameters for all
the PCMCIA drivers is agreed upon. If you must have a built in driver
with nondefault parameters, they can be edited in
-/usr/src/linux/drivers/net/pcmcia/ray_cs.c. Searching for MODULE_PARM
+/usr/src/linux/drivers/net/pcmcia/ray_cs.c. Searching for module_param
will find them all.
Information on card services is available at:
diff --git a/Documentation/networking/sis900.txt b/Documentation/networking/sis900.txt
deleted file mode 100644
index bddffd7385ae..000000000000
--- a/Documentation/networking/sis900.txt
+++ /dev/null
@@ -1,257 +0,0 @@
-
-SiS 900/7016 Fast Ethernet Device Driver
-
-Ollie Lho
-
-Lei Chun Chang
-
- Copyright © 1999 by Silicon Integrated System Corp.
-
- This document gives some information on installation and usage of SiS
- 900/7016 device driver under Linux.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or (at
- your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- USA
- _________________________________________________________________
-
- Table of Contents
- 1. Introduction
- 2. Changes
- 3. Tested Environment
- 4. Files in This Package
- 5. Installation
-
- Building the driver as loadable module
- Building the driver into kernel
-
- 6. Known Problems and Bugs
- 7. Revision History
- 8. Acknowledgements
- _________________________________________________________________
-
-Chapter 1. Introduction
-
- This document describes the revision 1.06 and 1.07 of SiS 900/7016
- Fast Ethernet device driver under Linux. The driver is developed by
- Silicon Integrated System Corp. and distributed freely under the GNU
- General Public License (GPL). The driver can be compiled as a loadable
- module and used under Linux kernel version 2.2.x. (rev. 1.06) With
- minimal changes, the driver can also be used under 2.3.x and 2.4.x
- kernel (rev. 1.07), please see Chapter 5. If you are intended to use
- the driver for earlier kernels, you are on your own.
-
- The driver is tested with usual TCP/IP applications including FTP,
- Telnet, Netscape etc. and is used constantly by the developers.
-
- Please send all comments/fixes/questions to Lei-Chun Chang.
- _________________________________________________________________
-
-Chapter 2. Changes
-
- Changes made in Revision 1.07
-
- 1. Separation of sis900.c and sis900.h in order to move most constant
- definition to sis900.h (many of those constants were corrected)
- 2. Clean up PCI detection, the pci-scan from Donald Becker were not
- used, just simple pci_find_*.
- 3. MII detection is modified to support multiple mii transceiver.
- 4. Bugs in read_eeprom, mdio_* were removed.
- 5. Lot of sis900 irrelevant comments were removed/changed and more
- comments were added to reflect the real situation.
- 6. Clean up of physical/virtual address space mess in buffer
- descriptors.
- 7. Better transmit/receive error handling.
- 8. The driver now uses zero-copy single buffer management scheme to
- improve performance.
- 9. Names of variables were changed to be more consistent.
- 10. Clean up of auo-negotiation and timer code.
- 11. Automatic detection and change of PHY on the fly.
- 12. Bug in mac probing fixed.
- 13. Fix 630E equalier problem by modifying the equalizer workaround
- rule.
- 14. Support for ICS1893 10/100 Interated PHYceiver.
- 15. Support for media select by ifconfig.
- 16. Added kernel-doc extratable documentation.
- _________________________________________________________________
-
-Chapter 3. Tested Environment
-
- This driver is developed on the following hardware
-
- * Intel Celeron 500 with SiS 630 (rev 02) chipset
- * SiS 900 (rev 01) and SiS 7016/7014 Fast Ethernet Card
-
- and tested with these software environments
-
- * Red Hat Linux version 6.2
- * Linux kernel version 2.4.0
- * Netscape version 4.6
- * NcFTP 3.0.0 beta 18
- * Samba version 2.0.3
- _________________________________________________________________
-
-Chapter 4. Files in This Package
-
- In the package you can find these files:
-
- sis900.c
- Driver source file in C
-
- sis900.h
- Header file for sis900.c
-
- sis900.sgml
- DocBook SGML source of the document
-
- sis900.txt
- Driver document in plain text
- _________________________________________________________________
-
-Chapter 5. Installation
-
- Silicon Integrated System Corp. is cooperating closely with core Linux
- Kernel developers. The revisions of SiS 900 driver are distributed by
- the usuall channels for kernel tar files and patches. Those kernel tar
- files for official kernel and patches for kernel pre-release can be
- download at official kernel ftp site and its mirrors. The 1.06
- revision can be found in kernel version later than 2.3.15 and
- pre-2.2.14, and 1.07 revision can be found in kernel version 2.4.0. If
- you have no prior experience in networking under Linux, please read
- Ethernet HOWTO and Networking HOWTO available from Linux Documentation
- Project (LDP).
-
- The driver is bundled in release later than 2.2.11 and 2.3.15 so this
- is the most easy case. Be sure you have the appropriate packages for
- compiling kernel source. Those packages are listed in Document/Changes
- in kernel source distribution. If you have to install the driver other
- than those bundled in kernel release, you should have your driver file
- sis900.c and sis900.h copied into /usr/src/linux/drivers/net/ first.
- There are two alternative ways to install the driver
- _________________________________________________________________
-
-Building the driver as loadable module
-
- To build the driver as a loadable kernel module you have to
- reconfigure the kernel to activate network support by
-
-make menuconfig
-
- Choose "Loadable module support --->", then select "Enable loadable
- module support".
-
- Choose "Network Device Support --->", select "Ethernet (10 or
- 100Mbit)". Then select "EISA, VLB, PCI and on board controllers", and
- choose "SiS 900/7016 PCI Fast Ethernet Adapter support" to "M".
-
- After reconfiguring the kernel, you can make the driver module by
-
-make modules
-
- The driver should be compiled with no errors. After compiling the
- driver, the driver can be installed to proper place by
-
-make modules_install
-
- Load the driver into kernel by
-
-insmod sis900
-
- When loading the driver into memory, some information message can be
- view by
-
-dmesg
-
- or
-cat /var/log/message
-
- If the driver is loaded properly you will have messages similar to
- this:
-
-sis900.c: v1.07.06 11/07/2000
-eth0: SiS 900 PCI Fast Ethernet at 0xd000, IRQ 10, 00:00:e8:83:7f:a4.
-eth0: SiS 900 Internal MII PHY transceiver found at address 1.
-eth0: Using SiS 900 Internal MII PHY as default
-
- showing the version of the driver and the results of probing routine.
-
- Once the driver is loaded, network can be brought up by
-
-/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE
-
- where IPADDR, BROADCAST, NETMASK are your IP address, broadcast
- address and netmask respectively. TYPE is used to set medium type used
- by the device. Typical values are "10baseT"(twisted-pair 10Mbps
- Ethernet) or "100baseT" (twisted-pair 100Mbps Ethernet). For more
- information on how to configure network interface, please refer to
- Networking HOWTO.
-
- The link status is also shown by kernel messages. For example, after
- the network interface is activated, you may have the message:
-
-eth0: Media Link On 100mbps full-duplex
-
- If you try to unplug the twist pair (TP) cable you will get
-
-eth0: Media Link Off
-
- indicating that the link is failed.
- _________________________________________________________________
-
-Building the driver into kernel
-
- If you want to make the driver into kernel, choose "Y" rather than "M"
- on "SiS 900/7016 PCI Fast Ethernet Adapter support" when configuring
- the kernel. Build the kernel image in the usual way
-
-make clean
-
-make bzlilo
-
- Next time the system reboot, you have the driver in memory.
- _________________________________________________________________
-
-Chapter 6. Known Problems and Bugs
-
- There are some known problems and bugs. If you find any other bugs
- please mail to lcchang@sis.com.tw
-
- 1. AM79C901 HomePNA PHY is not thoroughly tested, there may be some
- bugs in the "on the fly" change of transceiver.
- 2. A bug is hidden somewhere in the receive buffer management code,
- the bug causes NULL pointer reference in the kernel. This fault is
- caught before bad things happen and reported with the message:
- eth0: NULL pointer encountered in Rx ring, skipping which can be
- viewed with dmesg or cat /var/log/message.
- 3. The media type change from 10Mbps to 100Mbps twisted-pair ethernet
- by ifconfig causes the media link down.
- _________________________________________________________________
-
-Chapter 7. Revision History
-
- * November 13, 2000, Revision 1.07, seventh release, 630E problem
- fixed and further clean up.
- * November 4, 1999, Revision 1.06, Second release, lots of clean up
- and optimization.
- * August 8, 1999, Revision 1.05, Initial Public Release
- _________________________________________________________________
-
-Chapter 8. Acknowledgements
-
- This driver was originally derived form Donald Becker's pci-skeleton
- and rtl8139 drivers. Donald also provided various suggestion regarded
- with improvements made in revision 1.06.
-
- The 1.05 revision was created by Jim Huang, AMD 79c901 support was
- added by Chin-Shan Li.
diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt
index 3759acf95b29..6091e5f6794f 100644
--- a/Documentation/networking/vortex.txt
+++ b/Documentation/networking/vortex.txt
@@ -24,36 +24,44 @@ Since kernel 2.3.99-pre6, this driver incorporates the support for the
This driver supports the following hardware:
- 3c590 Vortex 10Mbps
- 3c592 EISA 10mbps Demon/Vortex
- 3c597 EISA Fast Demon/Vortex
- 3c595 Vortex 100baseTx
- 3c595 Vortex 100baseT4
- 3c595 Vortex 100base-MII
- 3Com Vortex
- 3c900 Boomerang 10baseT
- 3c900 Boomerang 10Mbps Combo
- 3c900 Cyclone 10Mbps TPO
- 3c900B Cyclone 10Mbps T
- 3c900 Cyclone 10Mbps Combo
- 3c900 Cyclone 10Mbps TPC
- 3c900B-FL Cyclone 10base-FL
- 3c905 Boomerang 100baseTx
- 3c905 Boomerang 100baseT4
- 3c905B Cyclone 100baseTx
- 3c905B Cyclone 10/100/BNC
- 3c905B-FX Cyclone 100baseFx
- 3c905C Tornado
- 3c980 Cyclone
- 3cSOHO100-TX Hurricane
- 3c555 Laptop Hurricane
- 3c575 Boomerang CardBus
- 3CCFE575 Cyclone CardBus
- 3CCFE575CT Cyclone CardBus
- 3CCFE656 Cyclone CardBus
- 3CCFEM656 Cyclone CardBus
- 3c450 Cyclone/unknown
-
+ 3c590 Vortex 10Mbps
+ 3c592 EISA 10Mbps Demon/Vortex
+ 3c597 EISA Fast Demon/Vortex
+ 3c595 Vortex 100baseTx
+ 3c595 Vortex 100baseT4
+ 3c595 Vortex 100base-MII
+ 3c900 Boomerang 10baseT
+ 3c900 Boomerang 10Mbps Combo
+ 3c900 Cyclone 10Mbps TPO
+ 3c900 Cyclone 10Mbps Combo
+ 3c900 Cyclone 10Mbps TPC
+ 3c900B-FL Cyclone 10base-FL
+ 3c905 Boomerang 100baseTx
+ 3c905 Boomerang 100baseT4
+ 3c905B Cyclone 100baseTx
+ 3c905B Cyclone 10/100/BNC
+ 3c905B-FX Cyclone 100baseFx
+ 3c905C Tornado
+ 3c920B-EMB-WNM (ATI Radeon 9100 IGP)
+ 3c980 Cyclone
+ 3c980C Python-T
+ 3cSOHO100-TX Hurricane
+ 3c555 Laptop Hurricane
+ 3c556 Laptop Tornado
+ 3c556B Laptop Hurricane
+ 3c575 [Megahertz] 10/100 LAN CardBus
+ 3c575 Boomerang CardBus
+ 3CCFE575BT Cyclone CardBus
+ 3CCFE575CT Tornado CardBus
+ 3CCFE656 Cyclone CardBus
+ 3CCFEM656B Cyclone+Winmodem CardBus
+ 3CXFEM656C Tornado+Winmodem CardBus
+ 3c450 HomePNA Tornado
+ 3c920 Tornado
+ 3c982 Hydra Dual Port A
+ 3c982 Hydra Dual Port B
+ 3c905B-T4
+ 3c920B-EMB-WNM Tornado
Module parameters
=================
@@ -293,11 +301,6 @@ Donald's wake-on-LAN page:
http://www.scyld.com/wakeonlan.html
-3Com's documentation for many NICs, including the ones supported by
-this driver is available at
-
- http://support.3com.com/partners/developer/developer_form.html
-
3Com's DOS-based application for setting up the NICs EEPROMs:
ftp://ftp.3com.com/pub/nic/3c90x/3c90xx2.exe
@@ -312,10 +315,10 @@ Autonegotiation notes
---------------------
The driver uses a one-minute heartbeat for adapting to changes in
- the external LAN environment. This means that when, for example, a
- machine is unplugged from a hubbed 10baseT LAN plugged into a
- switched 100baseT LAN, the throughput will be quite dreadful for up
- to sixty seconds. Be patient.
+ the external LAN environment if link is up and 5 seconds if link is down.
+ This means that when, for example, a machine is unplugged from a hubbed
+ 10baseT LAN plugged into a switched 100baseT LAN, the throughput
+ will be quite dreadful for up to sixty seconds. Be patient.
Cisco interoperability note from Walter Wong <wcw+@CMU.EDU>:
diff --git a/Documentation/nfsroot.txt b/Documentation/nfsroot.txt
index a87d4af216c0..d56dc71d9430 100644
--- a/Documentation/nfsroot.txt
+++ b/Documentation/nfsroot.txt
@@ -3,6 +3,7 @@ Mounting the root filesystem via NFS (nfsroot)
Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
@@ -168,7 +169,6 @@ depend on what facilities are available:
root. If it got a BOOTP answer the directory name in that answer
is used.
-
3.2) Using LILO
When using LILO you can specify all necessary command line
parameters with the 'append=' command in the LILO configuration
@@ -177,7 +177,11 @@ depend on what facilities are available:
LILO and its 'append=' command please refer to the LILO
documentation.
-3.3) Using loadlin
+3.3) Using GRUB
+ When you use GRUB, you simply append the parameters after the kernel
+ specification: "kernel <kernel> <parameters>" (without the quotes).
+
+3.4) Using loadlin
When you want to boot Linux from a DOS command prompt without
having a local hard disk to mount as root, you can use loadlin.
I was told that it works, but haven't used it myself yet. In
@@ -185,7 +189,7 @@ depend on what facilities are available:
lar to how LILO is doing it. Please refer to the loadlin docu-
mentation for further information.
-3.4) Using a boot ROM
+3.5) Using a boot ROM
This is probably the most elegant way of booting a diskless
client. With a boot ROM the kernel gets loaded using the TFTP
protocol. As far as I know, no commercial boot ROMs yet
@@ -194,6 +198,13 @@ depend on what facilities are available:
and its mirrors. They are called 'netboot-nfs' and 'etherboot'.
Both contain everything you need to boot a diskless Linux client.
+3.6) Using pxelinux
+ Using pxelinux you specify the kernel you built with
+ "kernel <relative-path-below /tftpboot>". The nfsroot parameters
+ are passed to the kernel by adding them to the "append" line.
+ You may perhaps also want to fine tune the console output,
+ see Documentation/serial-console.txt for serial console help.
+
diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt
index af0f6eabfa1c..9529c9c9fd59 100644
--- a/Documentation/pnp.txt
+++ b/Documentation/pnp.txt
@@ -115,6 +115,9 @@ pnp_unregister_protocol
pnp_register_driver
- adds a PnP driver to the Plug and Play Layer
- this includes driver model integration
+- returns zero for success or a negative error number for failure; count
+ calls to the .add() method if you need to know how many devices bind to
+ the driver
pnp_unregister_driver
- removes a PnP driver from the Plug and Play Layer
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index b28b7f04abb8..d7814a113ee1 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -17,6 +17,11 @@ Some warnings, first.
* but it will probably only crash.
*
* (*) suspend/resume support is needed to make it safe.
+ *
+ * If you have any filesystems on USB devices mounted before suspend,
+ * they won't be accessible after resume and you may lose data, as though
+ * you have unplugged the USB devices with mounted filesystems on them
+ * (see the FAQ below for details).
You need to append resume=/dev/your_swap_partition to kernel command
line. Then you suspend by
@@ -27,19 +32,18 @@ echo shutdown > /sys/power/disk; echo disk > /sys/power/state
echo platform > /sys/power/disk; echo disk > /sys/power/state
+. If you have SATA disks, you'll need recent kernels with SATA suspend
+support. For suspend and resume to work, make sure your disk drivers
+are built into kernel -- not modules. [There's way to make
+suspend/resume with modular disk drivers, see FAQ, but you probably
+should not do that.]
+
If you want to limit the suspend image size to N bytes, do
echo N > /sys/power/image_size
before suspend (it is limited to 500 MB by default).
-Encrypted suspend image:
-------------------------
-If you want to store your suspend image encrypted with a temporary
-key to prevent data gathering after resume you must compile
-crypto and the aes algorithm into the kernel - modules won't work
-as they cannot be loaded at resume time.
-
Article about goals and implementation of Software Suspend for Linux
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -333,4 +337,37 @@ init=/bin/bash, then swapon and starting suspend sequence manually
usually does the trick. Then it is good idea to try with latest
vanilla kernel.
+Q: How can distributions ship a swsusp-supporting kernel with modular
+disk drivers (especially SATA)?
+
+A: Well, it can be done, load the drivers, then do echo into
+/sys/power/disk/resume file from initrd. Be sure not to mount
+anything, not even read-only mount, or you are going to lose your
+data.
+
+Q: How do I make suspend more verbose?
+
+A: If you want to see any non-error kernel messages on the virtual
+terminal the kernel switches to during suspend, you have to set the
+kernel console loglevel to at least 5, for example by doing
+
+ echo 5 > /proc/sys/kernel/printk
+
+Q: Is this true that if I have a mounted filesystem on a USB device and
+I suspend to disk, I can lose data unless the filesystem has been mounted
+with "sync"?
+
+A: That's right. It depends on your hardware, and it could be true even for
+suspend-to-RAM. In fact, even with "-o sync" you can lose data if your
+programs have information in buffers they haven't written out to disk.
+
+If you're lucky, your hardware will support low-power modes for USB
+controllers while the system is asleep. Lots of hardware doesn't,
+however. Shutting off the power to a USB controller is equivalent to
+unplugging all the attached devices.
+
+Remember that it's always a bad idea to unplug a disk drive containing a
+mounted filesystem. With USB that's true even when your system is asleep!
+The safest thing is to unmount all USB-based filesystems before suspending
+and remount them after resuming.
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
new file mode 100644
index 000000000000..94058220aaf0
--- /dev/null
+++ b/Documentation/power/userland-swsusp.txt
@@ -0,0 +1,149 @@
+Documentation for userland software suspend interface
+ (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+First, the warnings at the beginning of swsusp.txt still apply.
+
+Second, you should read the FAQ in swsusp.txt _now_ if you have not
+done it already.
+
+Now, to use the userland interface for software suspend you need special
+utilities that will read/write the system memory snapshot from/to the
+kernel. Such utilities are available, for example, from
+<http://www.sisk.pl/kernel/utilities/suspend>. You may want to have
+a look at them if you are going to develop your own suspend/resume
+utilities.
+
+The interface consists of a character device providing the open(),
+release(), read(), and write() operations as well as several ioctl()
+commands defined in kernel/power/power.h. The major and minor
+numbers of the device are, respectively, 10 and 231, and they can
+be read from /sys/class/misc/snapshot/dev.
+
+The device can be open either for reading or for writing. If open for
+reading, it is considered to be in the suspend mode. Otherwise it is
+assumed to be in the resume mode. The device cannot be open for reading
+and writing. It is also impossible to have the device open more than once
+at a time.
+
+The ioctl() commands recognized by the device are:
+
+SNAPSHOT_FREEZE - freeze user space processes (the current process is
+ not frozen); this is required for SNAPSHOT_ATOMIC_SNAPSHOT
+ and SNAPSHOT_ATOMIC_RESTORE to succeed
+
+SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE
+
+SNAPSHOT_ATOMIC_SNAPSHOT - create a snapshot of the system memory; the
+ last argument of ioctl() should be a pointer to an int variable,
+ the value of which will indicate whether the call returned after
+ creating the snapshot (1) or after restoring the system memory state
+ from it (0) (after resume the system finds itself finishing the
+ SNAPSHOT_ATOMIC_SNAPSHOT ioctl() again); after the snapshot
+ has been created the read() operation can be used to transfer
+ it out of the kernel
+
+SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the
+ uploaded snapshot image; before calling it you should transfer
+ the system memory snapshot back to the kernel using the write()
+ operation; this call will not succeed if the snapshot
+ image is not available to the kernel
+
+SNAPSHOT_FREE - free memory allocated for the snapshot image
+
+SNAPSHOT_SET_IMAGE_SIZE - set the preferred maximum size of the image
+ (the kernel will do its best to ensure the image size will not exceed
+ this number, but if it turns out to be impossible, the kernel will
+ create the smallest image possible)
+
+SNAPSHOT_AVAIL_SWAP - return the amount of available swap in bytes (the last
+ argument should be a pointer to an unsigned int variable that will
+ contain the result if the call is successful).
+
+SNAPSHOT_GET_SWAP_PAGE - allocate a swap page from the resume partition
+ (the last argument should be a pointer to a loff_t variable that
+ will contain the swap page offset if the call is successful)
+
+SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated with
+ SNAPSHOT_GET_SWAP_PAGE
+
+SNAPSHOT_SET_SWAP_FILE - set the resume partition (the last ioctl() argument
+ should specify the device's major and minor numbers in the old
+ two-byte format, as returned by the stat() function in the .st_rdev
+ member of the stat structure); it is recommended to always use this
+ call, because the code to set the resume partition could be removed from
+ future kernels
+
+The device's read() operation can be used to transfer the snapshot image from
+the kernel. It has the following limitations:
+- you cannot read() more than one virtual memory page at a time
+- read()s accross page boundaries are impossible (ie. if ypu read() 1/2 of
+ a page in the previous call, you will only be able to read()
+ _at_ _most_ 1/2 of the page in the next call)
+
+The device's write() operation is used for uploading the system memory snapshot
+into the kernel. It has the same limitations as the read() operation.
+
+The release() operation frees all memory allocated for the snapshot image
+and all swap pages allocated with SNAPSHOT_GET_SWAP_PAGE (if any).
+Thus it is not necessary to use either SNAPSHOT_FREE or
+SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also
+unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
+still frozen when the device is being closed).
+
+Currently it is assumed that the userland utilities reading/writing the
+snapshot image from/to the kernel will use a swap parition, called the resume
+partition, as storage space. However, this is not really required, as they
+can use, for example, a special (blank) suspend partition or a file on a partition
+that is unmounted before SNAPSHOT_ATOMIC_SNAPSHOT and mounted afterwards.
+
+These utilities SHOULD NOT make any assumptions regarding the ordering of
+data within the snapshot image, except for the image header that MAY be
+assumed to start with an swsusp_info structure, as specified in
+kernel/power/power.h. This structure MAY be used by the userland utilities
+to obtain some information about the snapshot image, such as the size
+of the snapshot image, including the metadata and the header itself,
+contained in the .size member of swsusp_info.
+
+The snapshot image MUST be written to the kernel unaltered (ie. all of the image
+data, metadata and header MUST be written in _exactly_ the same amount, form
+and order in which they have been read). Otherwise, the behavior of the
+resumed system may be totally unpredictable.
+
+While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the
+structure of the snapshot image is consistent with the information stored
+in the image header. If any inconsistencies are detected,
+SNAPSHOT_ATOMIC_RESTORE will not succeed. Still, this is not a fool-proof
+mechanism and the userland utilities using the interface SHOULD use additional
+means, such as checksums, to ensure the integrity of the snapshot image.
+
+The suspending and resuming utilities MUST lock themselves in memory,
+preferrably using mlockall(), before calling SNAPSHOT_FREEZE.
+
+The suspending utility MUST check the value stored by SNAPSHOT_ATOMIC_SNAPSHOT
+in the memory location pointed to by the last argument of ioctl() and proceed
+in accordance with it:
+1. If the value is 1 (ie. the system memory snapshot has just been
+ created and the system is ready for saving it):
+ (a) The suspending utility MUST NOT close the snapshot device
+ _unless_ the whole suspend procedure is to be cancelled, in
+ which case, if the snapshot image has already been saved, the
+ suspending utility SHOULD destroy it, preferrably by zapping
+ its header. If the suspend is not to be cancelled, the
+ system MUST be powered off or rebooted after the snapshot
+ image has been saved.
+ (b) The suspending utility SHOULD NOT attempt to perform any
+ file system operations (including reads) on the file systems
+ that were mounted before SNAPSHOT_ATOMIC_SNAPSHOT has been
+ called. However, it MAY mount a file system that was not
+ mounted at that time and perform some operations on it (eg.
+ use it for saving the image).
+2. If the value is 0 (ie. the system state has just been restored from
+ the snapshot image), the suspending utility MUST close the snapshot
+ device. Afterwards it will be treated as a regular userland process,
+ so it need not exit.
+
+The resuming utility SHOULD NOT attempt to mount any file systems that could
+be mounted before suspend and SHOULD NOT attempt to perform any operations
+involving such file systems.
+
+For details, please refer to the source code.
diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt
index 912bed87c758..d18a57d1a531 100644
--- a/Documentation/power/video.txt
+++ b/Documentation/power/video.txt
@@ -1,7 +1,7 @@
Video issues with S3 resume
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- 2003-2005, Pavel Machek
+ 2003-2006, Pavel Machek
During S3 resume, hardware needs to be reinitialized. For most
devices, this is easy, and kernel driver knows how to do
@@ -15,6 +15,27 @@ run normally so video card is normally initialized. It should not be
problem for S1 standby, because hardware should retain its state over
that.
+We either have to run video BIOS during early resume, or interpret it
+using vbetool later, or maybe nothing is neccessary on particular
+system because video state is preserved. Unfortunately different
+methods work on different systems, and no known method suits all of
+them.
+
+Userland application called s2ram has been developed; it contains long
+whitelist of systems, and automatically selects working method for a
+given system. It can be downloaded from CVS at
+www.sf.net/projects/suspend . If you get a system that is not in the
+whitelist, please try to find a working solution, and submit whitelist
+entry so that work does not need to be repeated.
+
+Currently, VBE_SAVE method (6 below) works on most
+systems. Unfortunately, vbetool only runs after userland is resumed,
+so it makes debugging of early resume problems
+hard/impossible. Methods that do not rely on userland are preferable.
+
+Details
+~~~~~~~
+
There are a few types of systems where video works after S3 resume:
(1) systems where video state is preserved over S3.
@@ -104,6 +125,7 @@ HP NX7000 ??? (*)
HP Pavilion ZD7000 vbetool post needed, need open-source nv driver for X
HP Omnibook XE3 athlon version none (1)
HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV
+HP Omnibook 5150 none (1), (S1 also works OK)
IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work.
IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(]
IBM TP R32 / Type 2658-MMG none (1)
@@ -120,18 +142,24 @@ IBM ThinkPad T42p (2373-GTG) s3_bios (2)
IBM TP X20 ??? (*)
IBM TP X30 s3_bios (2)
IBM TP X31 / Type 2672-XXH none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
-IBM TP X32 none (1), but backlight is on and video is trashed after long suspend
+IBM TP X32 none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results?
IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4)
+IBM TP 600e none(1), but a switch to console and back to X is needed
Medion MD4220 ??? (*)
Samsung P35 vbetool needed (6)
-Sharp PC-AR10 (ATI rage) none (1)
+Sharp PC-AR10 (ATI rage) none (1), backlight does not switch off
Sony Vaio PCG-C1VRX/K s3_bios (2)
Sony Vaio PCG-F403 ??? (*)
+Sony Vaio PCG-GRT995MP none (1), works with 'nv' X driver
+Sony Vaio PCG-GR7/K none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
Sony Vaio PCG-N505SN ??? (*)
Sony Vaio vgn-s260 X or boot-radeon can init it (5)
+Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will be blank unless you return to X.
+Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4)
Toshiba Libretto L5 none (1)
-Toshiba Satellite 4030CDT s3_mode (3)
-Toshiba Satellite 4080XCDT s3_mode (3)
+Toshiba Portege 3020CT s3_mode (3)
+Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK)
Toshiba Satellite 4090XCDT ??? (*)
Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****)
Toshiba M30 (2) xor X with nvidia driver using internal AGP
@@ -151,39 +179,3 @@ Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4)
(***) To be tested with a newer kernel.
(****) Not with SMP kernel, UP only.
-
-VBEtool details
-~~~~~~~~~~~~~~~
-(with thanks to Carl-Daniel Hailfinger)
-
-First, boot into X and run the following script ONCE:
-#!/bin/bash
-statedir=/root/s3/state
-mkdir -p $statedir
-chvt 2
-sleep 1
-vbetool vbestate save >$statedir/vbe
-
-
-To suspend and resume properly, call the following script as root:
-#!/bin/bash
-statedir=/root/s3/state
-curcons=`fgconsole`
-fuser /dev/tty$curcons 2>/dev/null|xargs ps -o comm= -p|grep -q X && chvt 2
-cat /dev/vcsa >$statedir/vcsa
-sync
-echo 3 >/proc/acpi/sleep
-sync
-vbetool post
-vbetool vbestate restore <$statedir/vbe
-cat $statedir/vcsa >/dev/vcsa
-rckbd restart
-chvt $[curcons%6+1]
-chvt $curcons
-
-
-Unless you change your graphics card or other hardware configuration,
-the state once saved will be OK for every resume afterwards.
-NOTE: The "rckbd restart" command may be different for your
-distribution. Simply replace it with the command you would use to
-set the fonts on screen.
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index d02c64953dcd..217e51768b87 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -719,6 +719,11 @@ address which can extend beyond that limit.
- model : this is your board name/model
- #address-cells : address representation for "root" devices
- #size-cells: the size representation for "root" devices
+ - device_type : This property shouldn't be necessary. However, if
+ you decide to create a device_type for your root node, make sure it
+ is _not_ "chrp" unless your platform is a pSeries or PAPR compliant
+ one for 64-bit, or a CHRP-type machine for 32-bit as this will
+ matched by the kernel this way.
Additionally, some recommended properties are:
@@ -1365,6 +1370,78 @@ platforms are moved over to use the flattened-device-tree model.
};
+ g) Freescale SOC SEC Security Engines
+
+ Required properties:
+
+ - device_type : Should be "crypto"
+ - model : Model of the device. Should be "SEC1" or "SEC2"
+ - compatible : Should be "talitos"
+ - reg : Offset and length of the register set for the device
+ - interrupts : <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and level
+ information for the interrupt. This should be encoded based on
+ the information in section 2) depending on the type of interrupt
+ controller you have.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+ - num-channels : An integer representing the number of channels
+ available.
+ - channel-fifo-len : An integer representing the number of
+ descriptor pointers each channel fetch fifo can hold.
+ - exec-units-mask : The bitmask representing what execution units
+ (EUs) are available. It's a single 32 bit cell. EU information
+ should be encoded following the SEC's Descriptor Header Dword
+ EU_SEL0 field documentation, i.e. as follows:
+
+ bit 0 = reserved - should be 0
+ bit 1 = set if SEC has the ARC4 EU (AFEU)
+ bit 2 = set if SEC has the DES/3DES EU (DEU)
+ bit 3 = set if SEC has the message digest EU (MDEU)
+ bit 4 = set if SEC has the random number generator EU (RNG)
+ bit 5 = set if SEC has the public key EU (PKEU)
+ bit 6 = set if SEC has the AES EU (AESU)
+ bit 7 = set if SEC has the Kasumi EU (KEU)
+
+ bits 8 through 31 are reserved for future SEC EUs.
+
+ - descriptor-types-mask : The bitmask representing what descriptors
+ are available. It's a single 32 bit cell. Descriptor type
+ information should be encoded following the SEC's Descriptor
+ Header Dword DESC_TYPE field documentation, i.e. as follows:
+
+ bit 0 = set if SEC supports the aesu_ctr_nonsnoop desc. type
+ bit 1 = set if SEC supports the ipsec_esp descriptor type
+ bit 2 = set if SEC supports the common_nonsnoop desc. type
+ bit 3 = set if SEC supports the 802.11i AES ccmp desc. type
+ bit 4 = set if SEC supports the hmac_snoop_no_afeu desc. type
+ bit 5 = set if SEC supports the srtp descriptor type
+ bit 6 = set if SEC supports the non_hmac_snoop_no_afeu desc.type
+ bit 7 = set if SEC supports the pkeu_assemble descriptor type
+ bit 8 = set if SEC supports the aesu_key_expand_output desc.type
+ bit 9 = set if SEC supports the pkeu_ptmul descriptor type
+ bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type
+ bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type
+
+ ..and so on and so forth.
+
+ Example:
+
+ /* MPC8548E */
+ crypto@30000 {
+ device_type = "crypto";
+ model = "SEC2";
+ compatible = "talitos";
+ reg = <30000 10000>;
+ interrupts = <1d 3>;
+ interrupt-parent = <40000>;
+ num-channels = <4>;
+ channel-fifo-len = <24>;
+ exec-units-mask = <000000fe>;
+ descriptor-types-mask = <073f1127>;
+ };
+
+
More devices will be defined as this spec matures.
diff --git a/Documentation/powerpc/eeh-pci-error-recovery.txt b/Documentation/powerpc/eeh-pci-error-recovery.txt
index 67a11a36270c..3764dd4b12cb 100644
--- a/Documentation/powerpc/eeh-pci-error-recovery.txt
+++ b/Documentation/powerpc/eeh-pci-error-recovery.txt
@@ -121,7 +121,7 @@ accomplished.
EEH must be enabled in the PHB's very early during the boot process,
and if a PCI slot is hot-plugged. The former is performed by
-eeh_init() in arch/ppc64/kernel/eeh.c, and the later by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
EEH must be enabled before a PCI scan of the device can proceed.
Current Power5 hardware will not work unless EEH is enabled;
@@ -133,7 +133,7 @@ error. Given an arbitrary address, the routine
pci_get_device_by_addr() will find the pci device associated
with that address (if any).
-The default include/asm-ppc64/io.h macros readb(), inb(), insb(),
+The default include/asm-powerpc/io.h macros readb(), inb(), insb(),
etc. include a check to see if the i/o read returned all-0xff's.
If so, these make a call to eeh_dn_check_failure(), which in turn
asks the firmware if the all-ff's value is the sign of a true EEH
@@ -143,11 +143,12 @@ seen in /proc/ppc64/eeh (subject to change). Normally, almost
all of these occur during boot, when the PCI bus is scanned, where
a large number of 0xff reads are part of the bus scan procedure.
-If a frozen slot is detected, code in arch/ppc64/kernel/eeh.c will
-print a stack trace to syslog (/var/log/messages). This stack trace
-has proven to be very useful to device-driver authors for finding
-out at what point the EEH error was detected, as the error itself
-usually occurs slightly beforehand.
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages). This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
+beforehand.
Next, it uses the Linux kernel notifier chain/work queue mechanism to
allow any interested parties to find out about the failure. Device
diff --git a/Documentation/powerpc/hvcs.txt b/Documentation/powerpc/hvcs.txt
index dca75cbda6f8..1e38166f4e54 100644
--- a/Documentation/powerpc/hvcs.txt
+++ b/Documentation/powerpc/hvcs.txt
@@ -558,9 +558,9 @@ partitions.
The proper channel for reporting bugs is either through the Linux OS
distribution company that provided your OS or by posting issues to the
-ppc64 development mailing list at:
+PowerPC development mailing list at:
-linuxppc64-dev@lists.linuxppc.org
+linuxppc-dev@ozlabs.org
This request is to provide a documented and searchable public exchange
of the problems and solutions surrounding this driver for the benefit of
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
new file mode 100644
index 000000000000..8529a17ffaa1
--- /dev/null
+++ b/Documentation/robust-futex-ABI.txt
@@ -0,0 +1,182 @@
+Started by Paul Jackson <pj@sgi.com>
+
+The robust futex ABI
+--------------------
+
+Robust_futexes provide a mechanism that is used in addition to normal
+futexes, for kernel assist of cleanup of held locks on task exit.
+
+The interesting data as to what futexes a thread is holding is kept on a
+linked list in user space, where it can be updated efficiently as locks
+are taken and dropped, without kernel intervention. The only additional
+kernel intervention required for robust_futexes above and beyond what is
+required for futexes is:
+
+ 1) a one time call, per thread, to tell the kernel where its list of
+ held robust_futexes begins, and
+ 2) internal kernel code at exit, to handle any listed locks held
+ by the exiting thread.
+
+The existing normal futexes already provide a "Fast Userspace Locking"
+mechanism, which handles uncontested locking without needing a system
+call, and handles contested locking by maintaining a list of waiting
+threads in the kernel. Options on the sys_futex(2) system call support
+waiting on a particular futex, and waking up the next waiter on a
+particular futex.
+
+For robust_futexes to work, the user code (typically in a library such
+as glibc linked with the application) has to manage and place the
+necessary list elements exactly as the kernel expects them. If it fails
+to do so, then improperly listed locks will not be cleaned up on exit,
+probably causing deadlock or other such failure of the other threads
+waiting on the same locks.
+
+A thread that anticipates possibly using robust_futexes should first
+issue the system call:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head, size_t len);
+
+The pointer 'head' points to a structure in the threads address space
+consisting of three words. Each word is 32 bits on 32 bit arch's, or 64
+bits on 64 bit arch's, and local byte order. Each thread should have
+its own thread private 'head'.
+
+If a thread is running in 32 bit compatibility mode on a 64 native arch
+kernel, then it can actually have two such structures - one using 32 bit
+words for 32 bit compatibility mode, and one using 64 bit words for 64
+bit native mode. The kernel, if it is a 64 bit kernel supporting 32 bit
+compatibility mode, will attempt to process both lists on each task
+exit, if the corresponding sys_set_robust_list() call has been made to
+setup that list.
+
+ The first word in the memory structure at 'head' contains a
+ pointer to a single linked list of 'lock entries', one per lock,
+ as described below. If the list is empty, the pointer will point
+ to itself, 'head'. The last 'lock entry' points back to the 'head'.
+
+ The second word, called 'offset', specifies the offset from the
+ address of the associated 'lock entry', plus or minus, of what will
+ be called the 'lock word', from that 'lock entry'. The 'lock word'
+ is always a 32 bit word, unlike the other words above. The 'lock
+ word' holds 3 flag bits in the upper 3 bits, and the thread id (TID)
+ of the thread holding the lock in the bottom 29 bits. See further
+ below for a description of the flag bits.
+
+ The third word, called 'list_op_pending', contains transient copy of
+ the address of the 'lock entry', during list insertion and removal,
+ and is needed to correctly resolve races should a thread exit while
+ in the middle of a locking or unlocking operation.
+
+Each 'lock entry' on the single linked list starting at 'head' consists
+of just a single word, pointing to the next 'lock entry', or back to
+'head' if there are no more entries. In addition, nearby to each 'lock
+entry', at an offset from the 'lock entry' specified by the 'offset'
+word, is one 'lock word'.
+
+The 'lock word' is always 32 bits, and is intended to be the same 32 bit
+lock variable used by the futex mechanism, in conjunction with
+robust_futexes. The kernel will only be able to wakeup the next thread
+waiting for a lock on a threads exit if that next thread used the futex
+mechanism to register the address of that 'lock word' with the kernel.
+
+For each futex lock currently held by a thread, if it wants this
+robust_futex support for exit cleanup of that lock, it should have one
+'lock entry' on this list, with its associated 'lock word' at the
+specified 'offset'. Should a thread die while holding any such locks,
+the kernel will walk this list, mark any such locks with a bit
+indicating their holder died, and wakeup the next thread waiting for
+that lock using the futex mechanism.
+
+When a thread has invoked the above system call to indicate it
+anticipates using robust_futexes, the kernel stores the passed in 'head'
+pointer for that task. The task may retrieve that value later on by
+using the system call:
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+ size_t __user *len_ptr);
+
+It is anticipated that threads will use robust_futexes embedded in
+larger, user level locking structures, one per lock. The kernel
+robust_futex mechanism doesn't care what else is in that structure, so
+long as the 'offset' to the 'lock word' is the same for all
+robust_futexes used by that thread. The thread should link those locks
+it currently holds using the 'lock entry' pointers. It may also have
+other links between the locks, such as the reverse side of a double
+linked list, but that doesn't matter to the kernel.
+
+By keeping its locks linked this way, on a list starting with a 'head'
+pointer known to the kernel, the kernel can provide to a thread the
+essential service available for robust_futexes, which is to help clean
+up locks held at the time of (a perhaps unexpectedly) exit.
+
+Actual locking and unlocking, during normal operations, is handled
+entirely by user level code in the contending threads, and by the
+existing futex mechanism to wait for, and wakeup, locks. The kernels
+only essential involvement in robust_futexes is to remember where the
+list 'head' is, and to walk the list on thread exit, handling locks
+still held by the departing thread, as described below.
+
+There may exist thousands of futex lock structures in a threads shared
+memory, on various data structures, at a given point in time. Only those
+lock structures for locks currently held by that thread should be on
+that thread's robust_futex linked lock list a given time.
+
+A given futex lock structure in a user shared memory region may be held
+at different times by any of the threads with access to that region. The
+thread currently holding such a lock, if any, is marked with the threads
+TID in the lower 29 bits of the 'lock word'.
+
+When adding or removing a lock from its list of held locks, in order for
+the kernel to correctly handle lock cleanup regardless of when the task
+exits (perhaps it gets an unexpected signal 9 in the middle of
+manipulating this list), the user code must observe the following
+protocol on 'lock entry' insertion and removal:
+
+On insertion:
+ 1) set the 'list_op_pending' word to the address of the 'lock word'
+ to be inserted,
+ 2) acquire the futex lock,
+ 3) add the lock entry, with its thread id (TID) in the bottom 29 bits
+ of the 'lock word', to the linked list starting at 'head', and
+ 4) clear the 'list_op_pending' word.
+
+On removal:
+ 1) set the 'list_op_pending' word to the address of the 'lock word'
+ to be removed,
+ 2) remove the lock entry for this lock from the 'head' list,
+ 2) release the futex lock, and
+ 2) clear the 'lock_op_pending' word.
+
+On exit, the kernel will consider the address stored in
+'list_op_pending' and the address of each 'lock word' found by walking
+the list starting at 'head'. For each such address, if the bottom 29
+bits of the 'lock word' at offset 'offset' from that address equals the
+exiting threads TID, then the kernel will do two things:
+
+ 1) if bit 31 (0x80000000) is set in that word, then attempt a futex
+ wakeup on that address, which will waken the next thread that has
+ used to the futex mechanism to wait on that address, and
+ 2) atomically set bit 30 (0x40000000) in the 'lock word'.
+
+In the above, bit 31 was set by futex waiters on that lock to indicate
+they were waiting, and bit 30 is set by the kernel to indicate that the
+lock owner died holding the lock.
+
+The kernel exit code will silently stop scanning the list further if at
+any point:
+
+ 1) the 'head' pointer or an subsequent linked list pointer
+ is not a valid address of a user space word
+ 2) the calculated location of the 'lock word' (address plus
+ 'offset') is not the valud address of a 32 bit user space
+ word
+ 3) if the list contains more than 1 million (subject to
+ future kernel configuration changes) elements.
+
+When the kernel sees a list entry whose 'lock word' doesn't have the
+current threads TID in the lower 29 bits, it does nothing with that
+entry, and goes on to the next entry.
+
+Bit 29 (0x20000000) of the 'lock word' is reserved for future use.
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
new file mode 100644
index 000000000000..df82d75245a0
--- /dev/null
+++ b/Documentation/robust-futexes.txt
@@ -0,0 +1,218 @@
+Started by: Ingo Molnar <mingo@redhat.com>
+
+Background
+----------
+
+what are robust futexes? To answer that, we first need to understand
+what futexes are: normal futexes are special types of locks that in the
+noncontended case can be acquired/released from userspace without having
+to enter the kernel.
+
+A futex is in essence a user-space address, e.g. a 32-bit lock variable
+field. If userspace notices contention (the lock is already owned and
+someone else wants to grab it too) then the lock is marked with a value
+that says "there's a waiter pending", and the sys_futex(FUTEX_WAIT)
+syscall is used to wait for the other guy to release it. The kernel
+creates a 'futex queue' internally, so that it can later on match up the
+waiter with the waker - without them having to know about each other.
+When the owner thread releases the futex, it notices (via the variable
+value) that there were waiter(s) pending, and does the
+sys_futex(FUTEX_WAKE) syscall to wake them up. Once all waiters have
+taken and released the lock, the futex is again back to 'uncontended'
+state, and there's no in-kernel state associated with it. The kernel
+completely forgets that there ever was a futex at that address. This
+method makes futexes very lightweight and scalable.
+
+"Robustness" is about dealing with crashes while holding a lock: if a
+process exits prematurely while holding a pthread_mutex_t lock that is
+also shared with some other process (e.g. yum segfaults while holding a
+pthread_mutex_t, or yum is kill -9-ed), then waiters for that lock need
+to be notified that the last owner of the lock exited in some irregular
+way.
+
+To solve such types of problems, "robust mutex" userspace APIs were
+created: pthread_mutex_lock() returns an error value if the owner exits
+prematurely - and the new owner can decide whether the data protected by
+the lock can be recovered safely.
+
+There is a big conceptual problem with futex based mutexes though: it is
+the kernel that destroys the owner task (e.g. due to a SEGFAULT), but
+the kernel cannot help with the cleanup: if there is no 'futex queue'
+(and in most cases there is none, futexes being fast lightweight locks)
+then the kernel has no information to clean up after the held lock!
+Userspace has no chance to clean up after the lock either - userspace is
+the one that crashes, so it has no opportunity to clean up. Catch-22.
+
+In practice, when e.g. yum is kill -9-ed (or segfaults), a system reboot
+is needed to release that futex based lock. This is one of the leading
+bugreports against yum.
+
+To solve this problem, the traditional approach was to extend the vma
+(virtual memory area descriptor) concept to have a notion of 'pending
+robust futexes attached to this area'. This approach requires 3 new
+syscall variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and
+FUTEX_RECOVER. At do_exit() time, all vmas are searched to see whether
+they have a robust_head set. This approach has two fundamental problems
+left:
+
+ - it has quite complex locking and race scenarios. The vma-based
+ approach had been pending for years, but they are still not completely
+ reliable.
+
+ - they have to scan _every_ vma at sys_exit() time, per thread!
+
+The second disadvantage is a real killer: pthread_exit() takes around 1
+microsecond on Linux, but with thousands (or tens of thousands) of vmas
+every pthread_exit() takes a millisecond or more, also totally
+destroying the CPU's L1 and L2 caches!
+
+This is very much noticeable even for normal process sys_exit_group()
+calls: the kernel has to do the vma scanning unconditionally! (this is
+because the kernel has no knowledge about how many robust futexes there
+are to be cleaned up, because a robust futex might have been registered
+in another task, and the futex variable might have been simply mmap()-ed
+into this process's address space).
+
+This huge overhead forced the creation of CONFIG_FUTEX_ROBUST so that
+normal kernels can turn it off, but worse than that: the overhead makes
+robust futexes impractical for any type of generic Linux distribution.
+
+So something had to be done.
+
+New approach to robust futexes
+------------------------------
+
+At the heart of this new approach there is a per-thread private list of
+robust locks that userspace is holding (maintained by glibc) - which
+userspace list is registered with the kernel via a new syscall [this
+registration happens at most once per thread lifetime]. At do_exit()
+time, the kernel checks this user-space list: are there any robust futex
+locks to be cleaned up?
+
+In the common case, at do_exit() time, there is no list registered, so
+the cost of robust futexes is just a simple current->robust_list != NULL
+comparison. If the thread has registered a list, then normally the list
+is empty. If the thread/process crashed or terminated in some incorrect
+way then the list might be non-empty: in this case the kernel carefully
+walks the list [not trusting it], and marks all locks that are owned by
+this thread with the FUTEX_OWNER_DEAD bit, and wakes up one waiter (if
+any).
+
+The list is guaranteed to be private and per-thread at do_exit() time,
+so it can be accessed by the kernel in a lockless way.
+
+There is one race possible though: since adding to and removing from the
+list is done after the futex is acquired by glibc, there is a few
+instructions window for the thread (or process) to die there, leaving
+the futex hung. To protect against this possibility, userspace (glibc)
+also maintains a simple per-thread 'list_op_pending' field, to allow the
+kernel to clean up if the thread dies after acquiring the lock, but just
+before it could have added itself to the list. Glibc sets this
+list_op_pending field before it tries to acquire the futex, and clears
+it after the list-add (or list-remove) has finished.
+
+That's all that is needed - all the rest of robust-futex cleanup is done
+in userspace [just like with the previous patches].
+
+Ulrich Drepper has implemented the necessary glibc support for this new
+mechanism, which fully enables robust mutexes.
+
+Key differences of this userspace-list based approach, compared to the
+vma based method:
+
+ - it's much, much faster: at thread exit time, there's no need to loop
+ over every vma (!), which the VM-based method has to do. Only a very
+ simple 'is the list empty' op is done.
+
+ - no VM changes are needed - 'struct address_space' is left alone.
+
+ - no registration of individual locks is needed: robust mutexes dont
+ need any extra per-lock syscalls. Robust mutexes thus become a very
+ lightweight primitive - so they dont force the application designer
+ to do a hard choice between performance and robustness - robust
+ mutexes are just as fast.
+
+ - no per-lock kernel allocation happens.
+
+ - no resource limits are needed.
+
+ - no kernel-space recovery call (FUTEX_RECOVER) is needed.
+
+ - the implementation and the locking is "obvious", and there are no
+ interactions with the VM.
+
+Performance
+-----------
+
+I have benchmarked the time needed for the kernel to process a list of 1
+million (!) held locks, using the new method [on a 2GHz CPU]:
+
+ - with FUTEX_WAIT set [contended mutex]: 130 msecs
+ - without FUTEX_WAIT set [uncontended mutex]: 30 msecs
+
+I have also measured an approach where glibc does the lock notification
+[which it currently does for !pshared robust mutexes], and that took 256
+msecs - clearly slower, due to the 1 million FUTEX_WAKE syscalls
+userspace had to do.
+
+(1 million held locks are unheard of - we expect at most a handful of
+locks to be held at a time. Nevertheless it's nice to know that this
+approach scales nicely.)
+
+Implementation details
+----------------------
+
+The patch adds two new syscalls: one to register the userspace list, and
+one to query the registered list pointer:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head,
+ size_t len);
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+ size_t __user *len_ptr);
+
+List registration is very fast: the pointer is simply stored in
+current->robust_list. [Note that in the future, if robust futexes become
+widespread, we could extend sys_clone() to register a robust-list head
+for new threads, without the need of another syscall.]
+
+So there is virtually zero overhead for tasks not using robust futexes,
+and even for robust futex users, there is only one extra syscall per
+thread lifetime, and the cleanup operation, if it happens, is fast and
+straightforward. The kernel doesnt have any internal distinction between
+robust and normal futexes.
+
+If a futex is found to be held at exit time, the kernel sets the
+following bit of the futex word:
+
+ #define FUTEX_OWNER_DIED 0x40000000
+
+and wakes up the next futex waiter (if any). User-space does the rest of
+the cleanup.
+
+Otherwise, robust futexes are acquired by glibc by putting the TID into
+the futex field atomically. Waiters set the FUTEX_WAITERS bit:
+
+ #define FUTEX_WAITERS 0x80000000
+
+and the remaining bits are for the TID.
+
+Testing, architecture support
+-----------------------------
+
+i've tested the new syscalls on x86 and x86_64, and have made sure the
+parsing of the userspace list is robust [ ;-) ] even if the list is
+deliberately corrupted.
+
+i386 and x86_64 syscalls are wired up at the moment, and Ulrich has
+tested the new glibc code (on x86_64 and i386), and it works for his
+robust-mutex testcases.
+
+All other architectures should build just fine too - but they wont have
+the new syscalls yet.
+
+Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
+inline function before writing up the syscalls (that function returns
+-ENOSYS right now).
diff --git a/Documentation/rpc-cache.txt b/Documentation/rpc-cache.txt
index 2b5d4434fa5a..5f757c8cf979 100644
--- a/Documentation/rpc-cache.txt
+++ b/Documentation/rpc-cache.txt
@@ -1,4 +1,4 @@
-This document gives a brief introduction to the caching
+ This document gives a brief introduction to the caching
mechanisms in the sunrpc layer that is used, in particular,
for NFS authentication.
@@ -25,25 +25,17 @@ The common code handles such things as:
- supporting 'NEGATIVE' as well as positive entries
- allowing an EXPIRED time on cache items, and removing
items after they expire, and are no longe in-use.
-
- Future code extensions are expect to handle
- making requests to user-space to fill in cache entries
- allowing user-space to directly set entries in the cache
- delaying RPC requests that depend on as-yet incomplete
cache entries, and replaying those requests when the cache entry
is complete.
- - maintaining last-access times on cache entries
- - clean out old entries when the caches become full
-
-The code for performing a cache lookup is also common, but in the form
-of a template. i.e. a #define.
-Each cache defines a lookup function by using the DefineCacheLookup
-macro, or the simpler DefineSimpleCacheLookup macro
+ - clean out old entries as they expire.
Creating a Cache
----------------
-1/ A cache needs a datum to cache. This is in the form of a
+1/ A cache needs a datum to store. This is in the form of a
structure definition that must contain a
struct cache_head
as an element, usually the first.
@@ -51,35 +43,69 @@ Creating a Cache
Each cache element is reference counted and contains
expiry and update times for use in cache management.
2/ A cache needs a "cache_detail" structure that
- describes the cache. This stores the hash table, and some
- parameters for cache management.
-3/ A cache needs a lookup function. This is created using
- the DefineCacheLookup macro. This lookup function is used both
- to find entries and to update entries. The normal mode for
- updating an entry is to replace the old entry with a new
- entry. However it is possible to allow update-in-place
- for those caches where it makes sense (no atomicity issues
- or indirect reference counting issue)
-4/ A cache needs to be registered using cache_register(). This
- includes in on a list of caches that will be regularly
- cleaned to discard old data. For this to work, some
- thread must periodically call cache_clean
-
+ describes the cache. This stores the hash table, some
+ parameters for cache management, and some operations detailing how
+ to work with particular cache items.
+ The operations requires are:
+ struct cache_head *alloc(void)
+ This simply allocates appropriate memory and returns
+ a pointer to the cache_detail embedded within the
+ structure
+ void cache_put(struct kref *)
+ This is called when the last reference to an item is
+ is dropped. The pointer passed is to the 'ref' field
+ in the cache_head. cache_put should release any
+ references create by 'cache_init' and, if CACHE_VALID
+ is set, any references created by cache_update.
+ It should then release the memory allocated by
+ 'alloc'.
+ int match(struct cache_head *orig, struct cache_head *new)
+ test if the keys in the two structures match. Return
+ 1 if they do, 0 if they don't.
+ void init(struct cache_head *orig, struct cache_head *new)
+ Set the 'key' fields in 'new' from 'orig'. This may
+ include taking references to shared objects.
+ void update(struct cache_head *orig, struct cache_head *new)
+ Set the 'content' fileds in 'new' from 'orig'.
+ int cache_show(struct seq_file *m, struct cache_detail *cd,
+ struct cache_head *h)
+ Optional. Used to provide a /proc file that lists the
+ contents of a cache. This should show one item,
+ usually on just one line.
+ int cache_request(struct cache_detail *cd, struct cache_head *h,
+ char **bpp, int *blen)
+ Format a request to be send to user-space for an item
+ to be instantiated. *bpp is a buffer of size *blen.
+ bpp should be moved forward over the encoded message,
+ and *blen should be reduced to show how much free
+ space remains. Return 0 on success or <0 if not
+ enough room or other problem.
+ int cache_parse(struct cache_detail *cd, char *buf, int len)
+ A message from user space has arrived to fill out a
+ cache entry. It is in 'buf' of length 'len'.
+ cache_parse should parse this, find the item in the
+ cache with sunrpc_cache_lookup, and update the item
+ with sunrpc_cache_update.
+
+
+3/ A cache needs to be registered using cache_register(). This
+ includes it on a list of caches that will be regularly
+ cleaned to discard old data.
+
Using a cache
-------------
-To find a value in a cache, call the lookup function passing it a the
-datum which contains key, and possibly content, and a flag saying
-whether to update the cache with new data from the datum. Depending
-on how the cache lookup function was defined, it may take an extra
-argument to identify the particular cache in question.
+To find a value in a cache, call sunrpc_cache_lookup passing a pointer
+to the cache_head in a sample item with the 'key' fields filled in.
+This will be passed to ->match to identify the target entry. If no
+entry is found, a new entry will be create, added to the cache, and
+marked as not containing valid data.
-Except in cases of kmalloc failure, the lookup function
-will return a new datum which will store the key and
-may contain valid content, or may not.
-This datum is typically passed to cache_check which determines the
-validity of the datum and may later initiate an upcall to fill
-in the data.
+The item returned is typically passed to cache_check which will check
+if the data is valid, and may initiate an up-call to get fresh data.
+cache_check will return -ENOENT in the entry is negative or if an up
+call is needed but not possible, -EAGAIN if an upcall is pending,
+or 0 if the data is valid;
cache_check can be passed a "struct cache_req *". This structure is
typically embedded in the actual request and can be used to create a
@@ -90,6 +116,13 @@ item does become valid, the deferred copy of the request will be
revisited (->revisit). It is expected that this method will
reschedule the request for processing.
+The value returned by sunrpc_cache_lookup can also be passed to
+sunrpc_cache_update to set the content for the item. A second item is
+passed which should hold the content. If the item found by _lookup
+has valid data, then it is discarded and a new item is created. This
+saves any user of an item from worrying about content changing while
+it is being inspected. If the item found by _lookup does not contain
+valid data, then the content is copied across and CACHE_VALID is set.
Populating a cache
------------------
@@ -114,8 +147,8 @@ should be create or updated to have the given content, and the
expiry time should be set on that item.
Reading from a channel is a bit more interesting. When a cache
-lookup fail, or when it suceeds but finds an entry that may soon
-expiry, a request is lodged for that cache item to be updated by
+lookup fails, or when it succeeds but finds an entry that may soon
+expire, a request is lodged for that cache item to be updated by
user-space. These requests appear in the channel file.
Successive reads will return successive requests.
@@ -130,7 +163,7 @@ Thus a user-space helper is likely to:
write a response
loop.
-If it dies and needs to be restarted, any requests that have not be
+If it dies and needs to be restarted, any requests that have not been
answered will still appear in the file and will be read by the new
instance of the helper.
@@ -142,10 +175,9 @@ Each cache should also define a "cache_request" method which
takes a cache item and encodes a request into the buffer
provided.
-
Note: If a cache has no active readers on the channel, and has had not
active readers for more than 60 seconds, further requests will not be
-added to the channel but instead all looks that do not find a valid
+added to the channel but instead all lookups that do not find a valid
entry will fail. This is partly for backward compatibility: The
previous nfs exports table was deemed to be authoritative and a
failed lookup meant a definite 'no'.
@@ -154,18 +186,17 @@ request/response format
-----------------------
While each cache is free to use it's own format for requests
-and responses over channel, the following is recommended are
+and responses over channel, the following is recommended as
appropriate and support routines are available to help:
Each request or response record should be printable ASCII
with precisely one newline character which should be at the end.
Fields within the record should be separated by spaces, normally one.
If spaces, newlines, or nul characters are needed in a field they
-much be quotes. two mechanisms are available:
+much be quoted. two mechanisms are available:
1/ If a field begins '\x' then it must contain an even number of
hex digits, and pairs of these digits provide the bytes in the
field.
2/ otherwise a \ in the field must be followed by 3 octal digits
which give the code for a byte. Other characters are treated
- as them selves. At the very least, space, newlines nul, and
+ as them selves. At the very least, space, newline, nul, and
'\' must be quoted in this way.
-
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.txt
index df09758bf3fe..efb674eda4d4 100644
--- a/Documentation/s390/driver-model.txt
+++ b/Documentation/s390/driver-model.txt
@@ -16,10 +16,12 @@ devices/
- 0.0.0000/0.0.0815/
- 0.0.0001/0.0.4711/
- 0.0.0002/
+ - 0.1.0000/0.1.1234/
...
-In this example, device 0815 is accessed via subchannel 0, device 4711 via
-subchannel 1, and subchannel 2 is a non-I/O subchannel.
+In this example, device 0815 is accessed via subchannel 0 in subchannel set 0,
+device 4711 via subchannel 1 in subchannel set 0, and subchannel 2 is a non-I/O
+subchannel. Device 1234 is accessed via subchannel 0 in subchannel set 1.
You should address a ccw device via its bus id (e.g. 0.0.4711); the device can
be found under bus/ccw/devices/.
@@ -97,7 +99,7 @@ is not available to the device driver.
Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
and/or device types/models it is interested. This information can later be found
-found in the struct ccw_device_id fields:
+in the struct ccw_device_id fields:
struct ccw_device_id {
__u16 match_flags;
@@ -208,6 +210,11 @@ Each ccwgroup device also provides an 'ungroup' attribute to destroy the device
again (only when offline). This is a generic ccwgroup mechanism (the driver does
not need to implement anything beyond normal removal routines).
+A ccw device which is a member of a ccwgroup device carries a pointer to the
+ccwgroup device in the driver_data of its device struct. This field must not be
+touched by the driver - it should use the ccwgroup device's driver_data for its
+private data.
+
To implement a ccwgroup driver, please refer to include/asm/ccwgroup.h. Keep in
mind that most drivers will need to implement both a ccwgroup and a ccw driver
(unless you have a meta ccw driver, like cu3088 for lcs and ctc).
@@ -230,6 +237,8 @@ status - Can be 'online' or 'offline'.
a channel path the user knows to be online, but the machine hasn't
created a machine check for.
+type - The physical type of the channel path.
+
3. System devices
-----------------
diff --git a/Documentation/serial-console.txt b/Documentation/serial-console.txt
index 6c689b0df2b8..9a7bc8b3f479 100644
--- a/Documentation/serial-console.txt
+++ b/Documentation/serial-console.txt
@@ -17,11 +17,13 @@ The format of this option is:
ttyX for any other virtual console
ttySx for a serial port
lp0 for the first parallel port
+ ttyUSB0 for the first USB serial device
options: depend on the driver. For the serial port this
- defines the baudrate/parity/bits of the port,
- in the format BBBBPN, where BBBB is the speed,
- P is parity (n/o/e), and N is bits. Default is
+ defines the baudrate/parity/bits/flow control of
+ the port, in the format BBBBPNF, where BBBB is the
+ speed, P is parity (n/o/e), N is number of bits,
+ and F is flow control ('r' for RTS). Default is
9600n8. The maximum baudrate is 115200.
You can specify multiple console= options on the kernel command line.
@@ -45,6 +47,9 @@ become the console.
You will need to create a new device to use /dev/console. The official
/dev/console is now character device 5,1.
+(You can also use a network device as a console. See
+Documentation/networking/netconsole.txt for information on that.)
+
Here's an example that will use /dev/ttyS1 (COM2) as the console.
Replace the sample values as needed.
diff --git a/Documentation/smart-config.txt b/Documentation/smart-config.txt
index c9bed4cf8773..8467447b5a87 100644
--- a/Documentation/smart-config.txt
+++ b/Documentation/smart-config.txt
@@ -56,10 +56,6 @@ Here is the solution:
writing one file per option. It updates only the files for options
that have changed.
- mkdep.c no longer generates warning messages for missing or unneeded
- <linux/config.h> lines. The new top-level target 'make checkconfig'
- checks for these problems.
-
Flag Dependencies
Martin Von Loewis contributed another feature to this patch:
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 36b511c7cade..1def6049784c 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -513,6 +513,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
This module supports multiple cards and autoprobe.
+ The power-management is supported.
+
Module snd-ens1371
------------------
@@ -526,6 +528,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
This module supports multiple cards and autoprobe.
+ The power-management is supported.
+
Module snd-es968
----------------
@@ -671,6 +675,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
model - force the model name
position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
+ single_cmd - Use single immediate commands to communicate with
+ codecs (for debugging only)
This module supports one card and autoprobe.
@@ -694,13 +700,34 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
asus 3-jack
uniwill 3-jack
F1734 2-jack
+ lg LG laptop (m1 express dual)
test for testing/debugging purpose, almost all controls can be
adjusted. Appearing only when compiled with
$CONFIG_SND_DEBUG=y
+ auto auto-config reading BIOS (default)
ALC260
hp HP machines
fujitsu Fujitsu S7020
+ acer Acer TravelMate
+ basic fixed pin assignment (old default model)
+ auto auto-config reading BIOS (default)
+
+ ALC262
+ fujitsu Fujitsu Laptop
+ basic fixed pin assignment w/o SPDIF
+ auto auto-config reading BIOS (default)
+
+ ALC882/883/885
+ 3stack-dig 3-jack with SPDIF I/O
+ 6stck-dig 6-jack digital with SPDIF I/O
+ auto auto-config reading BIOS (default)
+
+ ALC861
+ 3stack 3-jack
+ 3stack-dig 3-jack with SPDIF I/O
+ 6stack-dig 6-jack with SPDIF I/O
+ auto auto-config reading BIOS (default)
CMI9880
minimal 3-jack in back
@@ -710,6 +737,28 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
allout 5-jack in back, 2-jack in front, SPDIF out
auto auto-config reading BIOS (default)
+ AD1981
+ basic 3-jack (default)
+ hp HP nx6320
+
+ AD1986A
+ 6stack 6-jack, separate surrounds (default)
+ 3stack 3-stack, shared surrounds
+ laptop 2-channel only (FSC V2060, Samsung M50)
+ laptop-eapd 2-channel with EAPD (Samsung R65, ASUS A6J)
+
+ AD1988
+ 6stack 6-jack
+ 6stack-dig ditto with SPDIF
+ 3stack 3-jack
+ 3stack-dig ditto with SPDIF
+ laptop 3-jack with hp-jack automute
+ laptop-dig ditto with SPDIF
+ auto auto-confgi reading BIOS (default)
+
+ STAC7661(?)
+ vaio Setup for VAIO FE550G/SZ110
+
If the default configuration doesn't work and one of the above
matches with your device, report it together with the PCI
subsystem ID (output of "lspci -nv") to ALSA BTS or alsa-devel
@@ -723,6 +772,17 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
(Usually SD_LPLIB register is more accurate than the
position buffer.)
+ NB: If you get many "azx_get_response timeout" messages at
+ loading, it's likely a problem of interrupts (e.g. ACPI irq
+ routing). Try to boot with options like "pci=noacpi". Also, you
+ can try "single_cmd=1" module option. This will switch the
+ communication method between HDA controller and codecs to the
+ single immediate commands instead of CORB/RIRB. Basically, the
+ single command mode is provided only for BIOS, and you won't get
+ unsolicited events, too. But, at least, this works independently
+ from the irq. Remember this is a last resort, and should be
+ avoided as much as possible...
+
The power-management is supported.
Module snd-hdsp
@@ -802,6 +862,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
------------------
Module for Envy24HT (VT/ICE1724), Envy24PT (VT1720) based PCI sound cards.
+ * MidiMan M Audio Revolution 5.1
* MidiMan M Audio Revolution 7.1
* AMP Ltd AUDIO2000
* TerraTec Aureon 5.1 Sky
@@ -810,6 +871,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
* TerraTec Phase 22
* TerraTec Phase 28
* AudioTrak Prodigy 7.1
+ * AudioTrak Prodigy 7.1LT
* AudioTrak Prodigy 192
* Pontis MS300
* Albatron K8X800 Pro II
@@ -820,9 +882,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
* Shuttle SN25P
model - Use the given board model, one of the following:
- revo71, amp2000, prodigy71, prodigy192, aureon51,
- aureon71, universe, k8x800, phase22, phase28, ms300,
- av710
+ revo51, revo71, amp2000, prodigy71, prodigy71lt,
+ prodigy192, aureon51, aureon71, universe,
+ k8x800, phase22, phase28, ms300, av710
This module supports multiple cards and autoprobe.
@@ -1353,6 +1415,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
vid - Vendor ID for the device (optional)
pid - Product ID for the device (optional)
+ device_setup - Device specific magic number (optional)
+ - Influence depends on the device
+ - Default: 0x0000
This module supports multiple devices, autoprobe and hotplugging.
diff --git a/Documentation/sound/alsa/Audiophile-Usb.txt b/Documentation/sound/alsa/Audiophile-Usb.txt
new file mode 100644
index 000000000000..4692c8e77dc1
--- /dev/null
+++ b/Documentation/sound/alsa/Audiophile-Usb.txt
@@ -0,0 +1,333 @@
+ Guide to using M-Audio Audiophile USB with ALSA and Jack v1.2
+ ========================================================
+
+ Thibault Le Meur <Thibault.LeMeur@supelec.fr>
+
+This document is a guide to using the M-Audio Audiophile USB (tm) device with
+ALSA and JACK.
+
+1 - Audiophile USB Specs and correct usage
+==========================================
+This part is a reminder of important facts about the functions and limitations
+of the device.
+
+The device has 4 audio interfaces, and 2 MIDI ports:
+ * Analog Stereo Input (Ai)
+ - This port supports 2 pairs of line-level audio inputs (1/4" TS and RCA)
+ - When the 1/4" TS (jack) connectors are connected, the RCA connectors
+ are disabled
+ * Analog Stereo Output (Ao)
+ * Digital Stereo Input (Di)
+ * Digital Stereo Output (Do)
+ * Midi In (Mi)
+ * Midi Out (Mo)
+
+The internal DAC/ADC has the following caracteristics:
+* sample depth of 16 or 24 bits
+* sample rate from 8kHz to 96kHz
+* Two ports can't use different sample depths at the same time.Moreover, the
+Audiophile USB documentation gives the following Warning: "Please exit any
+audio application running before switching between bit depths"
+
+Due to the USB 1.1 bandwidth limitation, a limited number of interfaces can be
+activated at the same time depending on the audio mode selected:
+ * 16-bit/48kHz ==> 4 channels in/ 4 channels out
+ - Ai+Ao+Di+Do
+ * 24-bit/48kHz ==> 4 channels in/2 channels out,
+ or 2 channels in/4 channels out
+ - Ai+Ao+Do or Ai+Di+Ao or Ai+Di+Do or Di+Ao+Do
+ * 24-bit/96kHz ==> 2 channels in, or 2 channels out (half duplex only)
+ - Ai or Ao or Di or Do
+
+Important facts about the Digital interface:
+--------------------------------------------
+ * The Do port additionnaly supports surround-encoded AC-3 and DTS passthrough,
+though I haven't tested it under linux
+ - Note that in this setup only the Do interface can be enabled
+ * Apart from recording an audio digital stream, enabling the Di port is a way
+to synchronize the device to an external sample clock
+ - As a consequence, the Di port must be enable only if an active Digital
+source is connected
+ - Enabling Di when no digital source is connected can result in a
+synchronization error (for instance sound played at an odd sample rate)
+
+
+2 - Audiophile USB support in ALSA
+==================================
+
+2.1 - MIDI ports
+----------------
+The Audiophile USB MIDI ports will be automatically supported once the
+following modules have been loaded:
+ * snd-usb-audio
+ * snd-seq
+ * snd-seq-midi
+
+No additionnal setting is required.
+
+2.2 - Audio ports
+-----------------
+
+Audio functions of the Audiophile USB device are handled by the snd-usb-audio
+module. This module can work in a default mode (without any device-specific
+parameter), or in an advanced mode with the device-specific parameter called
+"device_setup".
+
+2.2.1 - Default Alsa driver mode
+
+The default behaviour of the snd-usb-audio driver is to parse the device
+capabilities at startup and enable all functions inside the device (including
+all ports at any sample rates and any sample depths supported). This approach
+has the advantage to let the driver easily switch from sample rates/depths
+automatically according to the need of the application claiming the device.
+
+In this case the Audiophile ports are mapped to alsa pcm devices in the
+following way (I suppose the device's index is 1):
+ * hw:1,0 is Ao in playback and Di in capture
+ * hw:1,1 is Do in playback and Ai in capture
+ * hw:1,2 is Do in AC3/DTS passthrough mode
+
+You must note as well that the device uses Big Endian byte encoding so that
+supported audio format are S16_BE for 16-bit depth modes and S24_3BE for
+24-bits depth mode. One exception is the hw:1,2 port which is Little Endian
+compliant and thus uses S16_LE.
+
+Examples:
+ * playing a S24_3BE encoded raw file to the Ao port
+ % aplay -D hw:1,0 -c2 -t raw -r48000 -fS24_3BE test.raw
+ * recording a S24_3BE encoded raw file from the Ai port
+ % arecord -D hw:1,1 -c2 -t raw -r48000 -fS24_3BE test.raw
+ * playing a S16_BE encoded raw file to the Do port
+ % aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test.raw
+
+If you're happy with the default Alsa driver setup and don't experience any
+issue with this mode, then you can skip the following chapter.
+
+2.2.2 - Advanced module setup
+
+Due to the hardware constraints described above, the device initialization made
+by the Alsa driver in default mode may result in a corrupted state of the
+device. For instance, a particularly annoying issue is that the sound captured
+from the Ai port sounds distorted (as if boosted with an excessive high volume
+gain).
+
+For people having this problem, the snd-usb-audio module has a new module
+parameter called "device_setup".
+
+2.2.2.1 - Initializing the working mode of the Audiohile USB
+
+As far as the Audiohile USB device is concerned, this value let the user
+specify:
+ * the sample depth
+ * the sample rate
+ * whether the Di port is used or not
+
+Here is a list of supported device_setup values for this device:
+ * device_setup=0x00 (or omitted)
+ - Alsa driver default mode
+ - maintains backward compatibility with setups that do not use this
+ parameter by not introducing any change
+ - results sometimes in corrupted sound as decribed earlier
+ * device_setup=0x01
+ - 16bits 48kHz mode with Di disabled
+ - Ai,Ao,Do can be used at the same time
+ - hw:1,0 is not available in capture mode
+ - hw:1,2 is not available
+ * device_setup=0x11
+ - 16bits 48kHz mode with Di enabled
+ - Ai,Ao,Di,Do can be used at the same time
+ - hw:1,0 is available in capture mode
+ - hw:1,2 is not available
+ * device_setup=0x09
+ - 24bits 48kHz mode with Di disabled
+ - Ai,Ao,Do can be used at the same time
+ - hw:1,0 is not available in capture mode
+ - hw:1,2 is not available
+ * device_setup=0x19
+ - 24bits 48kHz mode with Di enabled
+ - 3 ports from {Ai,Ao,Di,Do} can be used at the same time
+ - hw:1,0 is available in capture mode and an active digital source must be
+ connected to Di
+ - hw:1,2 is not available
+ * device_setup=0x0D or 0x10
+ - 24bits 96kHz mode
+ - Di is enabled by default for this mode but does not need to be connected
+ to an active source
+ - Only 1 port from {Ai,Ao,Di,Do} can be used at the same time
+ - hw:1,0 is available in captured mode
+ - hw:1,2 is not available
+ * device_setup=0x03
+ - 16bits 48kHz mode with only the Do port enabled
+ - AC3 with DTS passthru (not tested)
+ - Caution with this setup the Do port is mapped to the pcm device hw:1,0
+
+2.2.2.2 - Setting and switching configurations with the device_setup parameter
+
+The parameter can be given:
+ * By manually probing the device (as root):
+ # modprobe -r snd-usb-audio
+ # modprobe snd-usb-audio index=1 device_setup=0x09
+ * Or while configuring the modules options in your modules configuration file
+ - For Fedora distributions, edit the /etc/modprobe.conf file:
+ alias snd-card-1 snd-usb-audio
+ options snd-usb-audio index=1 device_setup=0x09
+
+IMPORTANT NOTE WHEN SWITCHING CONFIGURATION:
+-------------------------------------------
+ * You may need to _first_ intialize the module with the correct device_setup
+ parameter and _only_after_ turn on the Audiophile USB device
+ * This is especially true when switching the sample depth:
+ - first trun off the device
+ - de-register the snd-usb-audio module
+ - change the device_setup parameter (by either manually reprobing the module
+ or changing modprobe.conf)
+ - turn on the device
+
+2.2.2.3 - Audiophile USB's device_setup structure
+
+If you want to understand the device_setup magic numbers for the Audiophile
+USB, you need some very basic understanding of binary computation. However,
+this is not required to use the parameter and you may skip thi section.
+
+The device_setup is one byte long and its structure is the following:
+
+ +---+---+---+---+---+---+---+---+
+ | b7| b6| b5| b4| b3| b2| b1| b0|
+ +---+---+---+---+---+---+---+---+
+ | 0 | 0 | 0 | Di|24B|96K|DTS|SET|
+ +---+---+---+---+---+---+---+---+
+
+Where:
+ * b0 is the "SET" bit
+ - it MUST be set if device_setup is initialized
+ * b1 is the "DTS" bit
+ - it is set only for Digital output with DTS/AC3
+ - this setup is not tested
+ * b2 is the Rate selection flag
+ - When set to "1" the rate range is 48.1-96kHz
+ - Otherwise the sample rate range is 8-48kHz
+ * b3 is the bit depth selection flag
+ - When set to "1" samples are 24bits long
+ - Otherwise they are 16bits long
+ - Note that b2 implies b3 as the 96kHz mode is only supported for 24 bits
+ samples
+ * b4 is the Digital input flag
+ - When set to "1" the device assumes that an active digital source is
+ connected
+ - You shouldn't enable Di if no source is seen on the port (this leads to
+ synchronization issues)
+ - b4 is implied by b2 (since only one port is enabled at a time no synch
+ error can occur)
+ * b5 to b7 are reserved for future uses, and must be set to "0"
+ - might become Ao, Do, Ai, for b7, b6, b4 respectively
+
+Caution:
+ * there is no check on the value you will give to device_setup
+ - for instance choosing 0x05 (16bits 96kHz) will fail back to 0x09 since
+ b2 implies b3. But _there_will_be_no_warning_ in /var/log/messages
+ * Hardware constraints due to the USB bus limitation aren't checked
+ - choosing b2 will prepare all interfaces for 24bits/96kHz but you'll
+ only be able to use one at the same time
+
+2.2.3 - USB implementation details for this device
+
+You may safely skip this section if you're not interrested in driver
+development.
+
+This section describes some internals aspect of the device and summarize the
+data I got by usb-snooping the windows and linux drivers.
+
+The M-Audio Audiophile USB has 7 USB Interfaces:
+a "USB interface":
+ * USB Interface nb.0
+ * USB Interface nb.1
+ - Audio Control function
+ * USB Interface nb.2
+ - Analog Output
+ * USB Interface nb.3
+ - Digital Output
+ * USB Interface nb.4
+ - Analog Input
+ * USB Interface nb.5
+ - Digital Input
+ * USB Interface nb.6
+ - MIDI interface compliant with the MIDIMAN quirk
+
+Each interface has 5 altsettings (AltSet 1,2,3,4,5) except:
+ * Interface 3 (Digital Out) has an extra Alset nb.6
+ * Interface 5 (Digital In) does not have Alset nb.3 and 5
+
+Here is a short description of the AltSettings capabilities:
+ * AltSettings 1 corresponds to
+ - 24-bit depth, 48.1-96kHz sample mode
+ - Adaptive playback (Ao and Do), Synch capture (Ai), or Asynch capture (Di)
+ * AltSettings 2 corresponds to
+ - 24-bit depth, 8-48kHz sample mode
+ - Asynch capture and playback (Ao,Ai,Do,Di)
+ * AltSettings 3 corresponds to
+ - 24-bit depth, 8-48kHz sample mode
+ - Synch capture (Ai) and Adaptive playback (Ao,Do)
+ * AltSettings 4 corresponds to
+ - 16-bit depth, 8-48kHz sample mode
+ - Asynch capture and playback (Ao,Ai,Do,Di)
+ * AltSettings 5 corresponds to
+ - 16-bit depth, 8-48kHz sample mode
+ - Synch capture (Ai) and Adaptive playback (Ao,Do)
+ * AltSettings 6 corresponds to
+ - 16-bit depth, 8-48kHz sample mode
+ - Synch playback (Do), audio format type III IEC1937_AC-3
+
+In order to ensure a correct intialization of the device, the driver
+_must_know_ how the device will be used:
+ * if DTS is choosen, only Interface 2 with AltSet nb.6 must be
+ registered
+ * if 96KHz only AltSets nb.1 of each interface must be selected
+ * if samples are using 24bits/48KHz then AltSet 2 must me used if
+ Digital input is connected, and only AltSet nb.3 if Digital input
+ is not connected
+ * if samples are using 16bits/48KHz then AltSet 4 must me used if
+ Digital input is connected, and only AltSet nb.5 if Digital input
+ is not connected
+
+When device_setup is given as a parameter to the snd-usb-audio module, the
+parse_audio_enpoint function uses a quirk called
+"audiophile_skip_setting_quirk" in order to prevent AltSettings not
+corresponding to device_setup from being registered in the driver.
+
+3 - Audiophile USB and Jack support
+===================================
+
+This section deals with support of the Audiophile USB device in Jack.
+The main issue regarding this support is that the device is Big Endian
+compliant.
+
+3.1 - Using the plug alsa plugin
+--------------------------------
+
+Jack doesn't directly support big endian devices. Thus, one way to have support
+for this device with Alsa is to use the Alsa "plug" converter.
+
+For instance here is one way to run Jack with 2 playback channels on Ao and 2
+capture channels from Ai:
+ % jackd -R -dalsa -dplughw:1 -r48000 -p256 -n2 -D -Cplughw:1,1
+
+
+However you may see the following warning message:
+"You appear to be using the ALSA software "plug" layer, probably a result of
+using the "default" ALSA device. This is less efficient than it could be.
+Consider using a hardware device instead rather than using the plug layer."
+
+
+3.2 - Patching alsa to use direct pcm device
+-------------------------------------------
+A patch for Jack by Andreas Steinmetz adds support for Big Endian devices.
+However it has not been included in the CVS tree.
+
+You can find it at the following URL:
+http://sourceforge.net/tracker/index.php?func=detail&aid=1289682&group_id=39687&
+atid=425939
+
+After having applied the patch you can run jackd with the following command
+line:
+ % jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
+
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index 4251085d38d3..6feef9e82b63 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -1834,7 +1834,7 @@
mychip_set_sample_format(chip, runtime->format);
mychip_set_sample_rate(chip, runtime->rate);
mychip_set_channels(chip, runtime->channels);
- mychip_set_dma_setup(chip, runtime->dma_area,
+ mychip_set_dma_setup(chip, runtime->dma_addr,
chip->buffer_size,
chip->period_size);
return 0;
@@ -2836,7 +2836,7 @@ struct _snd_pcm_runtime {
<para>
Note that this callback became non-atomic since the recent version.
- You can use schedule-related fucntions safely in this callback now.
+ You can use schedule-related functions safely in this callback now.
</para>
<para>
@@ -3388,7 +3388,7 @@ struct _snd_pcm_runtime {
.name = "PCM Playback Switch",
.index = 0,
.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
- .private_values = 0xffff,
+ .private_value = 0xffff,
.info = my_control_info,
.get = my_control_get,
.put = my_control_put
@@ -3449,7 +3449,7 @@ struct _snd_pcm_runtime {
</para>
<para>
- The <structfield>private_values</structfield> field contains
+ The <structfield>private_value</structfield> field contains
an arbitrary long integer value for this record. When using
generic <structfield>info</structfield>,
<structfield>get</structfield> and
diff --git a/Documentation/sound/oss/Introduction b/Documentation/sound/oss/Introduction
index 15d4fb975ac0..f04ba6bb7395 100644
--- a/Documentation/sound/oss/Introduction
+++ b/Documentation/sound/oss/Introduction
@@ -69,7 +69,7 @@ are available, for example IRQ, address, DMA.
Warning, the options for different cards sometime use different names
for the same or a similar feature (dma1= versus dma16=). As a last
-resort, inspect the code (search for MODULE_PARM).
+resort, inspect the code (search for module_param).
Notes:
diff --git a/Documentation/sound/oss/cs46xx b/Documentation/sound/oss/cs46xx
index 88d6cf8b39f3..b54432709863 100644
--- a/Documentation/sound/oss/cs46xx
+++ b/Documentation/sound/oss/cs46xx
@@ -88,7 +88,7 @@ parameters. for a copy email: twoller@crystal.cirrus.com
MODULE_PARMS definitions
------------------------
-MODULE_PARM(defaultorder, "i");
+module_param(defaultorder, ulong, 0);
defaultorder=N
where N is a value from 1 to 12
The buffer order determines the size of the dma buffer for the driver.
@@ -98,18 +98,18 @@ to not underrun the dma buffer as easily. As default, use 32k (order=3)
rather than 64k as some of the games work more responsively.
(2^N) * PAGE_SIZE = allocated buffer size
-MODULE_PARM(cs_debuglevel, "i");
-MODULE_PARM(cs_debugmask, "i");
+module_param(cs_debuglevel, ulong, 0644);
+module_param(cs_debugmask, ulong, 0644);
cs_debuglevel=N
cs_debugmask=0xMMMMMMMM
where N is a value from 0 (no debug printfs), to 9 (maximum)
0xMMMMMMMM is a debug mask corresponding to the CS_xxx bits (see driver source).
-MODULE_PARM(hercules_egpio_disable, "i");
+module_param(hercules_egpio_disable, ulong, 0);
hercules_egpio_disable=N
where N is a 0 (enable egpio), or a 1 (disable egpio support)
-MODULE_PARM(initdelay, "i");
+module_param(initdelay, ulong, 0);
initdelay=N
This value is used to determine the millescond delay during the initialization
code prior to powering up the PLL. On laptops this value can be used to
@@ -118,19 +118,19 @@ system is booted under battery power then the mdelay()/udelay() functions fail t
properly delay the required time. Also, if the system is booted under AC power
and then the power removed, the mdelay()/udelay() functions will not delay properly.
-MODULE_PARM(powerdown, "i");
+module_param(powerdown, ulong, 0);
powerdown=N
where N is 0 (disable any powerdown of the internal blocks) or 1 (enable powerdown)
-MODULE_PARM(external_amp, "i");
+module_param(external_amp, bool, 0);
external_amp=1
if N is set to 1, then force enabling the EAPD support in the primary AC97 codec.
override the detection logic and force the external amp bit in the AC97 0x26 register
to be reset (0). EAPD should be 0 for powerup, and 1 for powerdown. The VTB Santa Cruz
card has inverted logic, so there is a special function for these cards.
-MODULE_PARM(thinkpad, "i");
+module_param(thinkpad, bool, 0);
thinkpad=1
if N is set to 1, then force enabling the clkrun functionality.
Currently, when the part is being used, then clkrun is disabled for the entire system,
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index c2122996631e..a661d684768e 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -9,7 +9,7 @@ removed soon. So for any new code dynamic initialization should be used:
static int __init xxx_init(void)
{
spin_lock_init(&xxx_lock);
- rw_lock_init(&xxx_rw_lock);
+ rwlock_init(&xxx_rw_lock);
...
}
diff --git a/Documentation/usb/et61x251.txt b/Documentation/usb/et61x251.txt
index b44dda407ce2..29340282ab5f 100644
--- a/Documentation/usb/et61x251.txt
+++ b/Documentation/usb/et61x251.txt
@@ -176,6 +176,14 @@ Description: Force the application to unmap previously mapped buffer memory
1 = force memory unmapping (save memory)
Default: 0
-------------------------------------------------------------------------------
+Name: frame_timeout
+Type: uint array (min = 0, max = 64)
+Syntax: <n[,...]>
+Description: Timeout for a video frame in seconds. This parameter is
+ specific for each detected camera. This parameter can be
+ changed at runtime thanks to the /sys filesystem interface.
+Default: 2
+-------------------------------------------------------------------------------
Name: debug
Type: ushort
Syntax: <n>
@@ -266,7 +274,7 @@ the V4L2 interface.
10. Notes for V4L2 application developers
-========================================
+=========================================
This driver follows the V4L2 API specifications. In particular, it enforces two
rules:
diff --git a/Documentation/usb/sn9c102.txt b/Documentation/usb/sn9c102.txt
index c6b76414172c..b957beae5607 100644
--- a/Documentation/usb/sn9c102.txt
+++ b/Documentation/usb/sn9c102.txt
@@ -196,6 +196,14 @@ Description: Force the application to unmap previously mapped buffer memory
1 = force memory unmapping (save memory)
Default: 0
-------------------------------------------------------------------------------
+Name: frame_timeout
+Type: uint array (min = 0, max = 64)
+Syntax: <n[,...]>
+Description: Timeout for a video frame in seconds. This parameter is
+ specific for each detected camera. This parameter can be
+ changed at runtime thanks to the /sys filesystem interface.
+Default: 2
+-------------------------------------------------------------------------------
Name: debug
Type: ushort
Syntax: <n>
@@ -321,6 +329,7 @@ Vendor ID Product ID
--------- ----------
0x0c45 0x6001
0x0c45 0x6005
+0x0c45 0x6007
0x0c45 0x6009
0x0c45 0x600d
0x0c45 0x6024
@@ -370,6 +379,7 @@ HV7131D Hynix Semiconductor, Inc.
MI-0343 Micron Technology, Inc.
OV7630 OmniVision Technologies, Inc.
PAS106B PixArt Imaging, Inc.
+PAS202BCA PixArt Imaging, Inc.
PAS202BCB PixArt Imaging, Inc.
TAS5110C1B Taiwan Advanced Sensor Corporation
TAS5130D1B Taiwan Advanced Sensor Corporation
@@ -493,6 +503,7 @@ Many thanks to following persons for their contribute (listed in alphabetical
order):
- Luca Capello for the donation of a webcam;
+- Philippe Coval for having helped testing the PAS202BCA image sensor;
- Joao Rodrigo Fuzaro, Joao Limirio, Claudio Filho and Caio Begotti for the
donation of a webcam;
- Jon Hollstrom for the donation of a webcam;
diff --git a/Documentation/usb/zc0301.txt b/Documentation/usb/zc0301.txt
new file mode 100644
index 000000000000..f55262c6733b
--- /dev/null
+++ b/Documentation/usb/zc0301.txt
@@ -0,0 +1,254 @@
+
+ ZC0301 Image Processor and Control Chip
+ Driver for Linux
+ =======================================
+
+ - Documentation -
+
+
+Index
+=====
+1. Copyright
+2. Disclaimer
+3. License
+4. Overview and features
+5. Module dependencies
+6. Module loading
+7. Module parameters
+8. Supported devices
+9. Notes for V4L2 application developers
+10. Contact information
+11. Credits
+
+
+1. Copyright
+============
+Copyright (C) 2006 by Luca Risolia <luca.risolia@studio.unibo.it>
+
+
+2. Disclaimer
+=============
+This software is not developed or sponsored by Z-Star Microelectronics Corp.
+Trademarks are property of their respective owner.
+
+
+3. License
+==========
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+4. Overview and features
+========================
+This driver supports the video interface of the devices mounting the ZC0301
+Image Processor and Control Chip.
+
+The driver relies on the Video4Linux2 and USB core modules. It has been
+designed to run properly on SMP systems as well.
+
+The latest version of the ZC0301 driver can be found at the following URL:
+http://www.linux-projects.org/
+
+Some of the features of the driver are:
+
+- full compliance with the Video4Linux2 API (see also "Notes for V4L2
+ application developers" paragraph);
+- available mmap or read/poll methods for video streaming through isochronous
+ data transfers;
+- automatic detection of image sensor;
+- video format is standard JPEG;
+- dynamic driver control thanks to various module parameters (see "Module
+ parameters" paragraph);
+- up to 64 cameras can be handled at the same time; they can be connected and
+ disconnected from the host many times without turning off the computer, if
+ the system supports hotplugging;
+
+
+5. Module dependencies
+======================
+For it to work properly, the driver needs kernel support for Video4Linux and
+USB.
+
+The following options of the kernel configuration file must be enabled and
+corresponding modules must be compiled:
+
+ # Multimedia devices
+ #
+ CONFIG_VIDEO_DEV=m
+
+ # USB support
+ #
+ CONFIG_USB=m
+
+In addition, depending on the hardware being used, the modules below are
+necessary:
+
+ # USB Host Controller Drivers
+ #
+ CONFIG_USB_EHCI_HCD=m
+ CONFIG_USB_UHCI_HCD=m
+ CONFIG_USB_OHCI_HCD=m
+
+The ZC0301 controller also provides a built-in microphone interface. It is
+supported by the USB Audio driver thanks to the ALSA API:
+
+ # Sound
+ #
+ CONFIG_SOUND=y
+
+ # Advanced Linux Sound Architecture
+ #
+ CONFIG_SND=m
+
+ # USB devices
+ #
+ CONFIG_SND_USB_AUDIO=m
+
+And finally:
+
+ # USB Multimedia devices
+ #
+ CONFIG_USB_ZC0301=m
+
+
+6. Module loading
+=================
+To use the driver, it is necessary to load the "zc0301" module into memory
+after every other module required: "videodev", "usbcore" and, depending on
+the USB host controller you have, "ehci-hcd", "uhci-hcd" or "ohci-hcd".
+
+Loading can be done as shown below:
+
+ [root@localhost home]# modprobe zc0301
+
+At this point the devices should be recognized. You can invoke "dmesg" to
+analyze kernel messages and verify that the loading process has gone well:
+
+ [user@localhost home]$ dmesg
+
+
+7. Module parameters
+====================
+Module parameters are listed below:
+-------------------------------------------------------------------------------
+Name: video_nr
+Type: short array (min = 0, max = 64)
+Syntax: <-1|n[,...]>
+Description: Specify V4L2 minor mode number:
+ -1 = use next available
+ n = use minor number n
+ You can specify up to 64 cameras this way.
+ For example:
+ video_nr=-1,2,-1 would assign minor number 2 to the second
+ registered camera and use auto for the first one and for every
+ other camera.
+Default: -1
+-------------------------------------------------------------------------------
+Name: force_munmap
+Type: bool array (min = 0, max = 64)
+Syntax: <0|1[,...]>
+Description: Force the application to unmap previously mapped buffer memory
+ before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not
+ all the applications support this feature. This parameter is
+ specific for each detected camera.
+ 0 = do not force memory unmapping
+ 1 = force memory unmapping (save memory)
+Default: 0
+-------------------------------------------------------------------------------
+Name: frame_timeout
+Type: uint array (min = 0, max = 64)
+Syntax: <n[,...]>
+Description: Timeout for a video frame in seconds. This parameter is
+ specific for each detected camera. This parameter can be
+ changed at runtime thanks to the /sys filesystem interface.
+Default: 2
+-------------------------------------------------------------------------------
+Name: debug
+Type: ushort
+Syntax: <n>
+Description: Debugging information level, from 0 to 3:
+ 0 = none (use carefully)
+ 1 = critical errors
+ 2 = significant informations
+ 3 = more verbose messages
+ Level 3 is useful for testing only, when only one device
+ is used at the same time. It also shows some more informations
+ about the hardware being detected. This module parameter can be
+ changed at runtime thanks to the /sys filesystem interface.
+Default: 2
+-------------------------------------------------------------------------------
+
+
+8. Supported devices
+====================
+None of the names of the companies as well as their products will be mentioned
+here. They have never collaborated with the author, so no advertising.
+
+From the point of view of a driver, what unambiguously identify a device are
+its vendor and product USB identifiers. Below is a list of known identifiers of
+devices mounting the ZC0301 Image Processor and Control Chips:
+
+Vendor ID Product ID
+--------- ----------
+0x041e 0x4017
+0x041e 0x401c
+0x041e 0x401e
+0x041e 0x4034
+0x041e 0x4035
+0x046d 0x08ae
+0x0ac8 0x0301
+0x10fd 0x8050
+
+The list above does not imply that all those devices work with this driver: up
+until now only the ones that mount the following image sensors are supported;
+kernel messages will always tell you whether this is the case:
+
+Model Manufacturer
+----- ------------
+PAS202BCB PixArt Imaging, Inc.
+
+
+9. Notes for V4L2 application developers
+========================================
+This driver follows the V4L2 API specifications. In particular, it enforces two
+rules:
+
+- exactly one I/O method, either "mmap" or "read", is associated with each
+file descriptor. Once it is selected, the application must close and reopen the
+device to switch to the other I/O method;
+
+- although it is not mandatory, previously mapped buffer memory should always
+be unmapped before calling any "VIDIOC_S_CROP" or "VIDIOC_S_FMT" ioctl's.
+The same number of buffers as before will be allocated again to match the size
+of the new video frames, so you have to map the buffers again before any I/O
+attempts on them.
+
+
+10. Contact information
+=======================
+The author may be contacted by e-mail at <luca.risolia@studio.unibo.it>.
+
+GPG/PGP encrypted e-mail's are accepted. The GPG key ID of the author is
+'FCE635A4'; the public 1024-bit key should be available at any keyserver;
+the fingerprint is: '88E8 F32F 7244 68BA 3958 5D40 99DA 5D2A FCE6 35A4'.
+
+
+11. Credits
+===========
+- Informations about the chip internals needed to enable the I2C protocol have
+ been taken from the documentation of the ZC030x Video4Linux1 driver written
+ by Andrew Birkett <andy@nobugs.org>;
+- The initialization values of the ZC0301 controller connected to the PAS202BCB
+ image sensor have been taken from the SPCA5XX driver maintained by
+ Michel Xhaard <mxhaard@magic.fr>.
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88
index 8bea3fbd0548..3b39a91b24bd 100644
--- a/Documentation/video4linux/CARDLIST.cx88
+++ b/Documentation/video4linux/CARDLIST.cx88
@@ -43,3 +43,5 @@
42 -> digitalnow DNTV Live! DVB-T Pro [1822:0025]
43 -> KWorld/VStream XPert DVB-T with cx22702 [17de:08a1]
44 -> DViCO FusionHDTV DVB-T Dual Digital [18ac:db50,18ac:db54]
+ 45 -> KWorld HardwareMpegTV XPert [17de:0840]
+ 46 -> DViCO FusionHDTV DVB-T Hybrid [18ac:db40,18ac:db44]
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
index a0c7cad20971..a3026689bbe6 100644
--- a/Documentation/video4linux/CARDLIST.em28xx
+++ b/Documentation/video4linux/CARDLIST.em28xx
@@ -8,3 +8,4 @@
7 -> Leadtek Winfast USB II (em2800)
8 -> Kworld USB2800 (em2800)
9 -> Pinnacle Dazzle DVC 90 (em2820/em2840) [2304:0207]
+ 12 -> Kworld PVR TV 2800 RF (em2820/em2840)
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index da4fb890165f..8c7195455963 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -83,3 +83,12 @@
82 -> MSI TV@Anywhere plus [1462:6231]
83 -> Terratec Cinergy 250 PCI TV [153b:1160]
84 -> LifeView FlyDVB Trio [5168:0319]
+ 85 -> AverTV DVB-T 777 [1461:2c05]
+ 86 -> LifeView FlyDVB-T [5168:0301]
+ 87 -> ADS Instant TV Duo Cardbus PTV331 [0331:1421]
+ 88 -> Tevion/KWorld DVB-T 220RF [17de:7201]
+ 89 -> ELSA EX-VISION 700TV [1048:226c]
+ 90 -> Kworld ATSC110 [17de:7350]
+ 91 -> AVerMedia A169 B [1461:7360]
+ 92 -> AVerMedia A169 B1 [1461:6360]
+ 93 -> Medion 7134 Bridge #2 [16be:0005]
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
index f6d0cf7b7922..1bcdac67dd8c 100644
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -64,8 +64,10 @@ tuner=62 - Philips TEA5767HN FM Radio
tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner
tuner=64 - LG TDVS-H062F/TUA6034
tuner=65 - Ymec TVF66T5-B/DFF
-tuner=66 - LG NTSC (TALN mini series)
+tuner=66 - LG TALN series
tuner=67 - Philips TD1316 Hybrid Tuner
tuner=68 - Philips TUV1236D ATSC/NTSC dual in
-tuner=69 - Tena TNF 5335 MF
+tuner=69 - Tena TNF 5335 and similar models
tuner=70 - Samsung TCPN 2121P30A
+tuner=71 - Xceive xc3028
+tuner=72 - Thomson FE6600
diff --git a/Documentation/video4linux/CQcam.txt b/Documentation/video4linux/CQcam.txt
index e415e3604539..464e4cec94cb 100644
--- a/Documentation/video4linux/CQcam.txt
+++ b/Documentation/video4linux/CQcam.txt
@@ -1,7 +1,7 @@
c-qcam - Connectix Color QuickCam video4linux kernel driver
Copyright (C) 1999 Dave Forrest <drf5n@virginia.edu>
- released under GNU GPL.
+ released under GNU GPL.
1999-12-08 Dave Forrest, written with kernel version 2.2.12 in mind
@@ -45,21 +45,21 @@ configuration. The appropriate flags are:
CONFIG_PNP_PARPORT M for autoprobe.o IEEE1284 readback module
CONFIG_PRINTER_READBACK M for parport_probe.o IEEE1284 readback module
CONFIG_VIDEO_DEV M for videodev.o video4linux module
- CONFIG_VIDEO_CQCAM M for c-qcam.o Color Quickcam module
+ CONFIG_VIDEO_CQCAM M for c-qcam.o Color Quickcam module
With these flags, the kernel should compile and install the modules.
To record and monitor the compilation, I use:
(make zlilo ; \
make modules; \
- make modules_install ;
+ make modules_install ;
depmod -a ) &>log &
less log # then a capital 'F' to watch the progress
-
+
But that is my personal preference.
2.2 Configuration
-
+
The configuration requires module configuration and device
configuration. I like kmod or kerneld process with the
/etc/modprobe.conf file so the modules can automatically load/unload as
@@ -68,7 +68,7 @@ using MAKEDEV, or need to be created. The following sections detail
these procedures.
-2.1 Module Configuration
+2.1 Module Configuration
Using modules requires a bit of work to install and pass the
parameters. Understand that entries in /etc/modprobe.conf of:
@@ -128,9 +128,9 @@ system (CONFIG_PROC_FS), the parallel printer support
(CONFIG_PRINTER), the IEEE 1284 system,(CONFIG_PRINTER_READBACK), you
should be able to read some identification from your quickcam with
- modprobe -v parport
- modprobe -v parport_probe
- cat /proc/parport/PORTNUMBER/autoprobe
+ modprobe -v parport
+ modprobe -v parport_probe
+ cat /proc/parport/PORTNUMBER/autoprobe
Returns:
CLASS:MEDIA;
MODEL:Color QuickCam 2.0;
@@ -140,7 +140,7 @@ Returns:
and well. A common problem is that the current driver does not
reliably detect a c-qcam, even though one is attached. In this case,
- modprobe -v c-qcam
+ modprobe -v c-qcam
or
insmod -v c-qcam
@@ -152,16 +152,16 @@ video4linux mailing list and archive for more current information.
3.1 Checklist:
Can you get an image?
- v4lgrab >qcam.ppm ; wc qcam.ppm ; xv qcam.ppm
+ v4lgrab >qcam.ppm ; wc qcam.ppm ; xv qcam.ppm
- Is a working c-qcam connected to the port?
- grep ^ /proc/parport/?/autoprobe
+ Is a working c-qcam connected to the port?
+ grep ^ /proc/parport/?/autoprobe
- Do the /dev/video* files exist?
- ls -lad /dev/video
+ Do the /dev/video* files exist?
+ ls -lad /dev/video
- Is the c-qcam module loaded?
- modprobe -v c-qcam ; lsmod
+ Is the c-qcam module loaded?
+ modprobe -v c-qcam ; lsmod
Does the camera work with alternate programs? cqcam, etc?
@@ -174,7 +174,7 @@ video4linux mailing list and archive for more current information.
isn't, you might try patching the c-qcam module to add a parport=xxx
option as in the bw-qcam module so you can specify the parallel port:
- insmod -v c-qcam parport=0
+ insmod -v c-qcam parport=0
And bypass the detection code, see ../../drivers/char/c-qcam.c and
look for the 'qc_detect' code and call.
@@ -183,12 +183,12 @@ look for the 'qc_detect' code and call.
this work is documented at the video4linux2 site listed below.
-9.0 --- A sample program using v4lgrabber,
+9.0 --- A sample program using v4lgrabber,
This program is a simple image grabber that will copy a frame from the
first video device, /dev/video0 to standard output in portable pixmap
format (.ppm) Using this like: 'v4lgrab | convert - c-qcam.jpg'
-produced this picture of me at
+produced this picture of me at
http://mug.sys.virginia.edu/~drf5n/extras/c-qcam.jpg
-------------------- 8< ---------------- 8< -----------------------------
@@ -202,8 +202,8 @@ produced this picture of me at
* Use as:
* v4lgrab >image.ppm
*
- * Copyright (C) 1998-05-03, Phil Blundell <philb@gnu.org>
- * Copied from http://www.tazenda.demon.co.uk/phil/vgrabber.c
+ * Copyright (C) 1998-05-03, Phil Blundell <philb@gnu.org>
+ * Copied from http://www.tazenda.demon.co.uk/phil/vgrabber.c
* with minor modifications (Dave Forrest, drf5n@virginia.edu).
*
*/
@@ -225,55 +225,55 @@ produced this picture of me at
#define READ_VIDEO_PIXEL(buf, format, depth, r, g, b) \
{ \
- switch (format) \
- { \
- case VIDEO_PALETTE_GREY: \
- switch (depth) \
- { \
- case 4: \
- case 6: \
- case 8: \
- (r) = (g) = (b) = (*buf++ << 8);\
- break; \
- \
- case 16: \
- (r) = (g) = (b) = \
- *((unsigned short *) buf); \
- buf += 2; \
- break; \
- } \
- break; \
- \
- \
- case VIDEO_PALETTE_RGB565: \
- { \
- unsigned short tmp = *(unsigned short *)buf; \
- (r) = tmp&0xF800; \
- (g) = (tmp<<5)&0xFC00; \
- (b) = (tmp<<11)&0xF800; \
- buf += 2; \
- } \
- break; \
- \
- case VIDEO_PALETTE_RGB555: \
- (r) = (buf[0]&0xF8)<<8; \
- (g) = ((buf[0] << 5 | buf[1] >> 3)&0xF8)<<8; \
- (b) = ((buf[1] << 2 ) & 0xF8)<<8; \
- buf += 2; \
- break; \
- \
- case VIDEO_PALETTE_RGB24: \
- (r) = buf[0] << 8; (g) = buf[1] << 8; \
- (b) = buf[2] << 8; \
- buf += 3; \
- break; \
- \
- default: \
- fprintf(stderr, \
- "Format %d not yet supported\n", \
- format); \
- } \
-}
+ switch (format) \
+ { \
+ case VIDEO_PALETTE_GREY: \
+ switch (depth) \
+ { \
+ case 4: \
+ case 6: \
+ case 8: \
+ (r) = (g) = (b) = (*buf++ << 8);\
+ break; \
+ \
+ case 16: \
+ (r) = (g) = (b) = \
+ *((unsigned short *) buf); \
+ buf += 2; \
+ break; \
+ } \
+ break; \
+ \
+ \
+ case VIDEO_PALETTE_RGB565: \
+ { \
+ unsigned short tmp = *(unsigned short *)buf; \
+ (r) = tmp&0xF800; \
+ (g) = (tmp<<5)&0xFC00; \
+ (b) = (tmp<<11)&0xF800; \
+ buf += 2; \
+ } \
+ break; \
+ \
+ case VIDEO_PALETTE_RGB555: \
+ (r) = (buf[0]&0xF8)<<8; \
+ (g) = ((buf[0] << 5 | buf[1] >> 3)&0xF8)<<8; \
+ (b) = ((buf[1] << 2 ) & 0xF8)<<8; \
+ buf += 2; \
+ break; \
+ \
+ case VIDEO_PALETTE_RGB24: \
+ (r) = buf[0] << 8; (g) = buf[1] << 8; \
+ (b) = buf[2] << 8; \
+ buf += 3; \
+ break; \
+ \
+ default: \
+ fprintf(stderr, \
+ "Format %d not yet supported\n", \
+ format); \
+ } \
+}
int get_brightness_adj(unsigned char *image, long size, int *brightness) {
long i, tot = 0;
@@ -324,40 +324,40 @@ int main(int argc, char ** argv)
if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
vpic.depth=6;
if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
- vpic.depth=4;
- if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
- fprintf(stderr, "Unable to find a supported capture format.\n");
- close(fd);
- exit(1);
- }
+ vpic.depth=4;
+ if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
+ fprintf(stderr, "Unable to find a supported capture format.\n");
+ close(fd);
+ exit(1);
+ }
}
}
} else {
vpic.depth=24;
vpic.palette=VIDEO_PALETTE_RGB24;
-
+
if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
vpic.palette=VIDEO_PALETTE_RGB565;
vpic.depth=16;
-
+
if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
- vpic.palette=VIDEO_PALETTE_RGB555;
- vpic.depth=15;
-
- if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
- fprintf(stderr, "Unable to find a supported capture format.\n");
- return -1;
- }
+ vpic.palette=VIDEO_PALETTE_RGB555;
+ vpic.depth=15;
+
+ if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
+ fprintf(stderr, "Unable to find a supported capture format.\n");
+ return -1;
+ }
}
}
}
-
+
buffer = malloc(win.width * win.height * bpp);
if (!buffer) {
fprintf(stderr, "Out of memory.\n");
exit(1);
}
-
+
do {
int newbright;
read(fd, buffer, win.width * win.height * bpp);
@@ -365,8 +365,8 @@ int main(int argc, char ** argv)
if (f) {
vpic.brightness += (newbright << 8);
if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
- perror("VIDIOSPICT");
- break;
+ perror("VIDIOSPICT");
+ break;
}
}
} while (f);
@@ -381,7 +381,7 @@ int main(int argc, char ** argv)
fputc(g>>8, stdout);
fputc(b>>8, stdout);
}
-
+
close(fd);
return 0;
}
diff --git a/Documentation/video4linux/README.cpia b/Documentation/video4linux/README.cpia
index c95e7bbc0fdf..19cd3bf24981 100644
--- a/Documentation/video4linux/README.cpia
+++ b/Documentation/video4linux/README.cpia
@@ -87,7 +87,7 @@ hardware configuration of the parport. You can give the boot-parameter
at the LILO-prompt or specify it in lilo.conf. I use the following
append-line in lilo.conf:
- append="parport=0x378,7,3"
+ append="parport=0x378,7,3"
See Documentation/parport.txt for more information about the
configuration of the parport and the values given above. Do not simply
@@ -175,7 +175,7 @@ THANKS (in no particular order):
- Manuel J. Petit de Gabriel <mpetit@dit.upm.es> for providing help
with Isabel (http://isabel.dit.upm.es/)
- Bas Huisman <bhuism@cs.utwente.nl> for writing the initial parport code
-- Jarl Totland <Jarl.Totland@bdc.no> for setting up the mailing list
+- Jarl Totland <Jarl.Totland@bdc.no> for setting up the mailing list
and maintaining the web-server[3]
- Chris Whiteford <Chris@informinteractive.com> for fixes related to the
1.02 firmware
diff --git a/Documentation/video4linux/README.cpia2 b/Documentation/video4linux/README.cpia2
new file mode 100644
index 000000000000..ce8213d28b67
--- /dev/null
+++ b/Documentation/video4linux/README.cpia2
@@ -0,0 +1,130 @@
+$Id: README,v 1.7 2005/08/29 23:39:57 sbertin Exp $
+
+1. Introduction
+
+ This is a driver for STMicroelectronics's CPiA2 (second generation
+Colour Processor Interface ASIC) based cameras. This camera outputs an MJPEG
+stream at up to vga size. It implements the Video4Linux interface as much as
+possible. Since the V4L interface does not support compressed formats, only
+an mjpeg enabled application can be used with the camera. We have modified the
+gqcam application to view this stream.
+
+ The driver is implemented as two kernel modules. The cpia2 module
+contains the camera functions and the V4L interface. The cpia2_usb module
+contains usb specific functions. The main reason for this was the size of the
+module was getting out of hand, so I separted them. It is not likely that
+there will be a parallel port version.
+
+FEATURES:
+ - Supports cameras with the Vision stv6410 (CIF) and stv6500 (VGA) cmos
+ sensors. I only have the vga sensor, so can't test the other.
+ - Image formats: VGA, QVGA, CIF, QCIF, and a number of sizes in between.
+ VGA and QVGA are the native image sizes for the VGA camera. CIF is done
+ in the coprocessor by scaling QVGA. All other sizes are done by clipping.
+ - Palette: YCrCb, compressed with MJPEG.
+ - Some compression parameters are settable.
+ - Sensor framerate is adjustable (up to 30 fps CIF, 15 fps VGA).
+ - Adjust brightness, color, contrast while streaming.
+ - Flicker control settable for 50 or 60 Hz mains frequency.
+
+2. Making and installing the stv672 driver modules:
+
+ Requirements:
+ -------------
+ This should work with 2.4 (2.4.23 and later) and 2.6 kernels, but has
+only been tested on 2.6. Video4Linux must be either compiled into the kernel or
+available as a module. Video4Linux2 is automatically detected and made
+available at compile time.
+
+ Compiling:
+ ----------
+ As root, do a make install. This will compile and install the modules
+into the media/video directory in the module tree. For 2.4 kernels, use
+Makefile_2.4 (aka do make -f Makefile_2.4 install).
+
+ Setup:
+ ------
+ Use 'modprobe cpia2' to load and 'modprobe -r cpia2' to unload. This
+may be done automatically by your distribution.
+
+3. Driver options
+
+ Option Description
+ ------ -----------
+ video_nr video device to register (0=/dev/video0, etc)
+ range -1 to 64. default is -1 (first available)
+ If you have more than 1 camera, this MUST be -1.
+ buffer_size Size for each frame buffer in bytes (default 68k)
+ num_buffers Number of frame buffers (1-32, default 3)
+ alternate USB Alternate (2-7, default 7)
+ flicker_freq Frequency for flicker reduction(50 or 60, default 60)
+ flicker_mode 0 to disable, or 1 to enable flicker reduction.
+ (default 0). This is only effective if the camera
+ uses a stv0672 coprocessor.
+
+ Setting the options:
+ --------------------
+ If you are using modules, edit /etc/modules.conf and add an options
+line like this:
+ options cpia2 num_buffers=3 buffer_size=65535
+
+ If the driver is compiled into the kernel, at boot time specify them
+like this:
+ cpia2.num_buffers=3 cpia2.buffer_size=65535
+
+ What buffer size should I use?
+ ------------------------------
+ The maximum image size depends on the alternate you choose, and the
+frame rate achieved by the camera. If the compression engine is able to
+keep up with the frame rate, the maximum image size is given by the table
+below.
+ The compression engine starts out at maximum compression, and will
+increase image quality until it is close to the size in the table. As long
+as the compression engine can keep up with the frame rate, after a short time
+the images will all be about the size in the table, regardless of resolution.
+ At low alternate settings, the compression engine may not be able to
+compress the image enough and will reduce the frame rate by producing larger
+images.
+ The default of 68k should be good for most users. This will handle
+any alternate at frame rates down to 15fps. For lower frame rates, it may
+be necessary to increase the buffer size to avoid having frames dropped due
+to insufficient space.
+
+ Image size(bytes)
+ Alternate bytes/ms 15fps 30fps
+ 2 128 8533 4267
+ 3 384 25600 12800
+ 4 640 42667 21333
+ 5 768 51200 25600
+ 6 896 59733 29867
+ 7 1023 68200 34100
+
+ How many buffers should I use?
+ ------------------------------
+ For normal streaming, 3 should give the best results. With only 2,
+it is possible for the camera to finish sending one image just after a
+program has started reading the other. If this happens, the driver must drop
+a frame. The exception to this is if you have a heavily loaded machine. In
+this case use 2 buffers. You are probably not reading at the full frame rate.
+If the camera can send multiple images before a read finishes, it could
+overwrite the third buffer before the read finishes, leading to a corrupt
+image. Single and double buffering have extra checks to avoid overwriting.
+
+4. Using the camera
+
+ We are providing a modified gqcam application to view the output. In
+order to avoid confusion, here it is called mview. There is also the qx5view
+program which can also control the lights on the qx5 microscope. MJPEG Tools
+(http://mjpeg.sourceforge.net) can also be used to record from the camera.
+
+5. Notes to developers:
+
+ - This is a driver version stripped of the 2.4 back compatibility
+ and old MJPEG ioctl API. See cpia2.sf.net for 2.4 support.
+
+6. Thanks:
+
+ - Peter Pregler <Peter_Pregler@email.com>,
+ Scott J. Bertin <scottbertin@yahoo.com>, and
+ Jarl Totland <Jarl.Totland@bdc.no> for the original cpia driver, which
+ this one was modelled from.
diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran
index 52c94bd7dca1..be9f21b84555 100644
--- a/Documentation/video4linux/Zoran
+++ b/Documentation/video4linux/Zoran
@@ -28,7 +28,7 @@ Iomega Buz:
* Philips saa7111 TV decoder
* Philips saa7185 TV encoder
Drivers to use: videodev, i2c-core, i2c-algo-bit,
- videocodec, saa7111, saa7185, zr36060, zr36067
+ videocodec, saa7111, saa7185, zr36060, zr36067
Inputs/outputs: Composite and S-video
Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
Card number: 7
@@ -39,7 +39,7 @@ Linux Media Labs LML33:
* Brooktree bt819 TV decoder
* Brooktree bt856 TV encoder
Drivers to use: videodev, i2c-core, i2c-algo-bit,
- videocodec, bt819, bt856, zr36060, zr36067
+ videocodec, bt819, bt856, zr36060, zr36067
Inputs/outputs: Composite and S-video
Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
Card number: 5
@@ -50,7 +50,7 @@ Linux Media Labs LML33R10:
* Philips saa7114 TV decoder
* Analog Devices adv7170 TV encoder
Drivers to use: videodev, i2c-core, i2c-algo-bit,
- videocodec, saa7114, adv7170, zr36060, zr36067
+ videocodec, saa7114, adv7170, zr36060, zr36067
Inputs/outputs: Composite and S-video
Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
Card number: 6
@@ -61,7 +61,7 @@ Pinnacle/Miro DC10(new):
* Philips saa7110a TV decoder
* Analog Devices adv7176 TV encoder
Drivers to use: videodev, i2c-core, i2c-algo-bit,
- videocodec, saa7110, adv7175, zr36060, zr36067
+ videocodec, saa7110, adv7175, zr36060, zr36067
Inputs/outputs: Composite, S-video and Internal
Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
Card number: 1
@@ -84,7 +84,7 @@ Pinnacle/Miro DC10(old): *
* Micronas vpx3220a TV decoder
* mse3000 TV encoder or Analog Devices adv7176 TV encoder *
Drivers to use: videodev, i2c-core, i2c-algo-bit,
- videocodec, vpx3220, mse3000/adv7175, zr36050, zr36016, zr36067
+ videocodec, vpx3220, mse3000/adv7175, zr36050, zr36016, zr36067
Inputs/outputs: Composite, S-video and Internal
Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
Card number: 0
@@ -96,7 +96,7 @@ Pinnacle/Miro DC30: *
* Micronas vpx3225d/vpx3220a/vpx3216b TV decoder
* Analog Devices adv7176 TV encoder
Drivers to use: videodev, i2c-core, i2c-algo-bit,
- videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36016, zr36067
+ videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36016, zr36067
Inputs/outputs: Composite, S-video and Internal
Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
Card number: 3
@@ -123,11 +123,11 @@ Note: use encoder=X or decoder=X for non-default i2c chips (see i2c-id.h)
The best know TV standards are NTSC/PAL/SECAM. but for decoding a frame that
information is not enough. There are several formats of the TV standards.
-And not every TV decoder is able to handle every format. Also the every
-combination is supported by the driver. There are currently 11 different
-tv broadcast formats all aver the world.
+And not every TV decoder is able to handle every format. Also the every
+combination is supported by the driver. There are currently 11 different
+tv broadcast formats all aver the world.
-The CCIR defines parameters needed for broadcasting the signal.
+The CCIR defines parameters needed for broadcasting the signal.
The CCIR has defined different standards: A,B,D,E,F,G,D,H,I,K,K1,L,M,N,...
The CCIR says not much about about the colorsystem used !!!
And talking about a colorsystem says not to much about how it is broadcast.
@@ -136,18 +136,18 @@ The CCIR standards A,E,F are not used any more.
When you speak about NTSC, you usually mean the standard: CCIR - M using
the NTSC colorsystem which is used in the USA, Japan, Mexico, Canada
-and a few others.
+and a few others.
When you talk about PAL, you usually mean: CCIR - B/G using the PAL
-colorsystem which is used in many Countries.
+colorsystem which is used in many Countries.
-When you talk about SECAM, you mean: CCIR - L using the SECAM Colorsystem
+When you talk about SECAM, you mean: CCIR - L using the SECAM Colorsystem
which is used in France, and a few others.
There the other version of SECAM, CCIR - D/K is used in Bulgaria, China,
-Slovakai, Hungary, Korea (Rep.), Poland, Rumania and a others.
+Slovakai, Hungary, Korea (Rep.), Poland, Rumania and a others.
-The CCIR - H uses the PAL colorsystem (sometimes SECAM) and is used in
+The CCIR - H uses the PAL colorsystem (sometimes SECAM) and is used in
Egypt, Libya, Sri Lanka, Syrain Arab. Rep.
The CCIR - I uses the PAL colorsystem, and is used in Great Britain, Hong Kong,
@@ -158,30 +158,30 @@ and is used in Argentinia, Uruguay, an a few others
We do not talk about how the audio is broadcast !
-A rather good sites about the TV standards are:
+A rather good sites about the TV standards are:
http://www.sony.jp/ServiceArea/Voltage_map/
http://info.electronicwerkstatt.de/bereiche/fernsehtechnik/frequenzen_und_normen/Fernsehnormen/
and http://www.cabl.com/restaurant/channel.html
Other weird things around: NTSC 4.43 is a modificated NTSC, which is mainly
used in PAL VCR's that are able to play back NTSC. PAL 60 seems to be the same
-as NTSC 4.43 . The Datasheets also talk about NTSC 44, It seems as if it would
-be the same as NTSC 4.43.
+as NTSC 4.43 . The Datasheets also talk about NTSC 44, It seems as if it would
+be the same as NTSC 4.43.
NTSC Combs seems to be a decoder mode where the decoder uses a comb filter
to split coma and luma instead of a Delay line.
But I did not defiantly find out what NTSC Comb is.
Philips saa7111 TV decoder
-was introduced in 1997, is used in the BUZ and
-can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC N, NTSC 4.43 and SECAM
+was introduced in 1997, is used in the BUZ and
+can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC N, NTSC 4.43 and SECAM
Philips saa7110a TV decoder
was introduced in 1995, is used in the Pinnacle/Miro DC10(new), DC10+ and
-can handle: PAL B/G, NTSC M and SECAM
+can handle: PAL B/G, NTSC M and SECAM
Philips saa7114 TV decoder
-was introduced in 2000, is used in the LML33R10 and
+was introduced in 2000, is used in the LML33R10 and
can handle: PAL B/G/D/H/I/N, PAL N, PAL M, NTSC M, NTSC 4.43 and SECAM
Brooktree bt819 TV decoder
@@ -206,7 +206,7 @@ was introduced in 1996, is used in the BUZ
can generate: PAL B/G, NTSC M
Brooktree bt856 TV Encoder
-was introduced in 1994, is used in the LML33
+was introduced in 1994, is used in the LML33
can generate: PAL B/D/G/H/I/N, PAL M, NTSC M, PAL-N (Argentina)
Analog Devices adv7170 TV Encoder
@@ -221,9 +221,9 @@ ITT mse3000 TV encoder
was introduced in 1991, is used in the DC10 old
can generate: PAL , NTSC , SECAM
-The adv717x, should be able to produce PAL N. But you find nothing PAL N
+The adv717x, should be able to produce PAL N. But you find nothing PAL N
specific in the registers. Seem that you have to reuse a other standard
-to generate PAL N, maybe it would work if you use the PAL M settings.
+to generate PAL N, maybe it would work if you use the PAL M settings.
==========================
@@ -261,7 +261,7 @@ Here's my experience of using LML33 and Buz on various motherboards:
VIA MVP3
Forget it. Pointless. Doesn't work.
-Intel 430FX (Pentium 200)
+Intel 430FX (Pentium 200)
LML33 perfect, Buz tolerable (3 or 4 frames dropped per movie)
Intel 440BX (early stepping)
LML33 tolerable. Buz starting to get annoying (6-10 frames/hour)
@@ -438,52 +438,52 @@ importance of buffer sizes:
> -q 25 -b 128 : 24.655.992
> -q 25 -b 256 : 25.859.820
-I woke up, and can't go to sleep again. I'll kill some time explaining why
+I woke up, and can't go to sleep again. I'll kill some time explaining why
this doesn't look strange to me.
-Let's do some math using a width of 704 pixels. I'm not sure whether the Buz
+Let's do some math using a width of 704 pixels. I'm not sure whether the Buz
actually use that number or not, but that's not too important right now.
-704x288 pixels, one field, is 202752 pixels. Divided by 64 pixels per block;
-3168 blocks per field. Each pixel consist of two bytes; 128 bytes per block;
-1024 bits per block. 100% in the new driver mean 1:2 compression; the maximum
-output becomes 512 bits per block. Actually 510, but 512 is simpler to use
+704x288 pixels, one field, is 202752 pixels. Divided by 64 pixels per block;
+3168 blocks per field. Each pixel consist of two bytes; 128 bytes per block;
+1024 bits per block. 100% in the new driver mean 1:2 compression; the maximum
+output becomes 512 bits per block. Actually 510, but 512 is simpler to use
for calculations.
-Let's say that we specify d1q50. We thus want 256 bits per block; times 3168
-becomes 811008 bits; 101376 bytes per field. We're talking raw bits and bytes
-here, so we don't need to do any fancy corrections for bits-per-pixel or such
+Let's say that we specify d1q50. We thus want 256 bits per block; times 3168
+becomes 811008 bits; 101376 bytes per field. We're talking raw bits and bytes
+here, so we don't need to do any fancy corrections for bits-per-pixel or such
things. 101376 bytes per field.
-d1 video contains two fields per frame. Those sum up to 202752 bytes per
+d1 video contains two fields per frame. Those sum up to 202752 bytes per
frame, and one of those frames goes into each buffer.
-But wait a second! -b128 gives 128kB buffers! It's not possible to cram
+But wait a second! -b128 gives 128kB buffers! It's not possible to cram
202752 bytes of JPEG data into 128kB!
-This is what the driver notice and automatically compensate for in your
+This is what the driver notice and automatically compensate for in your
examples. Let's do some math using this information:
-128kB is 131072 bytes. In this buffer, we want to store two fields, which
-leaves 65536 bytes for each field. Using 3168 blocks per field, we get
-20.68686868... available bytes per block; 165 bits. We can't allow the
-request for 256 bits per block when there's only 165 bits available! The -q50
-option is silently overridden, and the -b128 option takes precedence, leaving
+128kB is 131072 bytes. In this buffer, we want to store two fields, which
+leaves 65536 bytes for each field. Using 3168 blocks per field, we get
+20.68686868... available bytes per block; 165 bits. We can't allow the
+request for 256 bits per block when there's only 165 bits available! The -q50
+option is silently overridden, and the -b128 option takes precedence, leaving
us with the equivalence of -q32.
-This gives us a data rate of 165 bits per block, which, times 3168, sums up
-to 65340 bytes per field, out of the allowed 65536. The current driver has
-another level of rate limiting; it won't accept -q values that fill more than
-6/8 of the specified buffers. (I'm not sure why. "Playing it safe" seem to be
-a safe bet. Personally, I think I would have lowered requested-bits-per-block
-by one, or something like that.) We can't use 165 bits per block, but have to
-lower it again, to 6/8 of the available buffer space: We end up with 124 bits
-per block, the equivalence of -q24. With 128kB buffers, you can't use greater
+This gives us a data rate of 165 bits per block, which, times 3168, sums up
+to 65340 bytes per field, out of the allowed 65536. The current driver has
+another level of rate limiting; it won't accept -q values that fill more than
+6/8 of the specified buffers. (I'm not sure why. "Playing it safe" seem to be
+a safe bet. Personally, I think I would have lowered requested-bits-per-block
+by one, or something like that.) We can't use 165 bits per block, but have to
+lower it again, to 6/8 of the available buffer space: We end up with 124 bits
+per block, the equivalence of -q24. With 128kB buffers, you can't use greater
than -q24 at -d1. (And PAL, and 704 pixels width...)
-The third example is limited to -q24 through the same process. The second
-example, using very similar calculations, is limited to -q48. The only
-example that actually grab at the specified -q value is the last one, which
+The third example is limited to -q24 through the same process. The second
+example, using very similar calculations, is limited to -q48. The only
+example that actually grab at the specified -q value is the last one, which
is clearly visible, looking at the file size.
--
diff --git a/Documentation/video4linux/bttv/ICs b/Documentation/video4linux/bttv/ICs
index 6b7491336967..611315f87c3e 100644
--- a/Documentation/video4linux/bttv/ICs
+++ b/Documentation/video4linux/bttv/ICs
@@ -14,13 +14,13 @@ Hauppauge Win/TV pci (version 405):
Microchip 24LC02B or
Philips 8582E2Y: 256 Byte EEPROM with configuration information
- I2C 0xa0-0xa1, (24LC02B also responds to 0xa2-0xaf)
+ I2C 0xa0-0xa1, (24LC02B also responds to 0xa2-0xaf)
Philips SAA5246AGP/E: Videotext decoder chip, I2C 0x22-0x23
TDA9800: sound decoder
Winbond W24257AS-35: 32Kx8 CMOS static RAM (Videotext buffer mem)
14052B: analog switch for selection of sound source
-PAL:
+PAL:
TDA5737: VHF, hyperband and UHF mixer/oscillator for TV and VCR 3-band tuners
TSA5522: 1.4 GHz I2C-bus controlled synthesizer, I2C 0xc2-0xc3
diff --git a/Documentation/video4linux/bttv/PROBLEMS b/Documentation/video4linux/bttv/PROBLEMS
index 8e31e9e36bf7..2b8b0079f7c7 100644
--- a/Documentation/video4linux/bttv/PROBLEMS
+++ b/Documentation/video4linux/bttv/PROBLEMS
@@ -3,7 +3,7 @@
- Start capturing by pressing "c" or by selecting it via a menu!!!
- The memory of some S3 cards is not recognized right:
-
+
First of all, if you are not using XFree-3.2 or newer, upgrade AT LEAST to
XFree-3.2A! This solved the problem for most people.
@@ -31,23 +31,23 @@
(mostly with Trio 64 but also with some others)
Get the free demo version of Accelerated X from www.xinside.com and try
bttv with it. bttv seems to work with most S3 cards with Accelerated X.
-
+
Since I do not know much (better make that almost nothing) about VGA card
programming I do not know the reason for this.
Looks like XFree does something different when setting up the video memory?
- Maybe somebody can enlighten me?
- Would be nice if somebody could get this to work with XFree since
- Accelerated X costs more than some of the grabber cards ...
-
+ Maybe somebody can enlighten me?
+ Would be nice if somebody could get this to work with XFree since
+ Accelerated X costs more than some of the grabber cards ...
+
Better linear frame buffer support for S3 cards will probably be in
XFree 4.0.
-
+
- Grabbing is not switched off when changing consoles with XFree.
That's because XFree and some AcceleratedX versions do not send unmap
events.
- Some popup windows (e.g. of the window manager) are not refreshed.
-
+
Disable backing store by starting X with the option "-bs"
- When using 32 bpp in XFree or 24+8bpp mode in AccelX 3.1 the system
diff --git a/Documentation/video4linux/bttv/README.quirks b/Documentation/video4linux/bttv/README.quirks
index e8edb87df711..92e03929a6b2 100644
--- a/Documentation/video4linux/bttv/README.quirks
+++ b/Documentation/video4linux/bttv/README.quirks
@@ -38,9 +38,9 @@ tolerate.
------------------------
When using the 430FX PCI, the following rules will ensure
-compatibility:
+compatibility:
- (1) Deassert REQ at the same time as asserting FRAME.
+ (1) Deassert REQ at the same time as asserting FRAME.
(2) Do not reassert REQ to request another bus transaction until after
finish-ing the previous transaction.
diff --git a/Documentation/video4linux/bttv/THANKS b/Documentation/video4linux/bttv/THANKS
index 2085399da7d4..950aa781c2e9 100644
--- a/Documentation/video4linux/bttv/THANKS
+++ b/Documentation/video4linux/bttv/THANKS
@@ -1,6 +1,6 @@
Many thanks to:
-- Markus Schroeder <schroedm@uni-duesseldorf.de> for information on the Bt848
+- Markus Schroeder <schroedm@uni-duesseldorf.de> for information on the Bt848
and tuner programming and his control program xtvc.
- Martin Buck <martin-2.buck@student.uni-ulm.de> for his great Videotext
@@ -16,7 +16,7 @@ Many thanks to:
- MIRO for providing a free PCTV card and detailed information about the
components on their cards. (E.g. how the tuner type is detected)
Without their card I could not have debugged the NTSC mode.
-
+
- Hauppauge for telling how the sound input is selected and what components
they do and will use on their radio cards.
Also many thanks for faxing me the FM1216 data sheet.
diff --git a/Documentation/video4linux/cpia2_overview.txt b/Documentation/video4linux/cpia2_overview.txt
new file mode 100644
index 000000000000..a6e53665216b
--- /dev/null
+++ b/Documentation/video4linux/cpia2_overview.txt
@@ -0,0 +1,38 @@
+ Programmer's View of Cpia2
+
+Cpia2 is the second generation video coprocessor from VLSI Vision Ltd (now a
+division of ST Microelectronics). There are two versions. The first is the
+STV0672, which is capable of up to 30 frames per second (fps) in frame sizes
+up to CIF, and 15 fps for VGA frames. The STV0676 is an improved version,
+which can handle up to 30 fps VGA. Both coprocessors can be attached to two
+CMOS sensors - the vvl6410 CIF sensor and the vvl6500 VGA sensor. These will
+be referred to as the 410 and the 500 sensors, or the CIF and VGA sensors.
+
+The two chipsets operate almost identically. The core is an 8051 processor,
+running two different versions of firmware. The 672 runs the VP4 video
+processor code, the 676 runs VP5. There are a few differences in register
+mappings for the two chips. In these cases, the symbols defined in the
+header files are marked with VP4 or VP5 as part of the symbol name.
+
+The cameras appear externally as three sets of registers. Setting register
+values is the only way to control the camera. Some settings are
+interdependant, such as the sequence required to power up the camera. I will
+try to make note of all of these cases.
+
+The register sets are called blocks. Block 0 is the system block. This
+section is always powered on when the camera is plugged in. It contains
+registers that control housekeeping functions such as powering up the video
+processor. The video processor is the VP block. These registers control
+how the video from the sensor is processed. Examples are timing registers,
+user mode (vga, qvga), scaling, cropping, framerates, and so on. The last
+block is the video compressor (VC). The video stream sent from the camera is
+compressed as Motion JPEG (JPEGA). The VC controls all of the compression
+parameters. Looking at the file cpia2_registers.h, you can get a full view
+of these registers and the possible values for most of them.
+
+One or more registers can be set or read by sending a usb control message to
+the camera. There are three modes for this. Block mode requests a number
+of contiguous registers. Random mode reads or writes random registers with
+a tuple structure containing address/value pairs. The repeat mode is only
+used by VP4 to load a firmware patch. It contains a starting address and
+a sequence of bytes to be written into a gpio port. \ No newline at end of file
diff --git a/Documentation/video4linux/radiotrack.txt b/Documentation/video4linux/radiotrack.txt
index 2b75345f13e3..d1f3ed199186 100644
--- a/Documentation/video4linux/radiotrack.txt
+++ b/Documentation/video4linux/radiotrack.txt
@@ -131,17 +131,17 @@ Check Stereo: BASE <-- 0xd8 (current volume, stereo detect,
x=0xff ==> "not stereo", x=0xfd ==> "stereo detected"
Set Frequency: code = (freq*40) + 10486188
- foreach of the 24 bits in code,
- (from Least to Most Significant):
- to write a "zero" bit,
- BASE <-- 0x01 (audio mute, no stereo detect, radio
+ foreach of the 24 bits in code,
+ (from Least to Most Significant):
+ to write a "zero" bit,
+ BASE <-- 0x01 (audio mute, no stereo detect, radio
disable, "zero" bit phase 1, tuner adjust)
- BASE <-- 0x03 (audio mute, no stereo detect, radio
+ BASE <-- 0x03 (audio mute, no stereo detect, radio
disable, "zero" bit phase 2, tuner adjust)
- to write a "one" bit,
- BASE <-- 0x05 (audio mute, no stereo detect, radio
+ to write a "one" bit,
+ BASE <-- 0x05 (audio mute, no stereo detect, radio
disable, "one" bit phase 1, tuner adjust)
- BASE <-- 0x07 (audio mute, no stereo detect, radio
+ BASE <-- 0x07 (audio mute, no stereo detect, radio
disable, "one" bit phase 2, tuner adjust)
----------------------------------------------------------------------------
diff --git a/Documentation/video4linux/w9966.txt b/Documentation/video4linux/w9966.txt
index e7ac33a7eb06..78a651254b84 100644
--- a/Documentation/video4linux/w9966.txt
+++ b/Documentation/video4linux/w9966.txt
@@ -26,7 +26,7 @@ is called VIDEO_PALETTE_YUV422 (16 bpp).
A minimal test application (with source) is available from:
http://hem.fyristorg.com/mogul/w9966.html
-The slow framerate is due to missing DMA ECP read support in the
+The slow framerate is due to missing DMA ECP read support in the
parport drivers. I might add working EPP support later.
Good luck!
diff --git a/Documentation/video4linux/zr36120.txt b/Documentation/video4linux/zr36120.txt
index 5d6357eefde4..ac6d92d01944 100644
--- a/Documentation/video4linux/zr36120.txt
+++ b/Documentation/video4linux/zr36120.txt
@@ -2,7 +2,7 @@ Driver for Trust Computer Products Framegrabber, version 0.6.1
------ --- ----- -------- -------- ------------ ------- - - -
- ZORAN ------------------------------------------------------
- Author: Pauline Middelink <middelin@polyware.nl>
+ Author: Pauline Middelink <middelin@polyware.nl>
Date: 18 September 1999
Version: 0.6.1
@@ -115,7 +115,7 @@ After making/checking the devices do:
<n> is the cardtype of the card you have. The cardnumber can
be found in the source of zr36120. Look for tvcards. If your
card is not there, please try if any other card gives some
-response, and mail me if you got a working tvcard addition.
+response, and mail me if you got a working tvcard addition.
PS. <TVCard editors behold!)
Dont forget to set video_input to the number of inputs
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
index c52820fcf500..0dd4ef30c361 100644
--- a/Documentation/vm/page_migration
+++ b/Documentation/vm/page_migration
@@ -12,12 +12,18 @@ is running.
Page migration allows a process to manually relocate the node on which its
pages are located through the MF_MOVE and MF_MOVE_ALL options while setting
-a new memory policy. The pages of process can also be relocated
+a new memory policy via mbind(). The pages of process can also be relocated
from another process using the sys_migrate_pages() function call. The
migrate_pages function call takes two sets of nodes and moves pages of a
process that are located on the from nodes to the destination nodes.
-
-Manual migration is very useful if for example the scheduler has relocated
+Page migration functions are provided by the numactl package by Andi Kleen
+(a version later than 0.9.3 is required. Get it from
+ftp://ftp.suse.com/pub/people/ak). numactl provided libnuma which
+provides an interface similar to other numa functionality for page migration.
+cat /proc/<pid>/numa_maps allows an easy review of where the pages of
+a process are located. See also the numa_maps manpage in the numactl package.
+
+Manual migration is useful if for example the scheduler has relocated
a process to a processor on a distant node. A batch scheduler or an
administrator may detect the situation and move the pages of the process
nearer to the new processor. At some point in the future we may have
@@ -25,10 +31,12 @@ some mechanism in the scheduler that will automatically move the pages.
Larger installations usually partition the system using cpusets into
sections of nodes. Paul Jackson has equipped cpusets with the ability to
-move pages when a task is moved to another cpuset. This allows automatic
-control over locality of a process. If a task is moved to a new cpuset
-then also all its pages are moved with it so that the performance of the
-process does not sink dramatically (as is the case today).
+move pages when a task is moved to another cpuset (See ../cpusets.txt).
+Cpusets allows the automation of process locality. If a task is moved to
+a new cpuset then also all its pages are moved with it so that the
+performance of the process does not sink dramatically. Also the pages
+of processes in a cpuset are moved if the allowed memory nodes of a
+cpuset are changed.
Page migration allows the preservation of the relative location of pages
within a group of nodes for all migration techniques which will preserve a
@@ -37,22 +45,26 @@ process. This is necessary in order to preserve the memory latencies.
Processes will run with similar performance after migration.
Page migration occurs in several steps. First a high level
-description for those trying to use migrate_pages() and then
-a low level description of how the low level details work.
+description for those trying to use migrate_pages() from the kernel
+(for userspace usage see the Andi Kleen's numactl package mentioned above)
+and then a low level description of how the low level details work.
-A. Use of migrate_pages()
--------------------------
+A. In kernel use of migrate_pages()
+-----------------------------------
1. Remove pages from the LRU.
Lists of pages to be migrated are generated by scanning over
pages and moving them into lists. This is done by
- calling isolate_lru_page() or __isolate_lru_page().
+ calling isolate_lru_page().
Calling isolate_lru_page increases the references to the page
- so that it cannot vanish under us.
+ so that it cannot vanish while the page migration occurs.
+ It also prevents the swapper or other scans to encounter
+ the page.
-2. Generate a list of newly allocates page to move the contents
- of the first list to.
+2. Generate a list of newly allocates page. These pages will contain the
+ contents of the pages from the first list after page migration is
+ complete.
3. The migrate_pages() function is called which attempts
to do the migration. It returns the moved pages in the
@@ -63,13 +75,17 @@ A. Use of migrate_pages()
4. The leftover pages of various types are returned
to the LRU using putback_to_lru_pages() or otherwise
disposed of. The pages will still have the refcount as
- increased by isolate_lru_pages()!
+ increased by isolate_lru_pages() if putback_to_lru_pages() is not
+ used! The kernel may want to handle the various cases of failures in
+ different ways.
-B. Operation of migrate_pages()
---------------------------------
+B. How migrate_pages() works
+----------------------------
-migrate_pages does several passes over its list of pages. A page is moved
-if all references to a page are removable at the time.
+migrate_pages() does several passes over its list of pages. A page is moved
+if all references to a page are removable at the time. The page has
+already been removed from the LRU via isolate_lru_page() and the refcount
+is increased so that the page cannot be freed while page migration occurs.
Steps:
@@ -79,36 +95,40 @@ Steps:
3. Make sure that the page has assigned swap cache entry if
it is an anonyous page. The swap cache reference is necessary
- to preserve the information contain in the page table maps.
+ to preserve the information contain in the page table maps while
+ page migration occurs.
4. Prep the new page that we want to move to. It is locked
and set to not being uptodate so that all accesses to the new
- page immediately lock while we are moving references.
+ page immediately lock while the move is in progress.
-5. All the page table references to the page are either dropped (file backed)
- or converted to swap references (anonymous pages). This should decrease the
- reference count.
+5. All the page table references to the page are either dropped (file
+ backed pages) or converted to swap references (anonymous pages).
+ This should decrease the reference count.
-6. The radix tree lock is taken
+6. The radix tree lock is taken. This will cause all processes trying
+ to reestablish a pte to block on the radix tree spinlock.
7. The refcount of the page is examined and we back out if references remain
otherwise we know that we are the only one referencing this page.
8. The radix tree is checked and if it does not contain the pointer to this
- page then we back out.
+ page then we back out because someone else modified the mapping first.
9. The mapping is checked. If the mapping is gone then a truncate action may
be in progress and we back out.
-10. The new page is prepped with some settings from the old page so that accesses
- to the new page will be discovered to have the correct settings.
+10. The new page is prepped with some settings from the old page so that
+ accesses to the new page will be discovered to have the correct settings.
11. The radix tree is changed to point to the new page.
-12. The reference count of the old page is dropped because the reference has now
- been removed.
+12. The reference count of the old page is dropped because the radix tree
+ reference is gone.
-13. The radix tree lock is dropped.
+13. The radix tree lock is dropped. With that lookups become possible again
+ and other processes will move from spinning on the tree lock to sleeping on
+ the locked new page.
14. The page contents are copied to the new page.
@@ -119,11 +139,37 @@ Steps:
17. Queued up writeback on the new page is triggered.
-18. If swap pte's were generated for the page then remove them again.
+18. If swap pte's were generated for the page then replace them with real
+ ptes. This will reenable access for processes not blocked by the page lock.
+
+19. The page locks are dropped from the old and new page.
+ Processes waiting on the page lock can continue.
+
+20. The new page is moved to the LRU and can be scanned by the swapper
+ etc again.
+
+TODO list
+---------
+
+- Page migration requires the use of swap handles to preserve the
+ information of the anonymous page table entries. This means that swap
+ space is reserved but never used. The maximum number of swap handles used
+ is determined by CHUNK_SIZE (see mm/mempolicy.c) per ongoing migration.
+ Reservation of pages could be avoided by having a special type of swap
+ handle that does not require swap space and that would only track the page
+ references. Something like that was proposed by Marcelo Tosatti in the
+ past (search for migration cache on lkml or linux-mm@kvack.org).
-19. The locks are dropped from the old and new page.
+- Page migration unmaps ptes for file backed pages and requires page
+ faults to reestablish these ptes. This could be optimized by somehow
+ recording the references before migration and then reestablish them later.
+ However, there are several locking challenges that have to be overcome
+ before this is possible.
-20. The new page is moved to the LRU.
+- Page migration generates read ptes for anonymous pages. Dirty page
+ faults are required to make the pages writable again. It may be possible
+ to generate a pte marked dirty if it is known that the page is dirty and
+ that this process has the only reference to that page.
-Christoph Lameter, December 19, 2005.
+Christoph Lameter, March 8, 2006.
diff --git a/Documentation/w1/masters/ds2482 b/Documentation/w1/masters/ds2482
new file mode 100644
index 000000000000..c5d5478d90b2
--- /dev/null
+++ b/Documentation/w1/masters/ds2482
@@ -0,0 +1,31 @@
+Kernel driver ds2482
+====================
+
+Supported chips:
+ * Maxim DS2482-100, Maxim DS2482-800
+ Prefix: 'ds2482'
+ Addresses scanned: None
+ Datasheets:
+ http://pdfserv.maxim-ic.com/en/ds/DS2482-100-DS2482S-100.pdf
+ http://pdfserv.maxim-ic.com/en/ds/DS2482-800-DS2482S-800.pdf
+
+Author: Ben Gardner <bgardner@wabtec.com>
+
+
+Description
+-----------
+
+The Maixm/Dallas Semiconductor DS2482 is a I2C device that provides
+one (DS2482-100) or eight (DS2482-800) 1-wire busses.
+
+
+General Remarks
+---------------
+
+Valid addresses are 0x18, 0x19, 0x1a, and 0x1b.
+However, the device cannot be detected without writing to the i2c bus, so no
+detection is done.
+You should force the device address.
+
+$ modprobe ds2482 force=0,0x18
+