aboutsummaryrefslogtreecommitdiffstats
path: root/lib/zlib_dfltcc/dfltcc_util.h
diff options
context:
space:
mode:
authorMikhail Zaslonko <zaslonko@linux.ibm.com>2020-01-30 22:16:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-31 10:30:40 -0800
commitaa5b395b69b65450e008b95ec623b4fc4b175f9f (patch)
tree83949b45e52736f277dc285576a9a6069d84a4e8 /lib/zlib_dfltcc/dfltcc_util.h
parentiio: adc: qcom-vadc-common: use <linux/units.h> helpers (diff)
downloadlinux-dev-aa5b395b69b65450e008b95ec623b4fc4b175f9f.tar.xz
linux-dev-aa5b395b69b65450e008b95ec623b4fc4b175f9f.zip
lib/zlib: add s390 hardware support for kernel zlib_deflate
Patch series "S390 hardware support for kernel zlib", v3. With IBM z15 mainframe the new DFLTCC instruction is available. It implements deflate algorithm in hardware (Nest Acceleration Unit - NXU) with estimated compression and decompression performance orders of magnitude faster than the current zlib. This patchset adds s390 hardware compression support to kernel zlib. The code is based on the userspace zlib implementation: https://github.com/madler/zlib/pull/410 The coding style is also preserved for future maintainability. There is only limited set of userspace zlib functions represented in kernel. Apart from that, all the memory allocation should be performed in advance. Thus, the workarea structures are extended with the parameter lists required for the DEFLATE CONVENTION CALL instruction. Since kernel zlib itself does not support gzip headers, only Adler-32 checksum is processed (also can be produced by DFLTCC facility). Like it was implemented for userspace, kernel zlib will compress in hardware on level 1, and in software on all other levels. Decompression will always happen in hardware (when enabled). Two DFLTCC compression calls produce the same results only when they both are made on machines of the same generation, and when the respective buffers have the same offset relative to the start of the page. Therefore care should be taken when using hardware compression when reproducible results are desired. However it does always produce the standard conform output which can be inflated anyway. The new kernel command line parameter 'dfltcc' is introduced to configure s390 zlib hardware support: Format: { on | off | def_only | inf_only | always } on: s390 zlib hardware support for compression on level 1 and decompression (default) off: No s390 zlib hardware support def_only: s390 zlib hardware support for deflate only (compression on level 1) inf_only: s390 zlib hardware support for inflate only (decompression) always: Same as 'on' but ignores the selected compression level always using hardware support (used for debugging) The main purpose of the integration of the NXU support into the kernel zlib is the use of hardware deflate in btrfs filesystem with on-the-fly compression enabled. Apart from that, hardware support can also be used during boot for decompressing the kernel or the ramdisk image With the patch for btrfs expanding zlib buffer from 1 to 4 pages (patch 6) the following performance results have been achieved using the ramdisk with btrfs. These are relative numbers based on throughput rate and compression ratio for zlib level 1: Input data Deflate rate Inflate rate Compression ratio NXU/Software NXU/Software NXU/Software stream of zeroes 1.46 1.02 1.00 random ASCII data 10.44 3.00 0.96 ASCII text (dickens) 6,21 3.33 0.94 binary data (vmlinux) 8,37 3.90 1.02 This means that s390 hardware deflate can provide up to 10 times faster compression (on level 1) and up to 4 times faster decompression (refers to all compression levels) for btrfs zlib. Disclaimer: Performance results are based on IBM internal tests using DD command-line utility on btrfs on a Fedora 30 based internal driver in native LPAR on a z15 system. Results may vary based on individual workload, configuration and software levels. This patch (of 9): Create zlib_dfltcc library with the s390 DEFLATE CONVERSION CALL implementation and related compression functions. Update zlib_deflate functions with the hooks for s390 hardware support and adjust workspace structures with extra parameter lists required for hardware deflate. Link: http://lkml.kernel.org/r/20200103223334.20669-2-zaslonko@linux.ibm.com Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> Signed-off-by: Mikhail Zaslonko <zaslonko@linux.ibm.com> Co-developed-by: Ilya Leoshkevich <iii@linux.ibm.com> Cc: Chris Mason <clm@fb.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: David Sterba <dsterba@suse.com> Cc: Eduard Shishkin <edward6@linux.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Josef Bacik <josef@toxicpanda.com> Cc: Richard Purdie <rpurdie@rpsys.net> Cc: Vasily Gorbik <gor@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to '')
-rw-r--r--lib/zlib_dfltcc/dfltcc_util.h110
1 files changed, 110 insertions, 0 deletions
diff --git a/lib/zlib_dfltcc/dfltcc_util.h b/lib/zlib_dfltcc/dfltcc_util.h
new file mode 100644
index 000000000000..82cd1950c416
--- /dev/null
+++ b/lib/zlib_dfltcc/dfltcc_util.h
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: Zlib
+#ifndef DFLTCC_UTIL_H
+#define DFLTCC_UTIL_H
+
+#include <linux/zutil.h>
+#include <asm/facility.h>
+
+/*
+ * C wrapper for the DEFLATE CONVERSION CALL instruction.
+ */
+typedef enum {
+ DFLTCC_CC_OK = 0,
+ DFLTCC_CC_OP1_TOO_SHORT = 1,
+ DFLTCC_CC_OP2_TOO_SHORT = 2,
+ DFLTCC_CC_OP2_CORRUPT = 2,
+ DFLTCC_CC_AGAIN = 3,
+} dfltcc_cc;
+
+#define DFLTCC_QAF 0
+#define DFLTCC_GDHT 1
+#define DFLTCC_CMPR 2
+#define DFLTCC_XPND 4
+#define HBT_CIRCULAR (1 << 7)
+#define HB_BITS 15
+#define HB_SIZE (1 << HB_BITS)
+#define DFLTCC_FACILITY 151
+
+static inline dfltcc_cc dfltcc(
+ int fn,
+ void *param,
+ Byte **op1,
+ size_t *len1,
+ const Byte **op2,
+ size_t *len2,
+ void *hist
+)
+{
+ Byte *t2 = op1 ? *op1 : NULL;
+ size_t t3 = len1 ? *len1 : 0;
+ const Byte *t4 = op2 ? *op2 : NULL;
+ size_t t5 = len2 ? *len2 : 0;
+ register int r0 __asm__("r0") = fn;
+ register void *r1 __asm__("r1") = param;
+ register Byte *r2 __asm__("r2") = t2;
+ register size_t r3 __asm__("r3") = t3;
+ register const Byte *r4 __asm__("r4") = t4;
+ register size_t r5 __asm__("r5") = t5;
+ int cc;
+
+ __asm__ volatile(
+ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
+ "ipm %[cc]\n"
+ : [r2] "+r" (r2)
+ , [r3] "+r" (r3)
+ , [r4] "+r" (r4)
+ , [r5] "+r" (r5)
+ , [cc] "=r" (cc)
+ : [r0] "r" (r0)
+ , [r1] "r" (r1)
+ , [hist] "r" (hist)
+ : "cc", "memory");
+ t2 = r2; t3 = r3; t4 = r4; t5 = r5;
+
+ if (op1)
+ *op1 = t2;
+ if (len1)
+ *len1 = t3;
+ if (op2)
+ *op2 = t4;
+ if (len2)
+ *len2 = t5;
+ return (cc >> 28) & 3;
+}
+
+static inline int is_bit_set(
+ const char *bits,
+ int n
+)
+{
+ return bits[n / 8] & (1 << (7 - (n % 8)));
+}
+
+static inline void turn_bit_off(
+ char *bits,
+ int n
+)
+{
+ bits[n / 8] &= ~(1 << (7 - (n % 8)));
+}
+
+static inline int dfltcc_are_params_ok(
+ int level,
+ uInt window_bits,
+ int strategy,
+ uLong level_mask
+)
+{
+ return (level_mask & (1 << level)) != 0 &&
+ (window_bits == HB_BITS) &&
+ (strategy == Z_DEFAULT_STRATEGY);
+}
+
+static inline int is_dfltcc_enabled(void)
+{
+ return test_facility(DFLTCC_FACILITY);
+}
+
+char *oesc_msg(char *buf, int oesc);
+
+#endif /* DFLTCC_UTIL_H */