/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Hardware interface of the NX-GZIP compression accelerator * * Copyright (C) IBM Corporation, 2020 * * Author: Bulent Abali * */ #ifndef _NXU_H #define _NXU_H #include #include #include "nx.h" /* deflate */ #define LLSZ 286 #define DSZ 30 /* nx */ #define DHTSZ 18 #define DHT_MAXSZ 288 #define MAX_DDE_COUNT 256 /* util */ #ifdef NXDBG #define NXPRT(X) X #else #define NXPRT(X) #endif #ifdef NXTIMER #include #define NX_CLK(X) X #define nx_get_time() __ppc_get_timebase() #define nx_get_freq() __ppc_get_timebase_freq() #else #define NX_CLK(X) #define nx_get_time() (-1) #define nx_get_freq() (-1) #endif #define NX_MAX_FAULTS 500 /* * Definitions of acronyms used here. See * P9 NX Gzip Accelerator User's Manual for details: * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf * * adler/crc: 32 bit checksums appended to stream tail * ce: completion extension * cpb: coprocessor parameter block (metadata) * crb: coprocessor request block (command) * csb: coprocessor status block (status) * dht: dynamic huffman table * dde: data descriptor element (address, length) * ddl: list of ddes * dh/fh: dynamic and fixed huffman types * fc: coprocessor function code * histlen: history/dictionary length * history: sliding window of up to 32KB of data * lzcount: Deflate LZ symbol counts * rembytecnt: remaining byte count * sfbt: source final block type; last block's type during decomp * spbc: source processed byte count * subc: source unprocessed bit count * tebc: target ending bit count; valid bits in the last byte * tpbc: target processed byte count * vas: virtual accelerator switch; the user mode interface */ union nx_qw_t { uint32_t word[4]; uint64_t dword[2]; } __aligned(16); /* * Note: NX registers with fewer than 32 bits are declared by * convention as uint32_t variables in unions. If *_offset and *_mask * are defined for a variable, then use get_ put_ macros to * conveniently access the register fields for endian conversions. */ struct nx_dde_t { /* Data Descriptor Element, Section 6.4 */ union { uint32_t dde_count; /* When dde_count == 0 ddead is a pointer to a data buffer; * ddebc is the buffer length bytes. * When dde_count > 0 dde is an indirect dde; ddead is a * pointer to a contiguous list of direct ddes; ddebc is the * total length of all data pointed to by the list of direct * ddes. Note that only one level of indirection is permitted. * See Section 6.4 of the user manual for additional details. */ }; uint32_t ddebc; /* dde byte count */ uint64_t ddead; /* dde address */ } __aligned(16); struct nx_csb_t { /* Coprocessor Status Block, Section 6.6 */ union { uint32_t csb_v; /* Valid bit. v must be set to 0 by the program * before submitting the coprocessor command. * Software can poll for the v bit */ uint32_t csb_f; /* 16B CSB size. Written to 0 by DMA when it writes the CPB */ uint32_t csb_cs; /* cs completion sequence; unused */ uint32_t csb_cc; /* cc completion code; cc != 0 exception occurred */ uint32_t csb_ce; /* ce completion extension */ }; uint32_t tpbc; /* target processed byte count TPBC */ uint64_t fsaddr; /* Section 6.12.1 CSB NonZero error summary. FSA Failing storage * address. Address where error occurred. When available, written * to A field of CSB */ } __aligned(16); struct nx_ccb_t { /* Coprocessor Completion Block, Section 6.7 */ uint32_t reserved[3]; union { /* When crb.c==0 (no ccb defined) it is reserved; * When crb.c==1 (ccb defined) it is cm */ uint32_t ccb_cm; /* Signal interrupt of crb.c==1 and cm==1 */ uint32_t word; /* generic access to the 32bit word */ }; } __aligned(16); struct vas_stamped_crb_t { /* * CRB operand of the paste coprocessor instruction is stamped * in quadword 4 with the information shown here as its written * in to the receive FIFO of the coprocessor */ union { uint32_t vas_buf_num; /* Verification only vas buffer number which correlates to * the low order bits of the atag in the paste command */ uint32_t send_wc_id; /* Pointer to Send Window Context that provides for NX address * translation information, such as MSR and LPCR bits, job * completion interrupt RA, PSWID, and job utilization counter. */ }; union { uint32_t recv_wc_id; /* Pointer to Receive Window Context. NX uses this to return * credits to a Receive FIFO as entries are dequeued. */ }; uint32_t reserved2; union { uint32_t vas_invalid; /* Invalid bit. If this bit is 1 the CRB is discarded by * NX upon fetching from the receive FIFO. If this bit is 0 * the CRB is processed normally. The bit is stamped to 0 * by VAS and may be written to 1 by hypervisor while * the CRB is in the receive FIFO (in memory). */ }; }; struct nx_stamped_fault_crb_t { /* * A CRB that has a translation fault is stamped by NX in quadword 4 * and pasted to the Fault Send Window in VAS. */ uint64_t fsa; union { uint32_t nxsf_t; uint32_t nxsf_fs; }; uint32_t pswid; }; union stamped_crb_t { struct vas_stamped_crb_t vas; struct nx_stamped_fault_crb_t nx; }; struct nx_gzip_cpb_t { /* * Coprocessor Parameter Block In/Out are used to pass metadata * to/from accelerator. Tables 6.5 and 6.6 of the user manual. */ /* CPBInput */ struct { union { union nx_qw_t qw0; struct { uint32_t in_adler; /* bits 0:31 */ uint32_t in_crc; /* bits 32:63 */ union { uint32_t in_histlen; /* bits 64:75 */ uint32_t in_subc; /* bits 93:95 */ }; union { /* bits 108:111 */ uint32_t in_sfbt; /* bits 112:127 */ uint32_t in_rembytecnt; /* bits 116:127 */ uint32_t in_dhtlen; }; }; }; union { union nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */ char in_dht_char[DHT_MAXSZ]; /* byte access */ }; union nx_qw_t reserved[5]; /* qw[19:23] */ }; /* CPBOutput */ volatile struct { union { union nx_qw_t qw24; struct { uint32_t out_adler; /* bits 0:31 qw[24] */ uint32_t out_crc; /* bits 32:63 qw[24] */ union { /* bits 77:79 qw[24] */ uint32_t out_tebc; /* bits 80:95 qw[24] */ uint32_t out_subc; }; union { /* bits 108:111 qw[24] */ uint32_t out_sfbt; /* bits 112:127 qw[24] */ uint32_t out_rembytecnt; /* bits 116:127 qw[24] */ uint32_t out_dhtlen; }; }; }; union { union nx_qw_t qw25[79]; /* qw[25:103] */ /* qw[25] compress no lzcounts or wrap */ uint32_t out_spbc_comp_wrap; uint32_t out_spbc_wrap; /* qw[25] wrap */ /* qw[25] compress no lzcounts */ uint32_t out_spbc_comp; /* 286 LL and 30 D symbol counts */ uint32_t out_lzcount[LLSZ+DSZ]; struct { union nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */ /* qw[43] decompress */ uint32_t out_spbc_decomp; }; }; /* qw[104] compress with lzcounts */ uint32_t out_spbc_comp_with_count; }; } __aligned(128); struct nx_gzip_crb_t { union { /* byte[0:3] */ uint32_t gzip_fc; /* bits[24-31] */ }; uint32_t reserved1; /* byte[4:7] */ union { uint64_t csb_address; /* byte[8:15] */ struct { uint32_t reserved2; union { uint32_t crb_c; /* c==0 no ccb defined */ uint32_t crb_at; /* at==0 address type is ignored; * all addrs effective assumed. */ }; }; }; struct nx_dde_t source_dde; /* byte[16:31] */ struct nx_dde_t target_dde; /* byte[32:47] */ volatile struct nx_ccb_t ccb; /* byte[48:63] */ volatile union { /* byte[64:239] shift csb by 128 bytes out of the crb; csb was * in crb earlier; JReilly says csb written with partial inject */ union nx_qw_t reserved64[11]; union stamped_crb_t stamp; /* byte[64:79] */ }; volatile struct nx_csb_t csb; } __aligned(128); struct nx_gzip_crb_cpb_t { struct nx_gzip_crb_t crb; struct nx_gzip_cpb_t cpb; } __aligned(2048); /* * NX hardware convention has the msb bit on the left numbered 0. * The defines below has *_offset defined as the right most bit * position of a field. x of size_mask(x) is the field width in bits. */ #define size_mask(x) ((1U<<(x))-1) /* * Offsets and Widths within the containing 32 bits of the various NX * gzip hardware registers. Use the getnn/putnn macros to access * these regs */ #define dde_count_mask size_mask(8) #define dde_count_offset 23 /* CSB */ #define csb_v_mask size_mask(1) #define csb_v_offset 0 #define csb_f_mask size_mask(1) #define csb_f_offset 6 #define csb_cs_mask size_mask(8) #define csb_cs_offset 15 #define csb_cc_mask size_mask(8) #define csb_cc_offset 23 #define csb_ce_mask size_mask(8) #define csb_ce_offset 31 /* CCB */ #define ccb_cm_mask size_mask(3) #define ccb_cm_offset 31 /* VAS stamped CRB fields */ #define vas_buf_num_mask size_mask(6) #define vas_buf_num_offset 5 #define send_wc_id_mask size_mask(16) #define send_wc_id_offset 31 #define recv_wc_id_mask size_mask(16) #define recv_wc_id_offset 31 #define vas_invalid_mask size_mask(1) #define vas_invalid_offset 31 /* NX stamped fault CRB fields */ #define nxsf_t_mask size_mask(1) #define nxsf_t_offset 23 #define nxsf_fs_mask size_mask(8) #define nxsf_fs_offset 31 /* CPB input */ #define in_histlen_mask size_mask(12) #define in_histlen_offset 11 #define in_dhtlen_mask size_mask(12) #define in_dhtlen_offset 31 #define in_subc_mask size_mask(3) #define in_subc_offset 31 #define in_sfbt_mask size_mask(4) #define in_sfbt_offset 15 #define in_rembytecnt_mask size_mask(16) #define in_rembytecnt_offset 31 /* CPB output */ #define out_tebc_mask size_mask(3) #define out_tebc_offset 15 #define out_subc_mask size_mask(16) #define out_subc_offset 31 #define out_sfbt_mask size_mask(4) #define out_sfbt_offset 15 #define out_rembytecnt_mask size_mask(16) #define out_rembytecnt_offset 31 #define out_dhtlen_mask size_mask(12) #define out_dhtlen_offset 31 /* CRB */ #define gzip_fc_mask size_mask(8) #define gzip_fc_offset 31 #define crb_c_mask size_mask(1) #define crb_c_offset 28 #define crb_at_mask size_mask(1) #define crb_at_offset 30 #define csb_address_mask ~(15UL) /* mask off bottom 4b */ /* * Access macros for the registers. Do not access registers directly * because of the endian conversion. P9 processor may run either as * Little or Big endian. However the NX coprocessor regs are always * big endian. * Use the 32 and 64b macros to access respective * register sizes. * Use nn forms for the register fields shorter than 32 bits. */ #define getnn(ST, REG) ((be32toh(ST.REG) >> (31-REG##_offset)) \ & REG##_mask) #define getpnn(ST, REG) ((be32toh((ST)->REG) >> (31-REG##_offset)) \ & REG##_mask) #define get32(ST, REG) (be32toh(ST.REG)) #define getp32(ST, REG) (be32toh((ST)->REG)) #define get64(ST, REG) (be64toh(ST.REG)) #define getp64(ST, REG) (be64toh((ST)->REG)) #define unget32(ST, REG) (get32(ST, REG) & ~((REG##_mask) \ << (31-REG##_offset))) /* get 32bits less the REG field */ #define ungetp32(ST, REG) (getp32(ST, REG) & ~((REG##_mask) \ << (31-REG##_offset))) /* get 32bits less the REG field */ #define clear_regs(ST) memset((void *)(&(ST)), 0, sizeof(ST)) #define clear_dde(ST) do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \ } while (0) #define clearp_dde(ST) do { (ST)->dde_count = (ST)->ddebc = 0; \ (ST)->ddead = 0; \ } while (0) #define clear_struct(ST) memset((void *)(&(ST)), 0, sizeof(ST)) #define putnn(ST, REG, X) (ST.REG = htobe32(unget32(ST, REG) | (((X) \ & REG##_mask) << (31-REG##_offset)))) #define putpnn(ST, REG, X) ((ST)->REG = htobe32(ungetp32(ST, REG) \ | (((X) & REG##_mask) << (31-REG##_offset)))) #define put32(ST, REG, X) (ST.REG = htobe32(X)) #define putp32(ST, REG, X) ((ST)->REG = htobe32(X)) #define put64(ST, REG, X) (ST.REG = htobe64(X)) #define putp64(ST, REG, X) ((ST)->REG = htobe64(X)) /* * Completion extension ce(0) ce(1) ce(2). Bits ce(3-7) * unused. Section 6.6 Figure 6.7. */ #define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce)) #define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5) #define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5)) #define CSB_CE_PARTIAL 0x4 #define CSB_CE_TERMINATE 0x2 #define CSB_CE_TPBC_VALID 0x1 #define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE)) /* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */ #define csb_ce_check_completion(X) (!csb_ce_termination(X)) /* if not terminated then check full or partial completion */ #define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL)) #define csb_ce_full_completion(X) (!csb_ce_partial_completion(X)) #define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID)) /* TPBC indicates successfully stored data count */ #define csb_ce_default_err(X) csb_ce_termination(X) /* most error CEs have CE(0)=0 and CE(1)=1 */ #define csb_ce_cc3_partial(X) csb_ce_partial_completion(X) /* some CC=3 are partially completed, Table 6-8 */ #define csb_ce_cc64(X) ((X)&(CSB_CE_PARTIAL \ | CSB_CE_TERMINATE) == 0) /* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't * compress smaller than source. */ /* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */ #define SFBT_BFINAL 0x1 #define SFBT_LIT 0x4 #define SFBT_FHT 0x5 #define SFBT_DHT 0x6 #define SFBT_HDR 0x7 /* * NX gzip function codes. Table 6.2. * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to * select one of the two Byte Count Limits. */ #define GZIP_FC_LIMIT_MASK 0x01 #define GZIP_FC_COMPRESS_FHT 0x00 #define GZIP_FC_COMPRESS_DHT 0x02 #define GZIP_FC_COMPRESS_FHT_COUNT 0x04 #define GZIP_FC_COMPRESS_DHT_COUNT 0x06 #define GZIP_FC_COMPRESS_RESUME_FHT 0x08 #define GZIP_FC_COMPRESS_RESUME_DHT 0x0a #define GZIP_FC_COMPRESS_RESUME_FHT_COUNT 0x0c #define GZIP_FC_COMPRESS_RESUME_DHT_COUNT 0x0e #define GZIP_FC_DECOMPRESS 0x10 #define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND 0x12 #define GZIP_FC_DECOMPRESS_RESUME 0x14 #define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND 0x16 #define GZIP_FC_WRAP 0x1e #define fc_is_compress(fc) (((fc) & 0x10) == 0) #define fc_has_count(fc) (fc_is_compress(fc) && (((fc) & 0x4) != 0)) /* CSB.CC Error codes */ #define ERR_NX_OK 0 #define ERR_NX_ALIGNMENT 1 #define ERR_NX_OPOVERLAP 2 #define ERR_NX_DATA_LENGTH 3 #define ERR_NX_TRANSLATION 5 #define ERR_NX_PROTECTION 6 #define ERR_NX_EXTERNAL_UE7 7 #define ERR_NX_INVALID_OP 8 #define ERR_NX_PRIVILEGE 9 #define ERR_NX_INTERNAL_UE 10 #define ERR_NX_EXTERN_UE_WR 12 #define ERR_NX_TARGET_SPACE 13 #define ERR_NX_EXCESSIVE_DDE 14 #define ERR_NX_TRANSL_WR 15 #define ERR_NX_PROTECT_WR 16 #define ERR_NX_SUBFUNCTION 17 #define ERR_NX_FUNC_ABORT 18 #define ERR_NX_BYTE_MAX 19 #define ERR_NX_CORRUPT_CRB 20 #define ERR_NX_INVALID_CRB 21 #define ERR_NX_INVALID_DDE 30 #define ERR_NX_SEGMENTED_DDL 31 #define ERR_NX_DDE_OVERFLOW 33 #define ERR_NX_TPBC_GT_SPBC 64 #define ERR_NX_MISSING_CODE 66 #define ERR_NX_INVALID_DIST 67 #define ERR_NX_INVALID_DHT 68 #define ERR_NX_EXTERNAL_UE90 90 #define ERR_NX_WDOG_TIMER 224 #define ERR_NX_AT_FAULT 250 #define ERR_NX_INTR_SERVER 252 #define ERR_NX_UE253 253 #define ERR_NX_NO_HW 254 #define ERR_NX_HUNG_OP 255 #define ERR_NX_END 256 /* initial values for non-resume operations */ #define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */ #define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */ /* prototypes */ int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle); extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx); extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr); /* caller supplies a print buffer 4*sizeof(crb) */ char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf); char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf); char *nx_prt_hex(void *cp, int sz, char *prbuf); char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf); char *nx_strerror(int e); #ifdef NX_SIM #include int nx_sim_init(void *ctx); int nx_sim_end(void *ctx); int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx); #endif /* NX_SIM */ /* Deflate stream manipulation */ #define set_final_bit(x) (x |= (unsigned char)1) #define clr_final_bit(x) (x &= ~(unsigned char)1) #define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \ } while (0) /* append 10 bits 0000001b 00...... ; * assumes appending starts on a byte boundary; b is the final bit. */ #ifdef NX_842 /* 842 Engine */ struct nx_eft_crb_t { union { /* byte[0:3] */ uint32_t eft_fc; /* bits[29-31] */ }; uint32_t reserved1; /* byte[4:7] */ union { uint64_t csb_address; /* byte[8:15] */ struct { uint32_t reserved2; union { uint32_t crb_c; /* c==0 no ccb defined */ uint32_t crb_at; /* at==0 address type is ignored; * all addrs effective assumed. */ }; }; }; struct nx_dde_t source_dde; /* byte[16:31] */ struct nx_dde_t target_dde; /* byte[32:47] */ struct nx_ccb_t ccb; /* byte[48:63] */ union { union nx_qw_t reserved64[3]; /* byte[64:96] */ }; struct nx_csb_t csb; } __aligned(128); /* 842 CRB */ #define EFT_FC_MASK size_mask(3) #define EFT_FC_OFFSET 31 #define EFT_FC_COMPRESS 0x0 #define EFT_FC_COMPRESS_WITH_CRC 0x1 #define EFT_FC_DECOMPRESS 0x2 #define EFT_FC_DECOMPRESS_WITH_CRC 0x3 #define EFT_FC_BLK_DATA_MOVE 0x4 #endif /* NX_842 */ #endif /* _NXU_H */