Message ID | 20200327181610.13762-4-rzinsly@linux.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | selftests/powerpc: Add NX-GZIP engine testcase | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (c6624071c338732402e8c726df6a4074473eaa0e) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 41 checks, 769 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Raphael Moreira Zinsly <rzinsly@linux.ibm.com> writes: > Add a compression testcase for the powerpc NX-GZIP engine. > > Signed-off-by: Bulent Abali <abali@us.ibm.com> > Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com> > --- > .../selftests/powerpc/nx-gzip/Makefile | 21 + > .../selftests/powerpc/nx-gzip/gzfht_test.c | 489 ++++++++++++++++++ > .../selftests/powerpc/nx-gzip/gzip_vas.c | 259 ++++++++++ > 3 files changed, 769 insertions(+) > create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile > create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c > create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c > > diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile > new file mode 100644 > index 000000000000..ab903f63bbbd > --- /dev/null > +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile > @@ -0,0 +1,21 @@ > +CC = gcc > +CFLAGS = -O3 > +INC = ./inc > +SRC = gzfht_test.c > +OBJ = $(SRC:.c=.o) > +TESTS = gzfht_test > +EXTRA_SOURCES = gzip_vas.c > + > +all: $(TESTS) > + > +$(OBJ): %.o: %.c > + $(CC) $(CFLAGS) -I$(INC) -c $< > + > +$(TESTS): $(OBJ) > + $(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES) > + > +run_tests: $(TESTS) > + ./gzfht_test gzip_vas.c > + > +clean: > + rm -f $(TESTS) *.o *~ *.gz > diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c > new file mode 100644 > index 000000000000..7a21c25f5611 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c > @@ -0,0 +1,489 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > + > +/* P9 gzip sample code for demonstrating the P9 NX hardware interface. > + * Not intended for productive uses or for performance or compression > + * ratio measurements. For simplicity of demonstration, this sample > + * code compresses in to fixed Huffman blocks only (Deflate btype=1) > + * and has very simple memory management. 
Dynamic Huffman blocks > + * (Deflate btype=2) are more involved as detailed in the user guide. > + * Note also that /dev/crypto/nx-gzip, VAS and skiboot support are > + * required. > + * > + * Copyright 2020 IBM Corp. > + * > + * https://github.com/libnxz/power-gzip for zlib api and other utils > + * > + * Author: Bulent Abali <abali@us.ibm.com> > + * > + * Definitions of acronyms used here. See > + * P9 NX Gzip Accelerator User's Manual for details: > + * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf > + * > + * adler/crc: 32 bit checksums appended to stream tail > + * ce: completion extension > + * cpb: coprocessor parameter block (metadata) > + * crb: coprocessor request block (command) > + * csb: coprocessor status block (status) > + * dht: dynamic huffman table > + * dde: data descriptor element (address, length) > + * ddl: list of ddes > + * dh/fh: dynamic and fixed huffman types > + * fc: coprocessor function code > + * histlen: history/dictionary length > + * history: sliding window of up to 32KB of data > + * lzcount: Deflate LZ symbol counts > + * rembytecnt: remaining byte count > + * sfbt: source final block type; last block's type during decomp > + * spbc: source processed byte count > + * subc: source unprocessed bit count > + * tebc: target ending bit count; valid bits in the last byte > + * tpbc: target processed byte count > + * vas: virtual accelerator switch; the user mode interface > + */ > + > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <stdint.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/time.h> > +#include <sys/fcntl.h> > +#include <sys/mman.h> > +#include <endian.h> > +#include <bits/endian.h> > +#include <sys/ioctl.h> > +#include <assert.h> > +#include <errno.h> > +#include <signal.h> > +#include "nxu.h" > +#include "nx.h" > + > +int nx_dbg; > +FILE *nx_gzip_log; > +void *nx_fault_storage_address; > + > +#define NX_MIN(X, Y)
(((X) < (Y)) ? (X) : (Y)) > +#define FNAME_MAX 1024 > +#define FEXT ".nx.gz" > + > +/* > + * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. > + */ > +static int compress_fht_sample(char *src, uint32_t srclen, char *dst, > + uint32_t dstlen, int with_count, > + struct nx_gzip_crb_cpb_t *cmdp, void *handle) > +{ > + int cc; > + uint32_t fc; > + > + assert(!!cmdp); > + > + put32(cmdp->crb, gzip_fc, 0); /* clear */ > + fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT : > + GZIP_FC_COMPRESS_RESUME_FHT; > + putnn(cmdp->crb, gzip_fc, fc); > + putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */ > + memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb)); > + > + /* Section 6.6 programming notes; spbc may be in two different > + * places depending on FC. > + */ > + if (!with_count) > + put32(cmdp->cpb, out_spbc_comp, 0); > + else > + put32(cmdp->cpb, out_spbc_comp_with_count, 0); > + > + /* Figure 6-3 6-4; CSB location */ > + put64(cmdp->crb, csb_address, 0); > + put64(cmdp->crb, csb_address, > + (uint64_t) &cmdp->crb.csb & csb_address_mask); > + > + /* Source direct dde (scatter-gather list) */ > + clear_dde(cmdp->crb.source_dde); > + putnn(cmdp->crb.source_dde, dde_count, 0); > + put32(cmdp->crb.source_dde, ddebc, srclen); > + put64(cmdp->crb.source_dde, ddead, (uint64_t) src); > + > + /* Target direct dde (scatter-gather list) */ > + clear_dde(cmdp->crb.target_dde); > + putnn(cmdp->crb.target_dde, dde_count, 0); > + put32(cmdp->crb.target_dde, ddebc, dstlen); > + put64(cmdp->crb.target_dde, ddead, (uint64_t) dst); > + > + /* Submit the crb, the job descriptor, to the accelerator */ > + nxu_run_job(cmdp, handle); > + > + /* Poll for the csb.v bit; you should also consider sleeping > + * or interrupts. 
> + */ > + do { ; } while (getnn(cmdp->crb.csb, csb_v) == 0); > + > + /* CC Table 6-8 */ > + cc = getnn(cmdp->crb.csb, csb_cc); > + > + return cc; > +} > + > +/* > + * Prepares a blank no filename no timestamp gzip header and returns > + * the number of bytes written to buf. > + * Gzip specification at https://tools.ietf.org/html/rfc1952 > + */ > +int gzip_header_blank(char *buf) > +{ > + int i = 0; > + > + buf[i++] = 0x1f; /* ID1 */ > + buf[i++] = 0x8b; /* ID2 */ > + buf[i++] = 0x08; /* CM */ > + buf[i++] = 0x00; /* FLG */ > + buf[i++] = 0x00; /* MTIME */ > + buf[i++] = 0x00; /* MTIME */ > + buf[i++] = 0x00; /* MTIME */ > + buf[i++] = 0x00; /* MTIME */ > + buf[i++] = 0x04; /* XFL 4=fastest */ > + buf[i++] = 0x03; /* OS UNIX */ > + > + return i; > +} > + > +/* Caller must free the allocated buffer return nonzero on error. */ > +int read_alloc_input_file(char *fname, char **buf, size_t *bufsize) > +{ > + struct stat statbuf; > + FILE *fp; > + char *p; > + size_t num_bytes; > + > + if (stat(fname, &statbuf)) { > + perror(fname); > + return(-1); > + } > + fp = fopen(fname, "r"); > + if (fp == NULL) { > + perror(fname); > + return(-1); > + } > + assert(NULL != (p = (char *) malloc(statbuf.st_size))); > + num_bytes = fread(p, 1, statbuf.st_size, fp); > + if (ferror(fp) || (num_bytes != statbuf.st_size)) { > + perror(fname); > + return(-1); > + } > + *buf = p; > + *bufsize = num_bytes; > + return 0; > +} > + > +/* Returns nonzero on error */ > +int write_output_file(char *fname, char *buf, size_t bufsize) > +{ > + FILE *fp; > + size_t num_bytes; > + > + fp = fopen(fname, "w"); > + if (fp == NULL) { > + perror(fname); > + return(-1); > + } > + num_bytes = fwrite(buf, 1, bufsize, fp); > + if (ferror(fp) || (num_bytes != bufsize)) { > + perror(fname); > + return(-1); > + } > + fclose(fp); > + return 0; > +} > + > +/* > + * Z_SYNC_FLUSH as described in zlib.h. 
> + * Returns number of appended bytes > + */ > +int append_sync_flush(char *buf, int tebc, int final) > +{ > + uint64_t flush; > + int shift = (tebc & 0x7); > + > + if (tebc > 0) { > + /* Last byte is partially full */ > + buf = buf - 1; > + *buf = *buf & (unsigned char) ((1<<tebc)-1); > + } else > + *buf = 0; > + flush = ((0x1ULL & final) << shift) | *buf; > + shift = shift + 3; /* BFINAL and BTYPE written */ > + shift = (shift <= 8) ? 8 : 16; > + flush |= (0xFFFF0000ULL) << shift; /* Zero length block */ > + shift = shift + 32; > + while (shift > 0) { > + *buf++ = (unsigned char) (flush & 0xffULL); > + flush = flush >> 8; > + shift = shift - 8; > + } > + return(((tebc > 5) || (tebc == 0)) ? 5 : 4); > +} > + > +/* > + * Fault in pages prior to NX job submission. wr=1 may be required to > + * touch writeable pages. System zero pages do not fault-in the page as > + * intended. Typically set wr=1 for NX target pages and set wr=0 for NX > + * source pages. > + */ > +static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr) > +{ > + char *begin = buf; > + char *end = (char *) buf + buf_len - 1; > + char t; > + > + assert(buf_len >= 0 && !!buf); > + > + NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf, > + (buf + buf_len), buf_len, wr)); > + > + if (buf_len <= 0 || buf == NULL) > + return -1; > + > + do { > + t = *begin; > + if (wr) > + *begin = t; > + begin = begin + page_len; > + } while (begin < end); > + > + /* When buf_sz is small or buf tail is in another page */ > + t = *end; > + if (wr) > + *end = t; > + > + return 0; > +} > + > +/* > + * Final deflate block bit. This call assumes the block > + * beginning is byte aligned. 
> + */ > +static void set_bfinal(void *buf, int bfinal) > +{ > + char *b = buf; > + > + if (bfinal) > + *b = *b | (unsigned char) 0x01; > + else > + *b = *b & (unsigned char) 0xfe; > +} > + > +int compress_file(int argc, char **argv, void *handle) > +{ > + char *inbuf, *outbuf, *srcbuf, *dstbuf; > + char outname[FNAME_MAX]; > + uint32_t srclen, dstlen; > + uint32_t flushlen, chunk; > + size_t inlen, outlen, dsttotlen, srctotlen; > + uint32_t crc, spbc, tpbc, tebc; > + int lzcounts = 0; > + int cc; > + int num_hdr_bytes; > + struct nx_gzip_crb_cpb_t *cmdp; > + uint32_t pagelen = 65536; > + int fault_tries = 50; > + > + cmdp = (void *)(uintptr_t) > + aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), > + sizeof(struct nx_gzip_crb_cpb_t)); > + > + if (argc != 2) { > + fprintf(stderr, "usage: %s <fname>\n", argv[0]); > + exit(-1); > + } > + if (read_alloc_input_file(argv[1], &inbuf, &inlen)) > + exit(-1); > + fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); > + > + /* Generous output buffer for header/trailer */ > + outlen = 2 * inlen + 1024; > + > + assert(NULL != (outbuf = (char *)malloc(outlen))); > + nx_touch_pages(outbuf, outlen, pagelen, 1); > + > + /* Compress piecemeal in smallish chunks */ > + chunk = 1<<22; > + > + /* Write the gzip header to the stream */ > + num_hdr_bytes = gzip_header_blank(outbuf); > + dstbuf = outbuf + num_hdr_bytes; > + outlen = outlen - num_hdr_bytes; > + dsttotlen = num_hdr_bytes; > + > + srcbuf = inbuf; > + srctotlen = 0; > + > + /* Init the CRB, the coprocessor request block */ > + memset(&cmdp->crb, 0, sizeof(cmdp->crb)); > + > + /* Initial gzip crc32 */ > + put32(cmdp->cpb, in_crc, 0); > + > + fault_tries = 50; > + > + while (inlen > 0) { > + > + /* Submit chunk size source data per job */ > + srclen = NX_MIN(chunk, inlen); > + /* Supply large target in case data expands */ > + dstlen = NX_MIN(2*srclen, outlen); > + > + /* Page faults are handled by the user code */ > + > + /* Fault-in pages; an improved code wouldn't 
touch so > + * many pages but would try to estimate the > + * compression ratio and adjust both the src and dst > + * touch amounts. > + */ > + nx_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen, > + 1); > + nx_touch_pages(srcbuf, srclen, pagelen, 0); > + nx_touch_pages(dstbuf, dstlen, pagelen, 1); > + > + cc = compress_fht_sample( > + srcbuf, srclen, > + dstbuf, dstlen, > + lzcounts, cmdp, handle); > + > + if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC && > + cc != ERR_NX_TRANSLATION) { > + fprintf(stderr, "nx error: cc= %d\n", cc); > + exit(-1); > + } > + > + /* Page faults are handled by the user code */ > + if (cc == ERR_NX_TRANSLATION) { > + NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc)); > + NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n", > + fault_tries, > + (unsigned long long) cmdp->crb.csb.fsaddr)); > + > + fault_tries--; > + if (fault_tries > 0) { > + continue; > + } else { > + fprintf(stderr, "error: cannot progress; "); > + fprintf(stderr, "too many faults\n"); > + exit(-1); > + }; > + } > + > + fault_tries = 50; /* Reset for the next chunk */ > + > + inlen = inlen - srclen; > + srcbuf = srcbuf + srclen; > + srctotlen = srctotlen + srclen; > + > + /* Two possible locations for spbc depending on the function > + * code. > + */ > + spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) : > + get32(cmdp->cpb, out_spbc_comp_with_count); > + assert(spbc == srclen); > + > + /* Target byte count */ > + tpbc = get32(cmdp->crb.csb, tpbc); > + /* Target ending bit count */ > + tebc = getnn(cmdp->cpb, out_tebc); > + NXPRT(fprintf(stderr, "compressed chunk %d " spbc)); > + NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc)); > + > + if (inlen > 0) { /* More chunks to go */ > + set_bfinal(dstbuf, 0); > + dstbuf = dstbuf + tpbc; > + dsttotlen = dsttotlen + tpbc; > + outlen = outlen - tpbc; > + /* Round up to the next byte with a flush > + * block; do not set the BFINAL bit.
> + */ > + flushlen = append_sync_flush(dstbuf, tebc, 0); > + dsttotlen = dsttotlen + flushlen; > + outlen = outlen - flushlen; > + dstbuf = dstbuf + flushlen; > + NXPRT(fprintf(stderr, "added sync_flush %d bytes\n", > + flushlen)); > + } else { /* Done */ > + /* Set the BFINAL bit of the last block per Deflate > + * specification. > + */ > + set_bfinal(dstbuf, 1); > + dstbuf = dstbuf + tpbc; > + dsttotlen = dsttotlen + tpbc; > + outlen = outlen - tpbc; > + } > + > + /* Resuming crc32 for the next chunk */ > + crc = get32(cmdp->cpb, out_crc); > + put32(cmdp->cpb, in_crc, crc); > + crc = be32toh(crc); > + } > + > + /* Append crc32 and ISIZE to the end */ > + memcpy(dstbuf, &crc, 4); > + memcpy(dstbuf+4, &srctotlen, 4); > + dsttotlen = dsttotlen + 8; > + outlen = outlen - 8; > + > + assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); > + strcpy(outname, argv[1]); > + strcat(outname, FEXT); > + if (write_output_file(outname, outbuf, dsttotlen)) { > + fprintf(stderr, "write error: %s\n", outname); > + exit(-1); > + } > + > + fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen, > + dsttotlen); > + fprintf(stderr, "crc32 checksum = %08x\n", crc); > + > + if (inbuf != NULL) > + free(inbuf); > + > + if (outbuf != NULL) > + free(outbuf); > + > + return 0; > +} > + > +void sigsegv_handler(int sig, siginfo_t *info, void *ctx) > +{ > + fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(), > + sig, info->si_code, info->si_addr); > + > + nx_fault_storage_address = info->si_addr; > +} > + > +int main(int argc, char **argv) > +{ > + int rc; > + struct sigaction act; > + void *handle; > + > + nx_dbg = 0; > + nx_gzip_log = NULL; > + act.sa_handler = 0; > + act.sa_sigaction = sigsegv_handler; > + act.sa_flags = SA_SIGINFO; > + act.sa_restorer = 0; > + sigemptyset(&act.sa_mask); > + sigaction(SIGSEGV, &act, NULL); > + > + handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0); > + if (!handle) { > + fprintf(stderr, "Unable to init NX, errno %d\n", errno); > 
+ exit(-1); > + } > + > + rc = compress_file(argc, argv, handle); > + > + nx_function_end(handle); > + > + return rc; > +} > diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c > new file mode 100644 > index 000000000000..d28e1efb527b > --- /dev/null > +++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c > @@ -0,0 +1,259 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > + > +/* Copyright 2020 IBM Corp. > + * > + * Author: Bulent Abali <abali@us.ibm.com> > + * > + */ > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <stdint.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/time.h> > +#include <sys/fcntl.h> > +#include <sys/mman.h> > +#include <endian.h> > +#include <bits/endian.h> > +#include <sys/ioctl.h> > +#include <assert.h> > +#include <errno.h> > +#include <signal.h> > +#include "nx-gzip.h" > +#include "nx.h" > +#include "copy-paste.h" > +#include "nxu.h" > +#include "nx_dbg.h" > +#include <sys/platform/ppc.h> > + > +#define barrier() > +#define hwsync() ({ asm volatile("hwsync" ::: "memory"); }) This doesn't compile on the clang version I tried as it doesn't recognise 'hwsync'. Does asm volatile("sync" ::: "memory"); do the same thing? That is recognised by clang, but I don't know if dropping the hw prefix matters! 
Regards, Daniel > + > +#ifndef NX_NO_CPU_PRI > +#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); }) > +#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); }) > +#else > +#define cpu_pri_default() > +#define cpu_pri_low() > +#endif > + > +void *nx_fault_storage_address; > + > +struct nx_handle { > + int fd; > + int function; > + void *paste_addr; > +}; > + > +static int open_device_nodes(char *devname, int pri, struct nx_handle *handle) > +{ > + int rc, fd; > + void *addr; > + struct vas_gzip_setup_attr txattr; > + > + fd = open(devname, O_RDWR); > + if (fd < 0) { > + fprintf(stderr, " open device name %s\n", devname); > + return -errno; > + } > + > + memset(&txattr, 0, sizeof(txattr)); > + txattr.version = 1; > + txattr.vas_id = pri; > + rc = ioctl(fd, VAS_GZIP_TX_WIN_OPEN, (unsigned long)&txattr); > + if (rc < 0) { > + fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno); > + rc = -errno; > + goto out; > + } > + > + addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL); > + if (addr == MAP_FAILED) { > + fprintf(stderr, "mmap() failed, errno %d\n", errno); > + rc = -errno; > + goto out; > + } > + handle->fd = fd; > + handle->paste_addr = (void *)((char *)addr + 0x400); > + > + rc = 0; > +out: > + close(fd); > + return rc; > +} > + > +void *nx_function_begin(int function, int pri) > +{ > + int rc; > + char *devname = "/dev/crypto/nx-gzip"; > + struct nx_handle *nxhandle; > + > + if (function != NX_FUNC_COMP_GZIP) { > + errno = EINVAL; > + fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n"); > + return NULL; > + } > + > + > + nxhandle = malloc(sizeof(*nxhandle)); > + if (!nxhandle) { > + errno = ENOMEM; > + fprintf(stderr, " No memory\n"); > + return NULL; > + } > + > + nxhandle->function = function; > + rc = open_device_nodes(devname, pri, nxhandle); > + if (rc < 0) { > + errno = -rc; > + fprintf(stderr, " open_device_nodes failed\n"); > + return NULL; > + } > + > + return nxhandle; > +} > + > +int nx_function_end(void *handle) > +{ > + 
int rc = 0; > + struct nx_handle *nxhandle = handle; > + > + rc = munmap(nxhandle->paste_addr - 0x400, 4096); > + if (rc < 0) { > + fprintf(stderr, "munmap() failed, errno %d\n", errno); > + return rc; > + } > + close(nxhandle->fd); > + free(nxhandle); > + > + return rc; > +} > + > +static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp) > +{ > + long poll = 0; > + uint64_t t; > + > + /* Save power and let other threads use the h/w. top may show > + * 100% but only because OS doesn't know we slowed this > + * h/w thread while polling. We're letting other threads have > + * higher throughput on the core. > + */ > + cpu_pri_low(); > + > +#define CSB_MAX_POLL 200000000UL > +#define USLEEP_TH 300000UL > + > + t = __ppc_get_timebase(); > + > + while (getnn(cmdp->crb.csb, csb_v) == 0) { > + ++poll; > + hwsync(); > + > + cpu_pri_low(); > + > + /* usleep(0) takes around 29000 ticks ~60 us. > + * 300000 is spinning for about 600 us then > + * start sleeping. > + */ > + if ((__ppc_get_timebase() - t) > USLEEP_TH) { > + cpu_pri_default(); > + usleep(1); > + } > + > + if (poll > CSB_MAX_POLL) > + break; > + > + /* Fault address from signal handler */ > + if (nx_fault_storage_address) { > + cpu_pri_default(); > + return -EAGAIN; > + } > + > + } > + > + cpu_pri_default(); > + > + /* hw has updated csb and output buffer */ > + hwsync(); > + > + /* Check CSB flags.
*/ > + if (getnn(cmdp->crb.csb, csb_v) == 0) { > + fprintf(stderr, "CSB still not valid after %d polls.\n", > + (int) poll); > + prt_err("CSB still not valid after %d polls, giving up.\n", > + (int) poll); > + return -ETIMEDOUT; > + } > + > + return 0; > +} > + > +int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle) > +{ > + int i, ret, retries; > + struct nx_handle *nxhandle = handle; > + > + assert(handle != NULL); > + i = 0; > + retries = 5000; > + while (i++ < retries) { > + hwsync(); > + vas_copy(&cmdp->crb, 0); > + ret = vas_paste(nxhandle->paste_addr, 0); > + hwsync(); > + > + NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n", > + i, retries, ret)); > + > + if ((ret == 2) || (ret == 3)) { > + > + ret = nx_wait_for_csb(cmdp); > + if (!ret) { > + goto out; > + } else if (ret == -EAGAIN) { > + long x; > + > + prt_err("Touching address %p, 0x%lx\n", > + nx_fault_storage_address, > + *(long *) nx_fault_storage_address); > + x = *(long *) nx_fault_storage_address; > + *(long *) nx_fault_storage_address = x; > + nx_fault_storage_address = 0; > + continue; > + } else { > + prt_err("wait_for_csb() returns %d\n", ret); > + break; > + } > + } else { > + if (i < 10) { > + /* spin for few ticks */ > +#define SPIN_TH 500UL > + uint64_t fail_spin; > + > + fail_spin = __ppc_get_timebase(); > + while ((__ppc_get_timebase() - fail_spin) < > + SPIN_TH) > + ; > + } else { > + /* sleep */ > + unsigned int pr = 0; > + > + if (pr++ % 100 == 0) { > + prt_err("Paste attempt %d/", i); > + prt_err("%d, failed pid= %d\n", retries, > + getpid()); > + } > + usleep(1); > + } > + continue; > + } > + } > + > +out: > + cpu_pri_default(); > + > + return ret; > +} > -- > 2.21.0
Daniel Axtens <dja@axtens.net> writes: > Raphael Moreira Zinsly <rzinsly@linux.ibm.com> writes: > >> Add a compression testcase for the powerpc NX-GZIP engine. >> >> Signed-off-by: Bulent Abali <abali@us.ibm.com> >> Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com> ... >> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c >> new file mode 100644 >> index 000000000000..d28e1efb527b >> --- /dev/null >> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c >> @@ -0,0 +1,259 @@ >> +// SPDX-License-Identifier: GPL-2.0-or-later >> + >> +/* Copyright 2020 IBM Corp. >> + * >> + * Author: Bulent Abali <abali@us.ibm.com> >> + * >> + */ >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <string.h> >> +#include <unistd.h> >> +#include <stdint.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <sys/time.h> >> +#include <sys/fcntl.h> >> +#include <sys/mman.h> >> +#include <endian.h> >> +#include <bits/endian.h> >> +#include <sys/ioctl.h> >> +#include <assert.h> >> +#include <errno.h> >> +#include <signal.h> >> +#include "nx-gzip.h" >> +#include "nx.h" >> +#include "copy-paste.h" >> +#include "nxu.h" >> +#include "nx_dbg.h" >> +#include <sys/platform/ppc.h> >> + >> +#define barrier() >> +#define hwsync() ({ asm volatile("hwsync" ::: "memory"); }) > > This doesn't compile on the clang version I tried as it doesn't > recognise 'hwsync'. What assembler are you using? I guess the LLVM one? Can you try with -fno-integrated-as ? > Does asm volatile("sync" ::: "memory"); > do the same thing? That is recognised by clang, but I don't know if > dropping the hw prefix matters! It shouldn't matter. But you can just try it and look at the generated code to be sure, you should get 0x7c0004ac. cheers
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile new file mode 100644 index 000000000000..ab903f63bbbd --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -0,0 +1,21 @@ +CC = gcc +CFLAGS = -O3 +INC = ./inc +SRC = gzfht_test.c +OBJ = $(SRC:.c=.o) +TESTS = gzfht_test +EXTRA_SOURCES = gzip_vas.c + +all: $(TESTS) + +$(OBJ): %.o: %.c + $(CC) $(CFLAGS) -I$(INC) -c $< + +$(TESTS): $(OBJ) + $(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES) + +run_tests: $(TESTS) + ./gzfht_test gzip_vas.c + +clean: + rm -f $(TESTS) *.o *~ *.gz diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c new file mode 100644 index 000000000000..7a21c25f5611 --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* P9 gzip sample code for demonstrating the P9 NX hardware interface. + * Not intended for productive uses or for performance or compression + * ratio measurements. For simplicity of demonstration, this sample + * code compresses in to fixed Huffman blocks only (Deflate btype=1) + * and has very simple memory management. Dynamic Huffman blocks + * (Deflate btype=2) are more involved as detailed in the user guide. + * Note also that /dev/crypto/gzip, VAS and skiboot support are + * required. + * + * Copyright 2020 IBM Corp. + * + * https://github.com/libnxz/power-gzip for zlib api and other utils + * + * Author: Bulent Abali <abali@us.ibm.com> + * + * Definitions of acronyms used here. 
See + * P9 NX Gzip Accelerator User's Manual for details: + * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf + * + * adler/crc: 32 bit checksums appended to stream tail + * ce: completion extension + * cpb: coprocessor parameter block (metadata) + * crb: coprocessor request block (command) + * csb: coprocessor status block (status) + * dht: dynamic huffman table + * dde: data descriptor element (address, length) + * ddl: list of ddes + * dh/fh: dynamic and fixed huffman types + * fc: coprocessor function code + * histlen: history/dictionary length + * history: sliding window of up to 32KB of data + * lzcount: Deflate LZ symbol counts + * rembytecnt: remaining byte count + * sfbt: source final block type; last block's type during decomp + * spbc: source processed byte count + * subc: source unprocessed bit count + * tebc: target ending bit count; valid bits in the last byte + * tpbc: target processed byte count + * vas: virtual accelerator switch; the user mode interface + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/fcntl.h> +#include <sys/mman.h> +#include <endian.h> +#include <bits/endian.h> +#include <sys/ioctl.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include "nxu.h" +#include "nx.h" + +int nx_dbg; +FILE *nx_gzip_log; +void *nx_fault_storage_address; + +#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) +#define FNAME_MAX 1024 +#define FEXT ".nx.gz" + +/* + * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. + */ +static int compress_fht_sample(char *src, uint32_t srclen, char *dst, + uint32_t dstlen, int with_count, + struct nx_gzip_crb_cpb_t *cmdp, void *handle) +{ + int cc; + uint32_t fc; + + assert(!!cmdp); + + put32(cmdp->crb, gzip_fc, 0); /* clear */ + fc = (with_count) ? 
GZIP_FC_COMPRESS_RESUME_FHT_COUNT : + GZIP_FC_COMPRESS_RESUME_FHT; + putnn(cmdp->crb, gzip_fc, fc); + putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */ + memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb)); + + /* Section 6.6 programming notes; spbc may be in two different + * places depending on FC. + */ + if (!with_count) + put32(cmdp->cpb, out_spbc_comp, 0); + else + put32(cmdp->cpb, out_spbc_comp_with_count, 0); + + /* Figure 6-3 6-4; CSB location */ + put64(cmdp->crb, csb_address, 0); + put64(cmdp->crb, csb_address, + (uint64_t) &cmdp->crb.csb & csb_address_mask); + + /* Source direct dde (scatter-gather list) */ + clear_dde(cmdp->crb.source_dde); + putnn(cmdp->crb.source_dde, dde_count, 0); + put32(cmdp->crb.source_dde, ddebc, srclen); + put64(cmdp->crb.source_dde, ddead, (uint64_t) src); + + /* Target direct dde (scatter-gather list) */ + clear_dde(cmdp->crb.target_dde); + putnn(cmdp->crb.target_dde, dde_count, 0); + put32(cmdp->crb.target_dde, ddebc, dstlen); + put64(cmdp->crb.target_dde, ddead, (uint64_t) dst); + + /* Submit the crb, the job descriptor, to the accelerator */ + nxu_run_job(cmdp, handle); + + /* Poll for the csb.v bit; you should also consider sleeping + * or interrupts. + */ + do { ; } while (getnn(cmdp->crb.csb, csb_v) == 0); + + /* CC Table 6-8 */ + cc = getnn(cmdp->crb.csb, csb_cc); + + return cc; +} + +/* + * Prepares a blank no filename no timestamp gzip header and returns + * the number of bytes written to buf. 
+ * Gzip specification at https://tools.ietf.org/html/rfc1952 + */ +int gzip_header_blank(char *buf) +{ + int i = 0; + + buf[i++] = 0x1f; /* ID1 */ + buf[i++] = 0x8b; /* ID2 */ + buf[i++] = 0x08; /* CM */ + buf[i++] = 0x00; /* FLG */ + buf[i++] = 0x00; /* MTIME */ + buf[i++] = 0x00; /* MTIME */ + buf[i++] = 0x00; /* MTIME */ + buf[i++] = 0x00; /* MTIME */ + buf[i++] = 0x04; /* XFL 4=fastest */ + buf[i++] = 0x03; /* OS UNIX */ + + return i; +} + +/* Caller must free the allocated buffer return nonzero on error. */ +int read_alloc_input_file(char *fname, char **buf, size_t *bufsize) +{ + struct stat statbuf; + FILE *fp; + char *p; + size_t num_bytes; + + if (stat(fname, &statbuf)) { + perror(fname); + return(-1); + } + fp = fopen(fname, "r"); + if (fp == NULL) { + perror(fname); + return(-1); + } + assert(NULL != (p = (char *) malloc(statbuf.st_size))); + num_bytes = fread(p, 1, statbuf.st_size, fp); + if (ferror(fp) || (num_bytes != statbuf.st_size)) { + perror(fname); + return(-1); + } + *buf = p; + *bufsize = num_bytes; + return 0; +} + +/* Returns nonzero on error */ +int write_output_file(char *fname, char *buf, size_t bufsize) +{ + FILE *fp; + size_t num_bytes; + + fp = fopen(fname, "w"); + if (fp == NULL) { + perror(fname); + return(-1); + } + num_bytes = fwrite(buf, 1, bufsize, fp); + if (ferror(fp) || (num_bytes != bufsize)) { + perror(fname); + return(-1); + } + fclose(fp); + return 0; +} + +/* + * Z_SYNC_FLUSH as described in zlib.h. + * Returns number of appended bytes + */ +int append_sync_flush(char *buf, int tebc, int final) +{ + uint64_t flush; + int shift = (tebc & 0x7); + + if (tebc > 0) { + /* Last byte is partially full */ + buf = buf - 1; + *buf = *buf & (unsigned char) ((1<<tebc)-1); + } else + *buf = 0; + flush = ((0x1ULL & final) << shift) | *buf; + shift = shift + 3; /* BFINAL and BTYPE written */ + shift = (shift <= 8) ? 
8 : 16; + flush |= (0xFFFF0000ULL) << shift; /* Zero length block */ + shift = shift + 32; + while (shift > 0) { + *buf++ = (unsigned char) (flush & 0xffULL); + flush = flush >> 8; + shift = shift - 8; + } + return(((tebc > 5) || (tebc == 0)) ? 5 : 4); +} + +/* + * Fault in pages prior to NX job submission. wr=1 may be required to + * touch writeable pages. System zero pages do not fault-in the page as + * intended. Typically set wr=1 for NX target pages and set wr=0 for NX + * source pages. + */ +static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr) +{ + char *begin = buf; + char *end = (char *) buf + buf_len - 1; + char t; + + assert(buf_len >= 0 && !!buf); + + NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf, + (buf + buf_len), buf_len, wr)); + + if (buf_len <= 0 || buf == NULL) + return -1; + + do { + t = *begin; + if (wr) + *begin = t; + begin = begin + page_len; + } while (begin < end); + + /* When buf_sz is small or buf tail is in another page */ + t = *end; + if (wr) + *end = t; + + return 0; +} + +/* + * Final deflate block bit. This call assumes the block + * beginning is byte aligned. 
+ */ +static void set_bfinal(void *buf, int bfinal) +{ + char *b = buf; + + if (bfinal) + *b = *b | (unsigned char) 0x01; + else + *b = *b & (unsigned char) 0xfe; +} + +int compress_file(int argc, char **argv, void *handle) +{ + char *inbuf, *outbuf, *srcbuf, *dstbuf; + char outname[FNAME_MAX]; + uint32_t srclen, dstlen; + uint32_t flushlen, chunk; + size_t inlen, outlen, dsttotlen, srctotlen; + uint32_t crc, spbc, tpbc, tebc; + int lzcounts = 0; + int cc; + int num_hdr_bytes; + struct nx_gzip_crb_cpb_t *cmdp; + uint32_t pagelen = 65536; + int fault_tries = 50; + + cmdp = (void *)(uintptr_t) + aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), + sizeof(struct nx_gzip_crb_cpb_t)); + + if (argc != 2) { + fprintf(stderr, "usage: %s <fname>\n", argv[0]); + exit(-1); + } + if (read_alloc_input_file(argv[1], &inbuf, &inlen)) + exit(-1); + fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); + + /* Generous output buffer for header/trailer */ + outlen = 2 * inlen + 1024; + + assert(NULL != (outbuf = (char *)malloc(outlen))); + nx_touch_pages(outbuf, outlen, pagelen, 1); + + /* Compress piecemeal in smallish chunks */ + chunk = 1<<22; + + /* Write the gzip header to the stream */ + num_hdr_bytes = gzip_header_blank(outbuf); + dstbuf = outbuf + num_hdr_bytes; + outlen = outlen - num_hdr_bytes; + dsttotlen = num_hdr_bytes; + + srcbuf = inbuf; + srctotlen = 0; + + /* Init the CRB, the coprocessor request block */ + memset(&cmdp->crb, 0, sizeof(cmdp->crb)); + + /* Initial gzip crc32 */ + put32(cmdp->cpb, in_crc, 0); + + fault_tries = 50; + + while (inlen > 0) { + + /* Submit chunk size source data per job */ + srclen = NX_MIN(chunk, inlen); + /* Supply large target in case data expands */ + dstlen = NX_MIN(2*srclen, outlen); + + /* Page faults are handled by the user code */ + + /* Fault-in pages; an improved code wouldn't touch so + * many pages but would try to estimate the + * compression ratio and adjust both the src and dst + * touch amounts. 
+ */ + nx_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen, + 1); + nx_touch_pages(srcbuf, srclen, pagelen, 0); + nx_touch_pages(dstbuf, dstlen, pagelen, 1); + + cc = compress_fht_sample( + srcbuf, srclen, + dstbuf, dstlen, + lzcounts, cmdp, handle); + + if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC && + cc != ERR_NX_TRANSLATION) { + fprintf(stderr, "nx error: cc= %d\n", cc); + exit(-1); + } + + /* Page faults are handled by the user code */ + if (cc == ERR_NX_TRANSLATION) { + NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc)); + NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n", + fault_tries, + (unsigned long long) cmdp->crb.csb.fsaddr)); + + fault_tries--; + if (fault_tries > 0) { + continue; + } else { + fprintf(stderr, "error: cannot progress; "); + fprintf(stderr, "too many faults\n"); + exit(-1); + }; + } + + fault_tries = 50; /* Reset for the next chunk */ + + inlen = inlen - srclen; + srcbuf = srcbuf + srclen; + srctotlen = srctotlen + srclen; + + /* Two possible locations for spbc depending on the function + * code. + */ + spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) : + get32(cmdp->cpb, out_spbc_comp_with_count); + assert(spbc == srclen); + + /* Target byte count */ + tpbc = get32(cmdp->crb.csb, tpbc); + /* Target ending bit count */ + tebc = getnn(cmdp->cpb, out_tebc); + NXPRT(fprintf(stderr, "compressed chunk %d " spbc)); + NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc)); + + if (inlen > 0) { /* More chunks to go */ + set_bfinal(dstbuf, 0); + dstbuf = dstbuf + tpbc; + dsttotlen = dsttotlen + tpbc; + outlen = outlen - tpbc; + /* Round up to the next byte with a flush + * block; do not set the BFINAqL bit. + */ + flushlen = append_sync_flush(dstbuf, tebc, 0); + dsttotlen = dsttotlen + flushlen; + outlen = outlen - flushlen; + dstbuf = dstbuf + flushlen; + NXPRT(fprintf(stderr, "added sync_flush %d bytes\n", + flushlen)); + } else { /* Done */ + /* Set the BFINAL bit of the last block per Deflate + * specification. 
+ */ + set_bfinal(dstbuf, 1); + dstbuf = dstbuf + tpbc; + dsttotlen = dsttotlen + tpbc; + outlen = outlen - tpbc; + } + + /* Resuming crc32 for the next chunk */ + crc = get32(cmdp->cpb, out_crc); + put32(cmdp->cpb, in_crc, crc); + crc = be32toh(crc); + } + + /* Append crc32 and ISIZE to the end */ + memcpy(dstbuf, &crc, 4); + memcpy(dstbuf+4, &srctotlen, 4); + dsttotlen = dsttotlen + 8; + outlen = outlen - 8; + + assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); + strcpy(outname, argv[1]); + strcat(outname, FEXT); + if (write_output_file(outname, outbuf, dsttotlen)) { + fprintf(stderr, "write error: %s\n", outname); + exit(-1); + } + + fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen, + dsttotlen); + fprintf(stderr, "crc32 checksum = %08x\n", crc); + + if (inbuf != NULL) + free(inbuf); + + if (outbuf != NULL) + free(outbuf); + + return 0; +} + +void sigsegv_handler(int sig, siginfo_t *info, void *ctx) +{ + fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(), + sig, info->si_code, info->si_addr); + + nx_fault_storage_address = info->si_addr; +} + +int main(int argc, char **argv) +{ + int rc; + struct sigaction act; + void *handle; + + nx_dbg = 0; + nx_gzip_log = NULL; + act.sa_handler = 0; + act.sa_sigaction = sigsegv_handler; + act.sa_flags = SA_SIGINFO; + act.sa_restorer = 0; + sigemptyset(&act.sa_mask); + sigaction(SIGSEGV, &act, NULL); + + handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0); + if (!handle) { + fprintf(stderr, "Unable to init NX, errno %d\n", errno); + exit(-1); + } + + rc = compress_file(argc, argv, handle); + + nx_function_end(handle); + + return rc; +} diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c new file mode 100644 index 000000000000..d28e1efb527b --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* Copyright 2020 IBM Corp. 
+ * + * Author: Bulent Abali <abali@us.ibm.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/fcntl.h> +#include <sys/mman.h> +#include <endian.h> +#include <bits/endian.h> +#include <sys/ioctl.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include "nx-gzip.h" +#include "nx.h" +#include "copy-paste.h" +#include "nxu.h" +#include "nx_dbg.h" +#include <sys/platform/ppc.h> + +#define barrier() +#define hwsync() ({ asm volatile("hwsync" ::: "memory"); }) + +#ifndef NX_NO_CPU_PRI +#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); }) +#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); }) +#else +#define cpu_pri_default() +#define cpu_pri_low() +#endif + +void *nx_fault_storage_address; + +struct nx_handle { + int fd; + int function; + void *paste_addr; +}; + +static int open_device_nodes(char *devname, int pri, struct nx_handle *handle) +{ + int rc, fd; + void *addr; + struct vas_gzip_setup_attr txattr; + + fd = open(devname, O_RDWR); + if (fd < 0) { + fprintf(stderr, " open device name %s\n", devname); + return -errno; + } + + memset(&txattr, 0, sizeof(txattr)); + txattr.version = 1; + txattr.vas_id = pri; + rc = ioctl(fd, VAS_GZIP_TX_WIN_OPEN, (unsigned long)&txattr); + if (rc < 0) { + fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno); + rc = -errno; + goto out; + } + + addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL); + if (addr == MAP_FAILED) { + fprintf(stderr, "mmap() failed, errno %d\n", errno); + rc = -errno; + goto out; + } + handle->fd = fd; + handle->paste_addr = (void *)((char *)addr + 0x400); + + rc = 0; +out: + close(fd); + return rc; +} + +void *nx_function_begin(int function, int pri) +{ + int rc; + char *devname = "/dev/crypto/nx-gzip"; + struct nx_handle *nxhandle; + + if (function != NX_FUNC_COMP_GZIP) { + errno = EINVAL; + fprintf(stderr, " 
NX_FUNC_COMP_GZIP not found\n"); + return NULL; + } + + + nxhandle = malloc(sizeof(*nxhandle)); + if (!nxhandle) { + errno = ENOMEM; + fprintf(stderr, " No memory\n"); + return NULL; + } + + nxhandle->function = function; + rc = open_device_nodes(devname, pri, nxhandle); + if (rc < 0) { + errno = -rc; + fprintf(stderr, " open_device_nodes failed\n"); + return NULL; + } + + return nxhandle; +} + +int nx_function_end(void *handle) +{ + int rc = 0; + struct nx_handle *nxhandle = handle; + + rc = munmap(nxhandle->paste_addr - 0x400, 4096); + if (rc < 0) { + fprintf(stderr, "munmap() failed, errno %d\n", errno); + return rc; + } + close(nxhandle->fd); + free(nxhandle); + + return rc; +} + +static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp) +{ + long poll = 0; + uint64_t t; + + /* Save power and let other threads use the h/w. top may show + * 100% but only because OS doesn't know we slowed the this + * h/w thread while polling. We're letting other threads have + * higher throughput on the core. + */ + cpu_pri_low(); + +#define CSB_MAX_POLL 200000000UL +#define USLEEP_TH 300000UL + + t = __ppc_get_timebase(); + + while (getnn(cmdp->crb.csb, csb_v) == 0) { + ++poll; + hwsync(); + + cpu_pri_low(); + + /* usleep(0) takes around 29000 ticks ~60 us. + * 300000 is spinning for about 600 us then + * start sleeping. + */ + if ((__ppc_get_timebase() - t) > USLEEP_TH) { + cpu_pri_default(); + usleep(1); + } + + if (poll > CSB_MAX_POLL) + break; + + /* Fault address from signal handler */ + if (nx_fault_storage_address) { + cpu_pri_default(); + return -EAGAIN; + } + + } + + cpu_pri_default(); + + /* hw has updated csb and output buffer */ + hwsync(); + + /* Check CSB flags. 
*/ + if (getnn(cmdp->crb.csb, csb_v) == 0) { + fprintf(stderr, "CSB still not valid after %d polls.\n", + (int) poll); + prt_err("CSB still not valid after %d polls, giving up.\n", + (int) poll); + return -ETIMEDOUT; + } + + return 0; +} + +int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle) +{ + int i, ret, retries; + struct nx_handle *nxhandle = handle; + + assert(handle != NULL); + i = 0; + retries = 5000; + while (i++ < retries) { + hwsync(); + vas_copy(&cmdp->crb, 0); + ret = vas_paste(nxhandle->paste_addr, 0); + hwsync(); + + NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n", + i, retries, ret)); + + if ((ret == 2) || (ret == 3)) { + + ret = nx_wait_for_csb(cmdp); + if (!ret) { + goto out; + } else if (ret == -EAGAIN) { + long x; + + prt_err("Touching address %p, 0x%lx\n", + nx_fault_storage_address, + *(long *) nx_fault_storage_address); + x = *(long *) nx_fault_storage_address; + *(long *) nx_fault_storage_address = x; + nx_fault_storage_address = 0; + continue; + } else { + prt_err("wait_for_csb() returns %d\n", ret); + break; + } + } else { + if (i < 10) { + /* spin for few ticks */ +#define SPIN_TH 500UL + uint64_t fail_spin; + + fail_spin = __ppc_get_timebase(); + while ((__ppc_get_timebase() - fail_spin) < + SPIN_TH) + ; + } else { + /* sleep */ + unsigned int pr = 0; + + if (pr++ % 100 == 0) { + prt_err("Paste attempt %d/", i); + prt_err("%d, failed pid= %d\n", retries, + getpid()); + } + usleep(1); + } + continue; + } + } + +out: + cpu_pri_default(); + + return ret; +}