summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbenno <benno@openbsd.org>2019-02-10 23:18:28 +0000
committerbenno <benno@openbsd.org>2019-02-10 23:18:28 +0000
commit60a32ee9289593d1c350423a61f21dab436b3e7f (patch)
treee5b7c139d6c2987f1cdc502bc8edde00dea9b429
parentRevert previous. requested by deraadt@ (diff)
downloadwireguard-openbsd-60a32ee9289593d1c350423a61f21dab436b3e7f.tar.xz
wireguard-openbsd-60a32ee9289593d1c350423a61f21dab436b3e7f.zip
Import Kristaps' openrsync into the tree.
OK deraadt@
-rw-r--r--usr.bin/rsync/Makefile57
-rw-r--r--usr.bin/rsync/TODO.md42
-rw-r--r--usr.bin/rsync/blocks.c678
-rw-r--r--usr.bin/rsync/child.c72
-rw-r--r--usr.bin/rsync/client.c106
-rw-r--r--usr.bin/rsync/downloader.c600
-rw-r--r--usr.bin/rsync/extern.h295
-rw-r--r--usr.bin/rsync/fargs.c104
-rw-r--r--usr.bin/rsync/flist.c1160
-rw-r--r--usr.bin/rsync/hash.c94
-rw-r--r--usr.bin/rsync/io.c585
-rw-r--r--usr.bin/rsync/log.c194
-rw-r--r--usr.bin/rsync/main.c453
-rw-r--r--usr.bin/rsync/md4.c265
-rw-r--r--usr.bin/rsync/md4.h49
-rw-r--r--usr.bin/rsync/mkpath.c77
-rw-r--r--usr.bin/rsync/receiver.c341
-rw-r--r--usr.bin/rsync/rsync.1213
-rw-r--r--usr.bin/rsync/rsync.5469
-rw-r--r--usr.bin/rsync/rsyncd.5135
-rw-r--r--usr.bin/rsync/sender.c227
-rw-r--r--usr.bin/rsync/server.c162
-rw-r--r--usr.bin/rsync/session.c161
-rw-r--r--usr.bin/rsync/socket.c435
-rw-r--r--usr.bin/rsync/symlinks.c102
-rw-r--r--usr.bin/rsync/uploader.c772
26 files changed, 7848 insertions, 0 deletions
diff --git a/usr.bin/rsync/Makefile b/usr.bin/rsync/Makefile
new file mode 100644
index 00000000000..686f76a5983
--- /dev/null
+++ b/usr.bin/rsync/Makefile
@@ -0,0 +1,57 @@
+# Installation root; MANDIR and BINDIR below are derived from it.
+PREFIX = /usr/local
+# Objects linked into the openrsync binary and into each AFLS program.
+OBJS = blocks.o \
+ child.o \
+ client.o \
+ downloader.o \
+ fargs.o \
+ flist.o \
+ hash.o \
+ io.o \
+ log.o \
+ md4.o \
+ mkpath.o \
+ receiver.o \
+ sender.o \
+ server.o \
+ session.o \
+ socket.o \
+ symlinks.o \
+ uploader.o
+# OBJS plus main.o: everything the openrsync binary is linked from.
+ALLOBJS = $(OBJS) \
+ main.o
+# Extra programs built by the "afl" target, each from its own .c file
+# plus OBJS (presumably fuzzing harnesses -- confirm).
+AFLS = afl/test-blk_recv \
+ afl/test-flist_recv
+# Unoptimised build with debug symbols and a broad set of warnings.
+CFLAGS += -O0 -g -W -Wall -Wextra -Wno-unused-parameter
+MANDIR = $(PREFIX)/man
+BINDIR = $(PREFIX)/bin
+
+# Default target: build the binary only.
+all: openrsync
+
+openrsync: $(ALLOBJS)
+ $(CC) -o $@ $(ALLOBJS) -lm
+
+afl: $(AFLS)
+
+$(AFLS): $(OBJS)
+ $(CC) -o $@ $*.c $(OBJS)
+
+# Install the binary and manual pages beneath $(DESTDIR).
+# NOTE(review): this installs openrsync.1, but the file list of this
+# import names the section 1 page rsync.1 -- confirm the intended name.
+install: openrsync
+ mkdir -p $(DESTDIR)$(BINDIR)
+ mkdir -p $(DESTDIR)$(MANDIR)/man1
+ mkdir -p $(DESTDIR)$(MANDIR)/man5
+ install -m 0444 openrsync.1 $(DESTDIR)$(MANDIR)/man1
+ install -m 0444 rsync.5 rsyncd.5 $(DESTDIR)$(MANDIR)/man5
+ install -m 0555 openrsync $(DESTDIR)$(BINDIR)
+
+# Remove exactly the files that "install" puts in place.
+uninstall:
+ rm -f $(DESTDIR)$(BINDIR)/openrsync
+ rm -f $(DESTDIR)$(MANDIR)/man1/openrsync.1
+ rm -f $(DESTDIR)$(MANDIR)/man5/rsync.5
+ rm -f $(DESTDIR)$(MANDIR)/man5/rsyncd.5
+
+clean:
+ rm -f $(ALLOBJS) openrsync $(AFLS)
+
+# Header dependencies: everything is rebuilt when extern.h changes, and
+# the listed objects also when md4.h changes.
+$(ALLOBJS) $(AFLS): extern.h
+
+blocks.o downloader.o hash.o md4.o: md4.h
diff --git a/usr.bin/rsync/TODO.md b/usr.bin/rsync/TODO.md
new file mode 100644
index 00000000000..c66371a5181
--- /dev/null
+++ b/usr.bin/rsync/TODO.md
@@ -0,0 +1,42 @@
+This is a list of possible work projects within openrsync, rated by difficulty.
+
+First, porting: see
+[Porting](https://github.com/kristapsdz/openrsync/blob/master/README.md#Portability)
+for information on this topic.
+I've included the specific security porting topics below.
+
+This list also does not include adding support for features (e.g., **-u** and
+so on).
+
+- Easy: add a hashtable to `blk_find()` in
+ [blocks.c](https://github.com/kristapsdz/openrsync/blob/master/blocks.c)
+ for quickly looking up fast-hash matches.
+
+- Easy: print more statistics, such as transfer times and rates.
+
+- Easy: tighten the [pledge(2)](https://man.openbsd.org/pledge.2) and
+ [unveil(2)](https://man.openbsd.org/unveil.2) to work with **-n**, as
+ it does not touch files.
+
+- Easy: find the shared path for all input files and
+ [unveil(2)](https://man.openbsd.org/unveil.2) only the shared path
+ instead of each one.
+
+- Medium: have the log messages when multiplex writing (server mode) is
+ enabled be flushed out through the multiplex channel.
+ Right now, they're emitted on `stderr` just like with the client.
+
+- Medium: porting the security precautions
+ ([unveil(2)](https://man.openbsd.org/unveil.2),
+ [pledge(2)](https://man.openbsd.org/pledge.2)) to
+ [FreeBSD](https://www.freebsd.org)'s
+ [Capsicum](https://wiki.freebsd.org/Capsicum).
+ Without this in place, you're exposing your file-system to whatever is
+ coming down over the wire.
+ This is certainly possible, as openrsync makes exclusive use of the "at"
+ functions (e.g., [openat(2)](https://man.openbsd.org/openat.2)) for working
+ with files.
+
+- Hard: the same, but for Linux.
+
+Above all, `grep FIXME *.c *.h` and start from there.
diff --git a/usr.bin/rsync/blocks.c b/usr.bin/rsync/blocks.c
new file mode 100644
index 00000000000..d6c26eec988
--- /dev/null
+++ b/usr.bin/rsync/blocks.c
@@ -0,0 +1,678 @@
+/* $Id: blocks.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "md4.h"
+#include "extern.h"
+
+/*
+ * Write "size" bytes of "b" to "fd" as a sequence of chunks, each one
+ * preceded by its length and no larger than MAX_CHUNK, then write
+ * "token" after the last chunk.
+ * When "size" is zero only the token is written and "b" is never
+ * dereferenced.
+ * This is the sequence that blk_merge() reads back.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+blk_flush(struct sess *sess, int fd,
+	const void *b, off_t size, int32_t token)
+{
+	/*
+	 * Walk the buffer through a char pointer: arithmetic on a
+	 * "void *" is a compiler extension, not standard C.
+	 */
+	const char	*data = b;
+	off_t		 i = 0, sz;
+
+	while (i < size) {
+		sz = MAX_CHUNK < (size - i) ?
+			MAX_CHUNK : (size - i);
+		if ( ! io_write_int(sess, fd, sz)) {
+			ERRX1(sess, "io_write_int");
+			return 0;
+		} else if ( ! io_write_buf(sess, fd, data + i, sz)) {
+			ERRX1(sess, "io_write_buf");
+			return 0;
+		}
+		i += sz;
+	}
+
+	/* The token (or zero) always terminates the chunk sequence. */
+
+	if ( ! io_write_int(sess, fd, token)) {
+		ERRX1(sess, "io_write_int");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Look for a block in "blks" that matches the data starting at "offs"
+ * within "buf" (which is "size" bytes long).
+ * The window examined is one block length, or whatever remains of the
+ * buffer if that is shorter.
+ * The block at index "hint" is tried first; after that every block is
+ * scanned in order.
+ * A block matches when its length, its short checksum and the first
+ * "csum" bytes of its long checksum all agree with the window.
+ * Returns the matching blk, or NULL if there is none.
+ */
+static struct blk *
+blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
+	const struct blkset *blks, const char *path, size_t hint)
+{
+	unsigned char	 md[MD4_DIGEST_LENGTH];
+	const char	*win = (const char *)buf + offs;
+	struct blk	*cand;
+	uint32_t	 fast;
+	off_t		 remain, winsz;
+	size_t		 n;
+	int		 md_ready = 0;
+
+	/*
+	 * The fast hash of the window is always needed.
+	 * FIXME: this could be a rolling computation; it is kept
+	 * simple on purpose.
+	 */
+
+	remain = size - offs;
+	assert(remain);
+	winsz = (off_t)blks->len;
+	if (remain < winsz)
+		winsz = remain;
+	fast = hash_fast(win, (size_t)winsz);
+
+	/* Try the hinted block before scanning the whole set. */
+
+	if (hint < blks->blksz) {
+		cand = &blks->blks[hint];
+		if (fast == cand->chksum_short &&
+		    (size_t)winsz == cand->len) {
+			hash_slow(win, (size_t)winsz, md, sess);
+			md_ready = 1;
+			if (0 == memcmp(md,
+			    cand->chksum_long, blks->csum)) {
+				LOG4(sess, "%s: found matching hinted match: "
+					"position %jd, block %zu "
+					"(position %jd, size %zu)", path,
+					(intmax_t)offs, cand->idx,
+					(intmax_t)cand->offs,
+					cand->len);
+				return cand;
+			}
+		}
+	}
+
+	/*
+	 * Linear scan: the cheap comparisons filter candidates, and
+	 * the slow hash is only consulted for those that survive.
+	 */
+
+	for (n = 0; n < blks->blksz; n++) {
+		cand = &blks->blks[n];
+		if (fast != cand->chksum_short ||
+		    (size_t)winsz != cand->len)
+			continue;
+
+		LOG4(sess, "%s: found matching fast match: "
+			"position %jd, block %zu "
+			"(position %jd, size %zu)", path,
+			(intmax_t)offs, cand->idx,
+			(intmax_t)cand->offs,
+			cand->len);
+
+		/* The slow hash is computed at most once per call. */
+
+		if ( ! md_ready) {
+			hash_slow(win, (size_t)winsz, md, sess);
+			md_ready = 1;
+		}
+
+		if (0 == memcmp(md, cand->chksum_long, blks->csum)) {
+			LOG4(sess, "%s: sender verifies slow match", path);
+			return cand;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Sender-side delta generation for one file.
+ * Walks "buf" one byte at a time, asking blk_find() whether a block
+ * from "blks" starts at the current offset.
+ * On a match, the unmatched bytes accumulated since the previous match
+ * are flushed, followed by the negative token naming the block, and
+ * the walk resumes just past the matched data.
+ * Whatever is left at the end is flushed with a zero token.
+ * The caller guarantees that "blks" holds at least one block.
+ * Return zero on failure, non-zero on success.
+ */
+static int
+blk_match_send(struct sess *sess, const char *path, int fd,
+	const void *buf, off_t size, const struct blkset *blks)
+{
+	const char	*data = buf;
+	struct blk	*hit;
+	off_t		 pos, mark, stop, gap;
+	off_t		 copied = 0, literal = 0, sent = 0;
+	size_t		 next = 0;
+	int32_t		 tok;
+
+	/*
+	 * No search starts inside the tail of the file that is shorter
+	 * than the last block: a window there can only be compared
+	 * once, so there is nothing to slide over.
+	 */
+
+	stop = size + 1 - blks->blks[blks->blksz - 1].len;
+
+	for (mark = pos = 0; pos < stop; pos++) {
+		hit = blk_find(sess, data, size,
+			pos, blks, path, next);
+		if (NULL == hit)
+			continue;
+
+		/* Literal bytes between the last match and this one. */
+
+		gap = pos - mark;
+		literal += gap;
+		sent += gap;
+		LOG4(sess, "%s: flushing %jd B before %zu B "
+			"block %zu", path, (intmax_t)gap, hit->len,
+			hit->idx);
+		tok = -(hit->idx + 1);
+
+		/*
+		 * Literal data first, then the block's token: the
+		 * receiver writes the former and then copies the
+		 * block out of the file it already has.
+		 */
+
+		if ( ! blk_flush(sess, fd, data + mark, gap, tok)) {
+			ERRX1(sess, "blk_flush");
+			return 0;
+		}
+
+		/*
+		 * Skip over the matched block (the loop increment
+		 * supplies the final byte) and suggest the following
+		 * block as the next hint.
+		 */
+
+		copied += hit->len;
+		sent += hit->len;
+		pos += hit->len - 1;
+		mark = pos + 1;
+		next = hit->idx + 1;
+	}
+
+	/* Trailing literal data, terminated by the zero token. */
+
+	gap = size - mark;
+	sent += gap;
+	literal += gap;
+
+	LOG4(sess, "%s: flushing remaining %jd B", path, (intmax_t)gap);
+
+	if ( ! blk_flush(sess, fd, data + mark, gap, 0)) {
+		ERRX1(sess, "blk_flush");
+		return 0;
+	}
+
+	LOG3(sess, "%s: flushed (chunked) %jd B total, "
+		"%.2f%% upload ratio", path, (intmax_t)sent,
+		100.0 * literal / sent);
+	return 1;
+}
+
+/*
+ * Given a local file "path" and the block set "blks" describing the
+ * peer's copy, write to "fd" the literal data and block tokens needed
+ * to reconstruct our file, followed by the whole-file hash.
+ * The file is opened read-only and mapped for the duration of the
+ * call; both the mapping and the descriptor are released on every
+ * return path once the file has been opened.
+ * Return zero on failure, non-zero on success.
+ */
+int
+blk_match(struct sess *sess, int fd,
+	const struct blkset *blks, const char *path)
+{
+	int		 nfd, rc = 0;
+	struct stat	 st;
+	void		*map = MAP_FAILED;
+	size_t		 mapsz = 0;
+	unsigned char	 filemd[MD4_DIGEST_LENGTH];
+
+	/* Start by mapping our file into memory. */
+
+	if (-1 == (nfd = open(path, O_RDONLY, 0))) {
+		ERR(sess, "%s: open", path);
+		return 0;
+	} else if (-1 == fstat(nfd, &st)) {
+		ERR(sess, "%s: fstat", path);
+		goto out;
+	}
+
+	/*
+	 * We might possibly have a zero-length file, in which case the
+	 * mmap() will fail, so only do this with non-zero files.
+	 */
+
+	if ((mapsz = st.st_size) > 0) {
+		map = mmap(NULL, mapsz, PROT_READ, MAP_SHARED, nfd, 0);
+		if (MAP_FAILED == map) {
+			ERR(sess, "%s: mmap", path);
+			goto out;
+		}
+	}
+
+	/*
+	 * If the file is empty or no blocks were supplied, simply send
+	 * the whole file.
+	 * Otherwise, run the hash matching routine and send raw chunks
+	 * and subsequent matching tokens.
+	 * This part broadly symmetrises blk_merge().
+	 */
+
+	if (st.st_size && blks->blksz) {
+		if ( ! blk_match_send(sess, path,
+		    fd, map, st.st_size, blks)) {
+			ERRX1(sess, "blk_match_send");
+			goto out;
+		}
+	} else {
+		/*
+		 * Failure must leave through "out": returning directly
+		 * here would leak both the mapping and the descriptor.
+		 */
+		if ( ! blk_flush(sess, fd, map, st.st_size, 0)) {
+			ERRX1(sess, "blk_flush");
+			goto out;
+		}
+		LOG3(sess, "%s: flushed (un-chunked) %jd B, 100%% "
+			"upload ratio", path, (intmax_t)st.st_size);
+	}
+
+	/*
+	 * Now write the full file hash.
+	 * Since we're seeding the hash, this always gives us some sort
+	 * of data even if the file's zero-length.
+	 */
+
+	hash_file(map, st.st_size, filemd, sess);
+
+	if ( ! io_write_buf(sess, fd, filemd, MD4_DIGEST_LENGTH)) {
+		ERRX1(sess, "io_write_buf");
+		goto out;
+	}
+
+	rc = 1;
+out:
+	if (MAP_FAILED != map)
+		munmap(map, mapsz);
+	close(nfd);
+	return rc;
+}
+
+/*
+ * Release a block set together with its array of blocks.
+ * Passing NULL is a no-op.
+ * FIXME: remove.
+ */
+void
+blkset_free(struct blkset *p)
+{
+
+	if (NULL != p) {
+		free(p->blks);
+		free(p);
+	}
+}
+
+/*
+ * Write the acknowledgement for a block set to "fd": the identifier
+ * "idx" followed by the set's block count, block length, checksum
+ * length and remainder, each as an integer and in that order.
+ * Symmetrises blk_send_ack().
+ * Returns zero on failure, non-zero on success.
+ */
+int
+blk_recv_ack(struct sess *sess,
+	int fd, const struct blkset *blocks, int32_t idx)
+{
+
+	/* FIXME: put into static block. */
+
+	/* Writing stops at the first value that fails to go out. */
+
+	if ( ! io_write_int(sess, fd, idx) ||
+	     ! io_write_int(sess, fd, blocks->blksz) ||
+	     ! io_write_int(sess, fd, blocks->len) ||
+	     ! io_write_int(sess, fd, blocks->csum) ||
+	     ! io_write_int(sess, fd, blocks->rem)) {
+		ERRX1(sess, "io_write_int");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Read a block set from "fd": first the prologue (block count, block
+ * length, checksum length, remainder), then the short and long
+ * checksum of every block.
+ * Offsets, indices and lengths of the blocks are derived locally; the
+ * last block takes the remainder as its length when that is non-zero.
+ * The prologue comes from the peer, so it is validated before any of
+ * it is used to size a read.
+ * Returns the newly allocated set (release it with blkset_free()) or
+ * NULL on failure.
+ */
+struct blkset *
+blk_recv(struct sess *sess, int fd, const char *path)
+{
+	struct blkset	*s;
+	int32_t		 i;
+	size_t		 j;
+	struct blk	*b;
+	off_t		 offs = 0;
+
+	if (NULL == (s = calloc(1, sizeof(struct blkset)))) {
+		ERR(sess, "calloc");
+		return NULL;
+	}
+
+	/*
+	 * The block prologue consists of a few values that we'll need
+	 * in reading the individual blocks for this file.
+	 * FIXME: read into buffer and unbuffer.
+	 */
+
+	if ( ! io_read_size(sess, fd, &s->blksz)) {
+		ERRX1(sess, "io_read_size");
+		goto out;
+	} else if ( ! io_read_size(sess, fd, &s->len)) {
+		ERRX1(sess, "io_read_size");
+		goto out;
+	} else if ( ! io_read_size(sess, fd, &s->csum)) {
+		ERRX1(sess, "io_read_size");
+		goto out;
+	} else if ( ! io_read_size(sess, fd, &s->rem)) {
+		ERRX1(sess, "io_read_size");
+		goto out;
+	}
+
+	/*
+	 * Sanity-check the prologue.
+	 * The checksum length bounds every read into chksum_long, so
+	 * it must be rejected here rather than merely asserted.
+	 * Zero-length blocks are refused as well: a match against one
+	 * would not advance the sender's scan over the file.
+	 */
+
+	if (s->rem && s->rem >= s->len) {
+		ERRX(sess, "block remainder is "
+			"greater than block size");
+		goto out;
+	} else if (s->csum > sizeof(s->blks->chksum_long)) {
+		ERRX(sess, "inappropriate checksum length");
+		goto out;
+	} else if (s->blksz && 0 == s->len) {
+		ERRX(sess, "non-zero block count with "
+			"zero block length");
+		goto out;
+	}
+
+	LOG3(sess, "%s: read block prologue: %zu blocks of "
+		"%zu B, %zu B remainder, %zu B checksum", path,
+		s->blksz, s->len, s->rem, s->csum);
+
+	if (s->blksz) {
+		s->blks = calloc(s->blksz, sizeof(struct blk));
+		if (NULL == s->blks) {
+			ERR(sess, "calloc");
+			goto out;
+		}
+	}
+
+	/*
+	 * Read each block individually.
+	 * FIXME: read buffer and unbuffer.
+	 */
+
+	for (j = 0; j < s->blksz; j++) {
+		b = &s->blks[j];
+		if ( ! io_read_int(sess, fd, &i)) {
+			ERRX1(sess, "io_read_int");
+			goto out;
+		}
+		b->chksum_short = i;
+
+		if ( ! io_read_buf(sess,
+		    fd, b->chksum_long, s->csum)) {
+			ERRX1(sess, "io_read_buf");
+			goto out;
+		}
+
+		/*
+		 * If we're the last block, then we're assigned the
+		 * remainder of the data.
+		 */
+
+		b->offs = offs;
+		b->idx = j;
+		b->len = (j == (s->blksz - 1) && s->rem) ?
+			s->rem : s->len;
+		offs += b->len;
+
+		LOG4(sess, "%s: read block %zu, "
+			"length %zu B", path, b->idx, b->len);
+	}
+
+	s->size = offs;
+	LOG3(sess, "%s: read blocks: %zu blocks, %jd B total "
+		"blocked data", path, s->blksz, (intmax_t)s->size);
+	return s;
+out:
+	blkset_free(s);
+	return NULL;
+}
+
+/*
+ * Read the four-value block prologue (block count, block length,
+ * checksum length, remainder) from "fd" into "p" and validate it.
+ * Symmetrises blk_recv_ack(), except w/o the leading identifier.
+ * Return zero on failure, non-zero on success.
+ */
+int
+blk_send_ack(struct sess *sess, int fd, struct blkset *p)
+{
+	char	 buf[16];
+	size_t	 pos = 0, sz;
+
+	/* All four integers are pulled off the wire in a single read. */
+
+	sz = sizeof(int32_t) + /* block count */
+	     sizeof(int32_t) + /* block length */
+	     sizeof(int32_t) + /* checksum length */
+	     sizeof(int32_t); /* block remainder */
+	assert(sz <= sizeof(buf));
+
+	if ( ! io_read_buf(sess, fd, buf, sz)) {
+		ERRX1(sess, "io_read_buf");
+		return 0;
+	}
+
+	/* Unpack in wire order, stopping at the first failure. */
+
+	if ( ! io_unbuffer_size(sess, buf, &pos, sz, &p->blksz) ||
+	     ! io_unbuffer_size(sess, buf, &pos, sz, &p->len) ||
+	     ! io_unbuffer_size(sess, buf, &pos, sz, &p->csum) ||
+	     ! io_unbuffer_size(sess, buf, &pos, sz, &p->rem)) {
+		ERRX1(sess, "io_unbuffer_size");
+		return 0;
+	}
+
+	/* Refuse values that make no sense before anyone uses them. */
+
+	if (p->len && p->rem >= p->len) {
+		ERRX1(sess, "non-zero length is less than remainder");
+		return 0;
+	}
+	if (0 == p->csum || p->csum > 16) {
+		ERRX1(sess, "inappropriate checksum length");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * The receiver reads raw data and block tokens from "fd" (the stream
+ * produced by blk_flush()) and writes the reconstructed file to
+ * "outfd".
+ * A positive value announces that many bytes of literal data, a
+ * negative value names a block of "block" to copy out of "map", and
+ * zero ends the stream.
+ * An MD4 hash seeded with the session seed is kept over everything
+ * written and compared against the hash that follows the stream.
+ * On success, "stats" is set to the percentage of bytes that arrived
+ * as literal data (zero if nothing at all was written).
+ * Returns zero on failure, non-zero on success.
+ */
+int
+blk_merge(struct sess *sess, int fd, int ffd,
+	const struct blkset *block, int outfd, const char *path,
+	const void *map, size_t mapsz, float *stats)
+{
+	size_t		 sz, tok;
+	int32_t		 rawtok;
+	char		*buf = NULL;
+	void		*pp;
+	const void	*src;
+	const struct blk *b;
+	ssize_t		 ssz;
+	int		 rc = 0;
+	unsigned char	 md[MD4_DIGEST_LENGTH],
+			 ourmd[MD4_DIGEST_LENGTH];
+	off_t		 total = 0, fromdown = 0;
+	MD4_CTX		 ctx;
+
+	MD4_Init(&ctx);
+
+	/* The running hash starts with the session seed. */
+
+	rawtok = htole32(sess->seed);
+	MD4_Update(&ctx, (unsigned char *)&rawtok, sizeof(int32_t));
+
+	for (;;) {
+		/*
+		 * This matches the sequence in blk_flush().
+		 * We read the size/token, then optionally the data.
+		 * The size >0 for reading data, 0 for no more data, and
+		 * <0 for a token indicator.
+		 */
+
+		if ( ! io_read_int(sess, fd, &rawtok)) {
+			ERRX1(sess, "io_read_int");
+			goto out;
+		} else if (0 == rawtok)
+			break;
+
+		if (rawtok > 0) {
+			sz = rawtok;
+			if (NULL == (pp = realloc(buf, sz))) {
+				ERR(sess, "realloc");
+				goto out;
+			}
+			buf = pp;
+			if ( ! io_read_buf(sess, fd, buf, sz)) {
+				ERRX1(sess, "io_read_buf");
+				goto out;
+			}
+
+			if ((ssz = write(outfd, buf, sz)) < 0) {
+				ERR(sess, "write: temporary file");
+				goto out;
+			} else if ((size_t)ssz != sz) {
+				ERRX(sess, "write: short write");
+				goto out;
+			}
+
+			fromdown += sz;
+			total += sz;
+			LOG4(sess, "%s: received %zd B block, now %jd "
+				"B total", path, ssz, (intmax_t)total);
+
+			MD4_Update(&ctx, buf, sz);
+		} else {
+			/*
+			 * Add before negating: "-rawtok - 1" would
+			 * overflow if the peer sent INT32_MIN.
+			 */
+
+			tok = -(rawtok + 1);
+			if (tok >= block->blksz) {
+				ERRX(sess, "token not in block set");
+				goto out;
+			}
+			b = &block->blks[tok];
+
+			/*
+			 * Now we read from our block.
+			 * We should only be at this point if we have a
+			 * block to read from, i.e., if we were able to
+			 * map our origin file and create a block
+			 * profile from it.
+			 */
+
+			assert(MAP_FAILED != map);
+
+			/* Standard C has no arithmetic on "void *". */
+
+			src = (const char *)map + b->offs;
+			ssz = write(outfd, src, b->len);
+
+			if (ssz < 0) {
+				ERR(sess, "write: temporary file");
+				goto out;
+			} else if ((size_t)ssz != b->len) {
+				ERRX(sess, "write: short write");
+				goto out;
+			}
+
+			total += b->len;
+			LOG4(sess, "%s: copied %zu B, now %jd "
+				"B total", path, b->len,
+				(intmax_t)total);
+
+			MD4_Update(&ctx, src, b->len);
+		}
+	}
+
+	/* Make sure our resulting MD4_ hashes match. */
+
+	MD4_Final(ourmd, &ctx);
+
+	if ( ! io_read_buf(sess, fd, md, MD4_DIGEST_LENGTH)) {
+		ERRX1(sess, "io_read_buf");
+		goto out;
+	} else if (memcmp(md, ourmd, MD4_DIGEST_LENGTH)) {
+		ERRX(sess, "%s: file hash does not match", path);
+		goto out;
+	}
+
+	/* An empty transfer would otherwise compute 0/0, i.e. NaN. */
+
+	*stats = total ? 100.0 * fromdown / total : 0.0;
+	rc = 1;
+out:
+	free(buf);
+	return rc;
+}
+
+/*
+ * Serialise the block set "p" for file index "idx" into one buffer and
+ * write it to "fd": the identifier, the four prologue values (block
+ * count, block length, checksum length, remainder), and then the short
+ * checksum and "csum" bytes of long checksum for every block.
+ * Return zero on failure, non-zero on success.
+ */
+int
+blk_send(struct sess *sess, int fd, size_t idx,
+	const struct blkset *p, const char *path)
+{
+	const struct blk *b;
+	char		*out;
+	size_t		 n, used = 0, need;
+	int		 ok = 0;
+
+	/*
+	 * Five leading integers (identifier, block count, block
+	 * length, checksum length, block remainder), then one integer
+	 * and one long checksum per block.
+	 */
+
+	need = 5 * sizeof(int32_t) +
+	    p->blksz * (sizeof(int32_t) + p->csum);
+
+	if (NULL == (out = malloc(need))) {
+		ERR(sess, "malloc");
+		return 0;
+	}
+
+	io_buffer_int(sess, out, &used, need, idx);
+	io_buffer_int(sess, out, &used, need, p->blksz);
+	io_buffer_int(sess, out, &used, need, p->len);
+	io_buffer_int(sess, out, &used, need, p->csum);
+	io_buffer_int(sess, out, &used, need, p->rem);
+
+	for (n = 0; n < p->blksz; n++) {
+		b = &p->blks[n];
+		io_buffer_int(sess, out, &used,
+			need, b->chksum_short);
+		io_buffer_buf(sess, out, &used, need,
+			b->chksum_long, p->csum);
+	}
+
+	/* The buffer must have been filled exactly. */
+
+	assert(used == need);
+
+	if (io_write_buf(sess, fd, out, need)) {
+		LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, "
+			"%zu B remainder, %zu B checksum", path,
+			p->blksz, p->len, p->rem, p->csum);
+		ok = 1;
+	} else
+		ERRX1(sess, "io_write_buf");
+
+	free(out);
+	return ok;
+}
diff --git a/usr.bin/rsync/child.c b/usr.bin/rsync/child.c
new file mode 100644
index 00000000000..686c58afc91
--- /dev/null
+++ b/usr.bin/rsync/child.c
@@ -0,0 +1,72 @@
+/* $Id: child.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Runs in the client's child process: builds the command line with
+ * fargs_cmdline(), attaches "fd" as both standard input and standard
+ * output, and then executes the command.
+ * It never returns: it either becomes the executed program or exits
+ * with EXIT_FAILURE.
+ *
+ * Pledges: exec, stdio.
+ */
+void
+rsync_child(const struct opts *opts, int fd, const struct fargs *f)
+{
+	struct sess	  sess;
+	char		**argv;
+	size_t		  n;
+
+	memset(&sess, 0, sizeof(struct sess));
+	sess.opts = opts;
+
+	/* Build the argument vector for the remote shell. */
+
+	argv = fargs_cmdline(&sess, f);
+	if (NULL == argv) {
+		ERRX1(&sess, "fargs_cmdline");
+		exit(EXIT_FAILURE);
+	}
+
+	n = 0;
+	while (NULL != argv[n]) {
+		LOG2(&sess, "exec[%zu] = %s", n, argv[n]);
+		n++;
+	}
+
+	/* Both standard streams of the new program talk over "fd". */
+
+	if (-1 == dup2(fd, STDIN_FILENO)) {
+		ERR(&sess, "dup2");
+		exit(EXIT_FAILURE);
+	}
+	if (-1 == dup2(fd, STDOUT_FILENO)) {
+		ERR(&sess, "dup2");
+		exit(EXIT_FAILURE);
+	}
+
+	/* On success, execvp() does not come back. */
+
+	execvp(argv[0], argv);
+
+	ERR(&sess, "%s: execvp", argv[0]);
+	exit(EXIT_FAILURE);
+	/* NOTREACHED */
+}
diff --git a/usr.bin/rsync/client.c b/usr.bin/rsync/client.c
new file mode 100644
index 00000000000..9a115136f8d
--- /dev/null
+++ b/usr.bin/rsync/client.c
@@ -0,0 +1,106 @@
+/* $Id: client.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * The rsync client runs on the operator's local machine.
+ * After exchanging protocol versions and reading the seed over "fd",
+ * it acts either as the sender (pushing the local sources in "f") or,
+ * when the mode is FARGS_RECEIVER, as the receiver (synchronising
+ * into the local sink in "f").
+ * Returns zero on failure, non-zero on success.
+ *
+ * Pledges: stdio, rpath, wpath, cpath, unveil, fattr.
+ *
+ * Pledges (dry-run): -cpath, -wpath, -fattr.
+ * Pledges (!preserve_times): -fattr.
+ */
+int
+rsync_client(const struct opts *opts, int fd, const struct fargs *f)
+{
+	struct sess	 sess;
+	const char	*peer;
+
+	/* Standard rsync preamble, sender side. */
+
+	memset(&sess, 0, sizeof(struct sess));
+	sess.opts = opts;
+	sess.lver = RSYNC_PROTOCOL;
+
+	/* Announce our version, then read the peer's version and seed. */
+
+	if ( ! io_write_int(&sess, fd, sess.lver)) {
+		ERRX1(&sess, "io_write_int");
+		return 0;
+	}
+	if ( ! io_read_int(&sess, fd, &sess.rver)) {
+		ERRX1(&sess, "io_read_int");
+		return 0;
+	}
+	if ( ! io_read_int(&sess, fd, &sess.seed)) {
+		ERRX1(&sess, "io_read_int");
+		return 0;
+	}
+
+	/* A peer speaking an older protocol is refused outright. */
+
+	if (sess.rver < sess.lver) {
+		ERRX(&sess, "remote protocol is older "
+			"than our own (%" PRId32 " < %" PRId32 "): "
+			"this is not supported",
+			sess.rver, sess.lver);
+		return 0;
+	}
+
+	LOG2(&sess, "client detected client version %" PRId32
+		", server version %" PRId32 ", seed %" PRId32,
+		sess.lver, sess.rver, sess.seed);
+
+	sess.mplex_reads = 1;
+
+	/*
+	 * Hand over to the role-specific code: anything other than
+	 * receiver mode sends, receiver mode receives.
+	 */
+
+	peer = NULL == f->host ? "(local)" : f->host;
+
+	if (FARGS_RECEIVER == f->mode) {
+		LOG2(&sess, "client starting receiver: %s", peer);
+		if ( ! rsync_receiver(&sess, fd, fd, f->sink)) {
+			ERRX1(&sess, "rsync_receiver");
+			return 0;
+		}
+	} else {
+		LOG2(&sess, "client starting sender: %s", peer);
+		if ( ! rsync_sender(&sess, fd, fd,
+		    f->sourcesz, f->sources)) {
+			ERRX1(&sess, "rsync_sender");
+			return 0;
+		}
+	}
+
+#if 0
+	/* Probably the EOF. */
+	if (io_read_check(&sess, fd))
+		WARNX(&sess, "data remains in read pipe");
+#endif
+
+	return 1;
+}
diff --git a/usr.bin/rsync/downloader.c b/usr.bin/rsync/downloader.c
new file mode 100644
index 00000000000..284e62b7f3c
--- /dev/null
+++ b/usr.bin/rsync/downloader.c
@@ -0,0 +1,600 @@
+/* $Id: downloader.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "extern.h"
+#include "md4.h"
+
+/*
+ * A small optimisation: have a 1 MB pre-write buffer.
+ * Disable the pre-write buffer by having this be zero.
+ * (It doesn't affect performance much.)
+ */
+#define OBUF_SIZE (1024 * 1024)
+
+enum downloadst {
+ DOWNLOAD_READ_NEXT = 0, /* idle: next read is a file index (or phase end) */
+ DOWNLOAD_READ_LOCAL, /* origin file opened or absent: map it, make temp file */
+ DOWNLOAD_READ_REMOTE /* temp file open: reading sizes/tokens from sender */
+};
+
+/*
+ * Like struct upload, but used to keep track of what we're downloading.
+ * This also is managed by the receiver process.
+ * download_reinit() resets the per-file members for each new file and
+ * leaves fl, flsz, rootfd, and fdin alone.
+ */
+struct download {
+ enum downloadst state; /* state of affairs */
+ size_t idx; /* index of current file */
+ struct blkset blk; /* its blocks */
+ void *map; /* mmap of current file or MAP_FAILED */
+ size_t mapsz; /* length of map */
+ int ofd; /* open origin file or -1 */
+ int fd; /* open output file or -1 */
+ char *fname; /* output filename */
+ MD4_CTX ctx; /* current hashing context */
+ off_t downloaded; /* bytes received from the sender */
+ off_t total; /* bytes in file (received plus copied) */
+ const struct flist *fl; /* file list */
+ size_t flsz; /* size of file list */
+ int rootfd; /* destination directory */
+ int fdin; /* read descriptor from sender */
+ char *obuf; /* pre-write buffer */
+ size_t obufsz; /* current size of obuf */
+ size_t obufmax; /* max size we'll buffer */
+};
+
+
+/*
+ * Report a finished file: its path, its total size scaled to a
+ * readable unit, and the percentage of that size that was downloaded.
+ * Prints nothing when we're running as the server.
+ */
+static void
+log_file(struct sess *sess,
+	const struct download *dl, const struct flist *f)
+{
+	const char	*suffix;
+	float		 pct, size;
+	int		 digits;
+
+	if (sess->opts->server)
+		return;
+
+	/* An empty file counts as completely downloaded. */
+
+	if (0 == dl->total)
+		pct = 100.0;
+	else
+		pct = 100.0 * dl->downloaded / dl->total;
+
+	/* Pick the largest unit that the size strictly exceeds. */
+
+	if (dl->total > 1024 * 1024 * 1024) {
+		size = dl->total / (1024. * 1024. * 1024.);
+		digits = 3;
+		suffix = "GB";
+	} else if (dl->total > 1024 * 1024) {
+		size = dl->total / (1024. * 1024.);
+		digits = 2;
+		suffix = "MB";
+	} else if (dl->total > 1024) {
+		size = dl->total / 1024.;
+		digits = 1;
+		suffix = "KB";
+	} else {
+		size = dl->total;
+		digits = 0;
+		suffix = "B";
+	}
+
+	LOG1(sess, "%s (%.*f %s, %.1f%% downloaded)",
+		f->path, digits, size, suffix, pct);
+}
+
+/*
+ * Prepare the download context for the file at index "idx".
+ * Only the per-file members are reset: the file list (p->fl, p->flsz),
+ * the destination (p->rootfd), and the sender descriptor (p->fdin) are
+ * left as they are.
+ * The MD4 context is restarted and primed with the session seed.
+ * May only be called in the DOWNLOAD_READ_NEXT state.
+ */
+static void
+download_reinit(struct sess *sess, struct download *p, size_t idx)
+{
+	int32_t	 seed;
+
+	assert(DOWNLOAD_READ_NEXT == p->state);
+
+	/* No file, no map, no descriptors, nothing counted yet. */
+
+	p->idx = idx;
+	p->fname = NULL;
+	p->fd = -1;
+	p->ofd = -1;
+	p->map = MAP_FAILED;
+	p->mapsz = 0;
+	p->total = 0;
+	p->downloaded = 0;
+	memset(&p->blk, 0, sizeof(struct blkset));
+
+	/* Fresh hash, with the (little-endian) seed going in first. */
+
+	seed = htole32(sess->seed);
+	MD4_Init(&p->ctx);
+	MD4_Update(&p->ctx, &seed, sizeof(int32_t));
+}
+
+/*
+ * Release everything held for the current file and return the context
+ * to the DOWNLOAD_READ_NEXT state.
+ * With a non-zero "cleanup", the temporary file is also unlinked, but
+ * only if it was actually opened (p->fd is valid) and named.
+ */
+static void
+download_cleanup(struct download *p, int cleanup)
+{
+	int	 remove_tmp;
+
+	/* The mapping of the origin file, if we made one. */
+
+	if (MAP_FAILED != p->map) {
+		assert(p->mapsz);
+		munmap(p->map, p->mapsz);
+		p->mapsz = 0;
+		p->map = MAP_FAILED;
+	}
+
+	/* The origin file itself. */
+
+	if (-1 != p->ofd)
+		close(p->ofd);
+	p->ofd = -1;
+
+	/* The temporary output file, removed on request. */
+
+	if (-1 != p->fd) {
+		remove_tmp = cleanup && NULL != p->fname;
+		close(p->fd);
+		p->fd = -1;
+		if (remove_tmp)
+			unlinkat(p->rootfd, p->fname, 0);
+	}
+
+	free(p->fname);
+	p->fname = NULL;
+	p->state = DOWNLOAD_READ_NEXT;
+}
+
+/*
+ * Create a download context for the file list "fl" (of "flsz"
+ * entries), writing beneath the directory "rootfd" and reading the
+ * sender's data from "fdin".
+ * The pre-write buffer is allocated here as well unless OBUF_SIZE is
+ * zero.
+ * Returns NULL if memory runs out, else a pointer that must be handed
+ * to download_free().
+ */
+struct download *
+download_alloc(struct sess *sess, int fdin,
+	const struct flist *fl, size_t flsz, int rootfd)
+{
+	struct download	*dl;
+
+	dl = malloc(sizeof(struct download));
+	if (NULL == dl) {
+		ERR(sess, "malloc");
+		return NULL;
+	}
+
+	/* Persistent members; the state must be set before reinit. */
+
+	dl->state = DOWNLOAD_READ_NEXT;
+	dl->fdin = fdin;
+	dl->rootfd = rootfd;
+	dl->fl = fl;
+	dl->flsz = flsz;
+	dl->obuf = NULL;
+	dl->obufsz = 0;
+	dl->obufmax = OBUF_SIZE;
+
+	download_reinit(sess, dl, 0);
+
+	/* Without a buffer size there's nothing more to allocate. */
+
+	if (0 == dl->obufmax)
+		return dl;
+
+	dl->obuf = malloc(dl->obufmax);
+	if (NULL == dl->obuf) {
+		ERR(sess, "malloc");
+		free(dl);
+		return NULL;
+	}
+
+	return dl;
+}
+
+/*
+ * Tear down a download context: clean up the file in progress
+ * (unlinking its temporary file, if any), then release the pre-write
+ * buffer and the context itself.
+ * A NULL pointer is accepted and ignored.
+ */
+void
+download_free(struct download *p)
+{
+
+	if (NULL != p) {
+		download_cleanup(p, 1);
+		free(p->obuf);
+		free(p);
+	}
+}
+
+/*
+ * Write exactly "len" bytes of "data" to the output file.
+ * A failed or a short write are both errors.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+buf_write(struct sess *sess, const struct download *p,
+	const char *data, size_t len)
+{
+	ssize_t	 wr;
+
+	wr = write(p->fd, data, len);
+	if (wr < 0) {
+		ERR(sess, "%s: write", p->fname);
+		return 0;
+	}
+	if ((size_t)wr != len) {
+		ERRX(sess, "%s: short write", p->fname);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Buffered output: rather than calling write() for every piece of
+ * data, collect pieces in the pre-write buffer and write that out in
+ * one go once it's full.
+ * Whatever doesn't fit after the buffer was flushed is written
+ * directly.
+ * Passing zero as "sz" flushes the buffer without adding to it.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+buf_copy(struct sess *sess,
+	const char *buf, size_t sz, struct download *p)
+{
+	size_t	 room, chunk;
+
+	assert(p->obufsz <= p->obufmax);
+
+	/*
+	 * Top up the buffer first.
+	 * If that swallows all of the input, we're done.
+	 * With buffering disabled (obufmax of zero) this branch is
+	 * never entered.
+	 */
+
+	if (sz && p->obufsz < p->obufmax) {
+		assert(NULL != p->obuf);
+		room = p->obufmax - p->obufsz;
+		assert(room > 0);
+		chunk = sz < room ? sz : room;
+		memcpy(p->obuf + p->obufsz, buf, chunk);
+		p->obufsz += chunk;
+		buf += chunk;
+		sz -= chunk;
+		assert(p->obufsz <= p->obufmax);
+		if (0 == sz)
+			return 1;
+	}
+
+	/* Flush whatever the buffer holds. */
+
+	if (p->obufsz) {
+		assert(p->obufmax);
+		assert(p->obufsz <= p->obufmax);
+		assert(NULL != p->obuf);
+		if ( ! buf_write(sess, p, p->obuf, p->obufsz))
+			return 0;
+		p->obufsz = 0;
+	}
+
+	/*
+	 * Anything still left goes straight to the file.
+	 * Without a pre-write buffer, this is the only write.
+	 */
+
+	return 0 == sz || buf_write(sess, p, buf, sz);
+}
+
+/*
+ * The downloader waits on a file the sender is going to give us, opens
+ * and mmaps the existing file, opens a temporary file, dumps the file
+ * (or metadata) into the temporary file, then renames.
+ * This happens in several possible phases to avoid blocking: the
+ * current phase is kept in p->state and each call advances the
+ * download by at most one step.
+ * When the existing file has been opened, its descriptor is stored in
+ * "ofd" as the descriptor to wait for; it's set back to -1 once the
+ * file has been mapped.
+ * Returns <0 on failure, 0 on no more data (end of phase), >0 on
+ * success (more data to be read from the sender).
+ * On failure (after a file index was accepted), the download context
+ * is cleaned up and any temporary file is removed.
+ */
+int
+rsync_downloader(struct download *p, struct sess *sess, int *ofd)
+{
+	int32_t		 idx, rawtok;
+	uint32_t	 hash;
+	const struct flist *f;
+	size_t		 sz, dirlen, tok;
+	const char	*cp;
+	mode_t		 perm;
+	struct stat	 st;
+	char		*buf = NULL;	/* heap only: freed at "out" */
+	char		*src;		/* points into the origin map */
+	unsigned char	 ourmd[MD4_DIGEST_LENGTH],
+			 md[MD4_DIGEST_LENGTH];
+	struct timespec	 tv[2];
+
+	/*
+	 * If we don't have a download already in session, then the next
+	 * one is coming in.
+	 * Read either the stop (phase) signal from the sender or block
+	 * metadata, in which case we open our file and wait for data.
+	 */
+
+	if (DOWNLOAD_READ_NEXT == p->state) {
+		if ( ! io_read_int(sess, p->fdin, &idx)) {
+			ERRX1(sess, "io_read_int");
+			return -1;
+		} else if (idx >= 0 && (size_t)idx >= p->flsz) {
+			ERRX(sess, "index out of bounds");
+			return -1;
+		} else if (idx < 0) {
+			LOG3(sess, "downloader: phase complete");
+			return 0;
+		}
+
+		/* Short-circuit: dry_run mode does nothing. */
+
+		if (sess->opts->dry_run)
+			return 1;
+
+		/*
+		 * Now get our block information.
+		 * This is all we'll need to reconstruct the file from
+		 * the map, as block sizes are regular.
+		 */
+
+		download_reinit(sess, p, idx);
+		if ( ! blk_send_ack(sess, p->fdin, &p->blk)) {
+			ERRX1(sess, "blk_send_ack");
+			goto out;
+		}
+
+		/*
+		 * Next, we want to open the existing file for using as
+		 * block input.
+		 * We do this in a non-blocking way, so if the open
+		 * succeeds, then we'll go reentrant til the file is
+		 * readable and we can mmap() it.
+		 * Set the file descriptor that we want to wait for.
+		 */
+
+		p->state = DOWNLOAD_READ_LOCAL;
+		f = &p->fl[idx];
+		p->ofd = openat(p->rootfd, f->path,
+			O_RDONLY | O_NONBLOCK, 0);
+
+		if (-1 == p->ofd && ENOENT != errno) {
+			ERR(sess, "%s: openat", f->path);
+			goto out;
+		} else if (-1 != p->ofd) {
+			*ofd = p->ofd;
+			return 1;
+		}
+
+		/* Fall-through: there's no file. */
+	}
+
+	/*
+	 * At this point, the server is sending us data and we want to
+	 * hoover it up as quickly as possible or we'll deadlock.
+	 * We want to be pulling off of p->fdin as quickly as possible,
+	 * so perform as much buffering as we can.
+	 */
+
+	f = &p->fl[p->idx];
+
+	/*
+	 * Next in sequence: we have an open download session but
+	 * haven't created our temporary file.
+	 * This means that we've already opened (or tried to open) the
+	 * original file in a nonblocking way, and we can map it.
+	 */
+
+	if (DOWNLOAD_READ_LOCAL == p->state) {
+		assert(NULL == p->fname);
+
+		/*
+		 * Try to fstat() the file descriptor if valid and make
+		 * sure that we're still a regular file.
+		 * Then, if it has non-zero size, mmap() it for hashing.
+		 */
+
+		if (-1 != p->ofd &&
+		    -1 == fstat(p->ofd, &st)) {
+			ERR(sess, "%s: fstat", f->path);
+			goto out;
+		} else if (-1 != p->ofd && ! S_ISREG(st.st_mode)) {
+			WARNX(sess, "%s: not regular", f->path);
+			goto out;
+		}
+
+		if (-1 != p->ofd && st.st_size > 0) {
+			p->mapsz = st.st_size;
+			p->map = mmap(NULL, p->mapsz,
+				PROT_READ, MAP_SHARED, p->ofd, 0);
+			if (MAP_FAILED == p->map) {
+				ERR(sess, "%s: mmap", f->path);
+				goto out;
+			}
+		}
+
+		/* Success either way: we don't need this. */
+
+		*ofd = -1;
+
+		/*
+		 * Create the temporary file.
+		 * Use a simple scheme of path/.FILE.RANDOM, where we
+		 * fill in RANDOM with an arc4random number.
+		 * The tricky part is getting into the directory if
+		 * we're in recursive mode.
+		 */
+
+		hash = arc4random();
+		if (sess->opts->recursive &&
+		    NULL != (cp = strrchr(f->path, '/'))) {
+			dirlen = cp - f->path;
+			if (asprintf(&p->fname, "%.*s/.%s.%" PRIu32,
+			    (int)dirlen, f->path,
+			    f->path + dirlen + 1, hash) < 0)
+				p->fname = NULL;
+		} else {
+			if (asprintf(&p->fname, ".%s.%" PRIu32,
+			    f->path, hash) < 0)
+				p->fname = NULL;
+		}
+		if (NULL == p->fname) {
+			ERR(sess, "asprintf");
+			goto out;
+		}
+
+		/*
+		 * Inherit permissions from the source file if we're new
+		 * or specifically told with -p.
+		 */
+
+		if ( ! sess->opts->preserve_perms)
+			perm = -1 == p->ofd ? f->st.mode : st.st_mode;
+		else
+			perm = f->st.mode;
+
+		p->fd = openat(p->rootfd, p->fname,
+			O_APPEND|O_WRONLY|O_CREAT|O_EXCL, perm);
+
+		if (-1 == p->fd) {
+			ERR(sess, "%s: openat", p->fname);
+			goto out;
+		}
+
+		/*
+		 * FIXME: we can technically wait until the temporary
+		 * file is writable, but since it's guaranteed to be
+		 * empty, I don't think this is a terribly expensive
+		 * operation as it doesn't involve reading the file into
+		 * memory beforehand.
+		 */
+
+		LOG3(sess, "%s: temporary: %s", f->path, p->fname);
+		p->state = DOWNLOAD_READ_REMOTE;
+		return 1;
+	}
+
+	/*
+	 * This matches the sequence in blk_flush().
+	 * If we've gotten here, then we have a possibly-open map file
+	 * (not for new files) and our temporary file is writable.
+	 * We read the size/token, then optionally the data.
+	 * The size >0 for reading data, 0 for no more data, and <0 for
+	 * a token indicator.
+	 */
+
+	assert(DOWNLOAD_READ_REMOTE == p->state);
+	assert(NULL != p->fname);
+	assert(-1 != p->fd);
+	assert(-1 != p->fdin);
+
+	if ( ! io_read_int(sess, p->fdin, &rawtok)) {
+		ERRX1(sess, "io_read_int");
+		goto out;
+	}
+
+	if (rawtok > 0) {
+		/*
+		 * Literal data from the sender.
+		 * The buffer is freed at "out" on all error paths.
+		 */
+
+		sz = rawtok;
+		if (NULL == (buf = malloc(sz))) {
+			ERR(sess, "malloc");
+			goto out;
+		}
+		if ( ! io_read_buf(sess, p->fdin, buf, sz)) {
+			ERRX1(sess, "io_read_buf");
+			goto out;
+		} else if ( ! buf_copy(sess, buf, sz, p)) {
+			ERRX1(sess, "buf_copy");
+			goto out;
+		}
+		p->total += sz;
+		p->downloaded += sz;
+		LOG4(sess, "%s: received %zu B block", p->fname, sz);
+		MD4_Update(&p->ctx, buf, sz);
+		free(buf);
+		return 1;
+	} else if (rawtok < 0) {
+		/*
+		 * A reference to a block of our origin file.
+		 * Since rawtok is at most -1, adding one before
+		 * negating can't overflow (unlike -rawtok - 1).
+		 */
+
+		tok = -(rawtok + 1);
+		if (tok >= p->blk.blksz) {
+			ERRX(sess, "%s: token not in block "
+				"set: %zu (have %zu blocks)",
+				p->fname, tok, p->blk.blksz);
+			goto out;
+		}
+
+		/*
+		 * Only the last block may be short, and then only if
+		 * there's a remainder: a zero remainder means that the
+		 * last block is full-sized.
+		 */
+
+		sz = (tok == p->blk.blksz - 1 && p->blk.rem) ?
+			p->blk.rem : p->blk.len;
+
+		/*
+		 * Now we read from our block.
+		 * We should only be at this point if we have a
+		 * block to read from, i.e., if we were able to
+		 * map our origin file and create a block
+		 * profile from it.
+		 * The token and block geometry come off the wire, so
+		 * make sure the block really lies within our map
+		 * instead of trusting them: tok * len + sz must not
+		 * exceed mapsz (computed without overflowing).
+		 */
+
+		if (MAP_FAILED == p->map || 0 == p->blk.len ||
+		    sz > p->mapsz ||
+		    tok > (p->mapsz - sz) / p->blk.len) {
+			ERRX(sess, "%s: block %zu (%zu B) not within "
+				"origin file (%zu B)",
+				p->fname, tok, sz, p->mapsz);
+			goto out;
+		}
+
+		src = (char *)p->map + (tok * p->blk.len);
+		if ( ! buf_copy(sess, src, sz, p)) {
+			ERRX1(sess, "buf_copy");
+			goto out;
+		}
+		p->total += sz;
+		LOG4(sess, "%s: copied %zu B", p->fname, sz);
+		MD4_Update(&p->ctx, src, sz);
+		return 1;
+	}
+
+	/* End of file data: flush whatever is still buffered. */
+
+	if ( ! buf_copy(sess, NULL, 0, p)) {
+		ERRX1(sess, "buf_copy");
+		goto out;
+	}
+
+	assert(0 == rawtok);
+	assert(0 == p->obufsz);
+
+	/*
+	 * Make sure our resulting MD4 hashes match.
+	 * FIXME: if the MD4 hashes don't match, then our file has
+	 * changed out from under us.
+	 * This should require us to re-run the sequence in another
+	 * phase.
+	 */
+
+	MD4_Final(ourmd, &p->ctx);
+
+	if ( ! io_read_buf(sess, p->fdin, md, MD4_DIGEST_LENGTH)) {
+		ERRX1(sess, "io_read_buf");
+		goto out;
+	} else if (memcmp(md, ourmd, MD4_DIGEST_LENGTH)) {
+		ERRX(sess, "%s: hash does not match", p->fname);
+		goto out;
+	}
+
+	/*
+	 * Conditionally adjust file modification time.
+	 * The access time is set to the current time.
+	 */
+
+	if (sess->opts->preserve_times) {
+		tv[0].tv_sec = time(NULL);
+		tv[0].tv_nsec = 0;
+		tv[1].tv_sec = f->st.mtime;
+		tv[1].tv_nsec = 0;
+		if (-1 == futimens(p->fd, tv)) {
+			ERR(sess, "%s: futimens", p->fname);
+			goto out;
+		}
+		LOG4(sess, "%s: updated date", f->path);
+	}
+
+	/* Finally, rename the temporary to the real file. */
+
+	if (-1 == renameat(p->rootfd, p->fname, p->rootfd, f->path)) {
+		ERR(sess, "%s: renameat: %s", p->fname, f->path);
+		goto out;
+	}
+
+	log_file(sess, p, f);
+	download_cleanup(p, 0);
+	return 1;
+out:
+	free(buf);
+	download_cleanup(p, 1);
+	return -1;
+}
diff --git a/usr.bin/rsync/extern.h b/usr.bin/rsync/extern.h
new file mode 100644
index 00000000000..8d3d591eace
--- /dev/null
+++ b/usr.bin/rsync/extern.h
@@ -0,0 +1,295 @@
+/* $Id: extern.h,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef EXTERN_H
+#define EXTERN_H
+
+/*
+ * This is the rsync protocol version that we support.
+ */
+#define RSYNC_PROTOCOL (27)
+
+/*
+ * Maximum amount of file data sent over the wire at once.
+ */
+#define MAX_CHUNK (32 * 1024)
+
+/*
+ * This is the minimum size for a block of data not including those in
+ * the remainder block.
+ */
+#define BLOCK_SIZE_MIN (700)
+
+/*
+ * The sender and receiver use a two-phase synchronisation process.
+ * The first uses two-byte hashes; the second, 16-byte.
+ * (The second must hold a full MD4 digest.)
+ */
+#define CSUM_LENGTH_PHASE1 (2)
+#define CSUM_LENGTH_PHASE2 (16)
+
+/*
+ * Operating mode for a client or a server.
+ * In rsync_client(), sender mode runs rsync_sender() over our sources,
+ * while receiver mode runs rsync_receiver() into our sink (and has
+ * fargs_cmdline() start the remote side with --sender).
+ * This is relative to which host we're running on.
+ */
+enum fmode {
+ FARGS_SENDER,
+ FARGS_RECEIVER
+};
+
+/*
+ * File arguments given on the command line.
+ * See struct opts.
+ */
+struct fargs {
+ char *host; /* hostname or NULL if local */
+ char **sources; /* transfer sources */
+ size_t sourcesz; /* number of sources */
+ char *sink; /* transfer endpoint */
+ enum fmode mode; /* mode of operation */
+ int remote; /* uses rsync:// or :: for remote */
+ char *module; /* if rsync://, the module */
+};
+
+/*
+ * The subset of stat(2) information that we need.
+ * (There are some parts we don't use yet.)
+ */
+struct flstat {
+ mode_t mode; /* mode */
+ uid_t uid; /* user */
+ gid_t gid; /* group */
+ off_t size; /* size */
+ time_t mtime; /* modification */
+ unsigned int flags; /* FLSTAT_xxx bits, see below */
+#define FLSTAT_TOP_DIR 0x01 /* a top-level directory */
+
+};
+
+/*
+ * A list of files with their statistics.
+ */
+struct flist {
+ char *path; /* path relative to root */
+ const char *wpath; /* "working" path for receiver (the sort key) */
+ struct flstat st; /* file information */
+ char *link; /* symlink target or NULL */
+};
+
+/*
+ * Options passed into the command line.
+ * See struct fargs.
+ * fargs_cmdline() forwards most of these to the remote process.
+ */
+struct opts {
+ int sender; /* --sender */
+ int server; /* --server */
+ int recursive; /* -r */
+ int verbose; /* -v (counted; at most four are forwarded) */
+ int dry_run; /* -n */
+ int preserve_times; /* -t */
+ int preserve_perms; /* -p */
+ int preserve_links; /* -l */
+ int del; /* --delete */
+ const char *rsync_path; /* --rsync-path (NULL for the default) */
+};
+
+/*
+ * An individual block description for a file.
+ * See struct blkset.
+ */
+struct blk {
+ off_t offs; /* offset in file */
+ size_t idx; /* block index */
+ size_t len; /* bytes in block */
+ uint32_t chksum_short; /* fast checksum */
+ unsigned char chksum_long[CSUM_LENGTH_PHASE2]; /* slow checksum */
+};
+
+/*
+ * When transferring file contents, we break the file down into blocks
+ * and work with those.
+ * NOTE(review): a zero "rem" presumably means that the file is an exact
+ * multiple of "len", so the last block is full-sized; confirm against
+ * the code computing the block set.
+ */
+struct blkset {
+ off_t size; /* file size */
+ size_t rem; /* terminal block length if non-zero */
+ size_t len; /* block length */
+ size_t csum; /* checksum length */
+ struct blk *blks; /* all blocks */
+ size_t blksz; /* number of blks */
+};
+
+/*
+ * Values required during a communication session.
+ * rsync_client() zeroes the structure, sets the options and the local
+ * version, reads the remote version and seed off the wire, and then
+ * switches on multiplexed reads.
+ */
+struct sess {
+ const struct opts *opts; /* system options */
+ int32_t seed; /* checksum seed */
+ int32_t lver; /* local version */
+ int32_t rver; /* remote version */
+ uint64_t total_read; /* non-logging wire/reads */
+ uint64_t total_size; /* total file size */
+ uint64_t total_write; /* non-logging wire/writes */
+ int mplex_reads; /* multiplexing reads? */
+ size_t mplex_read_remain; /* remaining bytes */
+ int mplex_writes; /* multiplexing writes? */
+};
+
+struct download;
+struct upload;
+
+/*
+ * Logging, warning, and error wrappers.
+ * Each passes the calling __FILE__ and __LINE__ along with the format
+ * string and its arguments to the matching rsync_xxx() function.
+ * LOG0 through LOG4 pass -1 through 3 as the integer argument of
+ * rsync_log(); WARN, WARN1, and WARN2 pass 0 through 2 as the integer
+ * argument of rsync_warn().
+ */
+#define LOG0(_sess, _fmt, ...) \
+ rsync_log((_sess), __FILE__, __LINE__, -1, (_fmt), ##__VA_ARGS__)
+#define LOG1(_sess, _fmt, ...) \
+ rsync_log((_sess), __FILE__, __LINE__, 0, (_fmt), ##__VA_ARGS__)
+#define LOG2(_sess, _fmt, ...) \
+ rsync_log((_sess), __FILE__, __LINE__, 1, (_fmt), ##__VA_ARGS__)
+#define LOG3(_sess, _fmt, ...) \
+ rsync_log((_sess), __FILE__, __LINE__, 2, (_fmt), ##__VA_ARGS__)
+#define LOG4(_sess, _fmt, ...) \
+ rsync_log((_sess), __FILE__, __LINE__, 3, (_fmt), ##__VA_ARGS__)
+#define ERRX1(_sess, _fmt, ...) \
+ rsync_errx1((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+#define WARNX(_sess, _fmt, ...) \
+ rsync_warnx((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+#define WARN(_sess, _fmt, ...) \
+ rsync_warn((_sess), 0, __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+#define WARN1(_sess, _fmt, ...) \
+ rsync_warn((_sess), 1, __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+#define WARN2(_sess, _fmt, ...) \
+ rsync_warn((_sess), 2, __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+#define ERR(_sess, _fmt, ...) \
+ rsync_err((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+#define ERRX(_sess, _fmt, ...) \
+ rsync_errx((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__)
+
+__BEGIN_DECLS
+
+void rsync_log(struct sess *,
+ const char *, size_t, int, const char *, ...)
+ __attribute__((format(printf, 5, 6))); /* see LOG0..LOG4 */
+void rsync_warnx1(struct sess *,
+ const char *, size_t, const char *, ...)
+ __attribute__((format(printf, 4, 5))); /* NOTE(review): no macro in this header uses this */
+void rsync_warn(struct sess *, int,
+ const char *, size_t, const char *, ...)
+ __attribute__((format(printf, 5, 6))); /* see WARN, WARN1, WARN2 */
+void rsync_warnx(struct sess *, const char *,
+ size_t, const char *, ...)
+ __attribute__((format(printf, 4, 5))); /* see WARNX */
+void rsync_err(struct sess *, const char *,
+ size_t, const char *, ...)
+ __attribute__((format(printf, 4, 5))); /* see ERR */
+void rsync_errx(struct sess *, const char *,
+ size_t, const char *, ...)
+ __attribute__((format(printf, 4, 5))); /* see ERRX */
+void rsync_errx1(struct sess *, const char *,
+ size_t, const char *, ...)
+ __attribute__((format(printf, 4, 5))); /* see ERRX1 */
+
+int flist_del(struct sess *, int,
+ const struct flist *, size_t);
+int flist_gen(struct sess *, size_t, char **,
+ struct flist **, size_t *);
+int flist_gen_local(struct sess *, const char *,
+ struct flist **, size_t *);
+void flist_free(struct flist *, size_t);
+int flist_recv(struct sess *, int,
+ struct flist **, size_t *);
+int flist_send(struct sess *, int, int,
+ const struct flist *, size_t);
+int flist_gen_dels(struct sess *, const char *,
+ struct flist **, size_t *,
+ const struct flist *, size_t);
+
+char **fargs_cmdline(struct sess *, const struct fargs *); /* NULL on allocation failure */
+
+int io_read_buf(struct sess *, int, void *, size_t);
+int io_read_byte(struct sess *, int, uint8_t *);
+int io_read_check(struct sess *, int);
+int io_read_flush(struct sess *, int);
+int io_read_int(struct sess *, int, int32_t *);
+int io_read_long(struct sess *, int, int64_t *);
+int io_read_size(struct sess *, int, size_t *);
+int io_read_ulong(struct sess *, int, uint64_t *);
+int io_write_buf(struct sess *, int, const void *, size_t);
+int io_write_byte(struct sess *, int, uint8_t);
+int io_write_int(struct sess *, int, int32_t);
+int io_write_line(struct sess *, int, const char *);
+int io_write_long(struct sess *, int, int64_t);
+
+void io_buffer_int(struct sess *, void *,
+ size_t *, size_t, int32_t);
+void io_buffer_buf(struct sess *, void *,
+ size_t *, size_t, const void *, size_t);
+
+void io_unbuffer_int(struct sess *, const void *,
+ size_t *, size_t, int32_t *);
+int io_unbuffer_size(struct sess *, const void *,
+ size_t *, size_t, size_t *);
+void io_unbuffer_buf(struct sess *, const void *,
+ size_t *, size_t, void *, size_t);
+
+void rsync_child(const struct opts *, int, const struct fargs *)
+ __attribute__((noreturn));
+int rsync_receiver(struct sess *, int, int, const char *);
+int rsync_sender(struct sess *, int, int, size_t, char **);
+int rsync_client(const struct opts *, int, const struct fargs *); /* zero on failure, non-zero on success */
+int rsync_socket(const struct opts *, const struct fargs *);
+int rsync_server(const struct opts *, size_t, char *[]);
+int rsync_downloader(struct download *, struct sess *, int *); /* <0 failure, 0 end of phase, >0 more data */
+int rsync_uploader(struct upload *,
+ int *, struct sess *, int *);
+int rsync_uploader_tail(struct upload *, struct sess *);
+
+struct download *download_alloc(struct sess *, int,
+ const struct flist *, size_t, int); /* NULL on allocation failure */
+void download_free(struct download *); /* NULL is accepted */
+struct upload *upload_alloc(struct sess *, int, int, size_t,
+ const struct flist *, size_t, mode_t);
+void upload_free(struct upload *);
+
+struct blkset *blk_recv(struct sess *, int, const char *);
+int blk_recv_ack(struct sess *,
+ int, const struct blkset *, int32_t);
+int blk_match(struct sess *, int,
+ const struct blkset *, const char *);
+int blk_send(struct sess *, int, size_t,
+ const struct blkset *, const char *);
+int blk_send_ack(struct sess *, int, struct blkset *);
+int blk_merge(struct sess *, int, int,
+ const struct blkset *, int, const char *,
+ const void *, size_t, float *);
+void blkset_free(struct blkset *);
+
+uint32_t hash_fast(const void *, size_t);
+void hash_slow(const void *, size_t,
+ unsigned char *, const struct sess *);
+void hash_file(const void *, size_t,
+ unsigned char *, const struct sess *);
+
+int mkpath(struct sess *, char *);
+
+char *symlink_read(struct sess *, const char *);
+char *symlinkat_read(struct sess *, int, const char *);
+
+int sess_stats_send(struct sess *, int);
+int sess_stats_recv(struct sess *, int);
+
+__END_DECLS
+
+#endif /*!EXTERN_H*/
diff --git a/usr.bin/rsync/fargs.c b/usr.bin/rsync/fargs.c
new file mode 100644
index 00000000000..0120479bee1
--- /dev/null
+++ b/usr.bin/rsync/fargs.c
@@ -0,0 +1,104 @@
+/* $Id: fargs.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "extern.h"
+
+#define RSYNC_PATH "rsync"
+
+/*
+ * Build the NULL-terminated argument vector used to start the other
+ * side of the transfer: via ssh if a host is given, else the rsync
+ * program directly.
+ * The array itself is allocated; its entries are not copies but point
+ * to string literals and to strings owned by "f" and the session
+ * options.
+ * Returns NULL on allocation failure.
+ */
+char **
+fargs_cmdline(struct sess *sess, const struct fargs *f)
+{
+	const struct opts *o;
+	const char	*prog;
+	char		**argv;
+	size_t		 n = 0, max = 0, src;
+	int		 v;
+
+	assert(NULL != f);
+	assert(f->sourcesz > 0);
+
+	o = sess->opts;
+	prog = NULL != o->rsync_path ? o->rsync_path : RSYNC_PATH;
+
+	/* Be explicit with array size. */
+
+	max += 5;		/* per-mode maximum */
+	max += 10;		/* shared args */
+	max += 1;		/* dot separator */
+	max += 1;		/* sink file */
+	max += f->sourcesz;
+	max += 1;		/* NULL pointer */
+
+	if (NULL == (argv = calloc(max, sizeof(char *)))) {
+		ERR(sess, "calloc");
+		return NULL;
+	}
+
+	/*
+	 * A remote host is reached through ssh; if we're to receive,
+	 * then the remote must be told to send.
+	 */
+
+	if (NULL != f->host) {
+		argv[n++] = "ssh";
+		argv[n++] = f->host;
+	}
+	argv[n++] = (char *)prog;
+	argv[n++] = "--server";
+	if (NULL != f->host && FARGS_RECEIVER == f->mode)
+		argv[n++] = "--sender";
+
+	/* Shared arguments: one -v per verbosity level, four at most. */
+
+	for (v = 0; v < 4 && v < o->verbose; v++)
+		argv[n++] = "-v";
+	if (o->dry_run)
+		argv[n++] = "-n";
+	if (o->preserve_times)
+		argv[n++] = "-t";
+	if (o->preserve_perms)
+		argv[n++] = "-p";
+	if (o->recursive)
+		argv[n++] = "-r";
+	if (o->preserve_links)
+		argv[n++] = "-l";
+	if (o->del)
+		argv[n++] = "--delete";
+
+	/* Terminate with a full-stop for reasons unknown. */
+
+	argv[n++] = ".";
+
+	/* A receiver names the sources; a sender, the sink. */
+
+	if (FARGS_RECEIVER != f->mode) {
+		argv[n++] = f->sink;
+	} else {
+		for (src = 0; src < f->sourcesz; src++)
+			argv[n++] = f->sources[src];
+	}
+
+	argv[n] = NULL;
+	return argv;
+}
+
diff --git a/usr.bin/rsync/flist.c b/usr.bin/rsync/flist.c
new file mode 100644
index 00000000000..be091267f0d
--- /dev/null
+++ b/usr.bin/rsync/flist.c
@@ -0,0 +1,1160 @@
+/* $Id: flist.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <fts.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * We allocate our file list in chunk sizes so as not to do it one by
+ * one.
+ * Preferably we get by with one or two allocations.
+ */
+#define FLIST_CHUNK_SIZE (1024)
+
+/*
+ * These flags are part of the rsync protocol.
+ * They are sent as the first byte for a file transmission and encode
+ * information that affects subsequent transmissions.
+ */
+#define FLIST_MODE_SAME 0x0002 /* mode is repeat */
+#define FLIST_NAME_SAME 0x0020 /* name is repeat */
+#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */
+#define FLIST_TIME_SAME 0x0080 /* time is repeat */
+
+/*
+ * Required ordering for a file list: entries are compared by their
+ * working path in strcmp(3) order.
+ * Has the signature expected of a qsort(3) comparator over an array of
+ * struct flist.
+ */
+static int
+flist_cmp(const void *p1, const void *p2)
+{
+	const struct flist	*lhs = p1;
+	const struct flist	*rhs = p2;
+
+	return strcmp(lhs->wpath, rhs->wpath);
+}
+
+/*
+ * Deduplicate our file list (which may be zero-length).
+ * Only neighbouring entries are compared, so the list must already be
+ * sorted with flist_cmp().
+ * Entries that share a working path and also name the same local file
+ * are collapsed into one (with a warning); entries that share a
+ * working path but name different local files are an error.
+ * On success "*fl" is replaced by a newly allocated array and "*sz" by
+ * its length; the strings of dropped entries are freed.
+ * On failure "*fl" and "*sz" are left for the caller to release.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
+{
+	size_t		 i, j;
+	struct flist	*new;
+	struct flist	*f, *fnext;
+
+	if (0 == *sz)
+		return 1;
+
+	/* Create a new buffer, "new", and copy. */
+
+	new = calloc(*sz, sizeof(struct flist));
+	if (NULL == new) {
+		ERR(sess, "calloc");
+		return 0;
+	}
+
+	for (i = j = 0; i < *sz - 1; i++) {
+		f = &(*fl)[i];
+		fnext = &(*fl)[i + 1];
+
+		if (strcmp(f->wpath, fnext->wpath)) {
+			new[j++] = *f;
+			continue;
+		}
+
+		/*
+		 * Our working (destination) paths are the same.
+		 * If the local files differ we need to bail out: it
+		 * means we have two different files with the same
+		 * relative path on the destination side.
+		 */
+
+		if (strcmp(f->path, fnext->path)) {
+			ERRX(sess, "%s: duplicate working path for "
+			    "possibly different file: %s, %s",
+			    f->wpath, f->path, fnext->path);
+			free(new);
+			return 0;
+		}
+
+		/*
+		 * Same file given twice: discard the first and keep
+		 * going.
+		 * The survivor is compared against its own successor on
+		 * the next iteration, so runs of three or more
+		 * duplicates collapse to a single entry as well.
+		 * Warn before freeing: "wpath" points into "path".
+		 */
+
+		WARNX(sess, "%s: duplicate path: %s",
+		    f->wpath, f->path);
+		free(f->path);
+		free(f->link);
+		f->path = f->link = NULL;
+	}
+
+	/* The last entry is never discarded by the loop above. */
+
+	new[j++] = (*fl)[*sz - 1];
+
+	/*
+	 * Reassign to the deduplicated array.
+	 * If we started out with *sz > 0, which we check for at the
+	 * beginning, then we'll always continue having *sz > 0.
+	 */
+
+	free(*fl);
+	*fl = new;
+	*sz = j;
+	assert(*sz);
+	return 1;
+}
+
+/*
+ * Mark the top-level directories of a transfer with FLSTAT_TOP_DIR.
+ * Does nothing unless we are recursive and the list is non-empty.
+ * If the first entry's working path is ".", that entry alone is the
+ * top directory.
+ * Otherwise every directory whose working path has no slash, or only
+ * a trailing one, is marked.
+ */
+static void
+flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
+{
+	size_t		 i;
+	const char	*slash;
+
+	if ( ! sess->opts->recursive || 0 == flsz)
+		return;
+
+	if (0 == strcmp(fl[0].wpath, ".")) {
+		fl[0].st.flags |= FLSTAT_TOP_DIR;
+		LOG4(sess, "%s: top-level", fl[0].wpath);
+		return;
+	}
+
+	for (i = 0; i < flsz; i++) {
+		if ( ! S_ISDIR(fl[i].st.mode))
+			continue;
+
+		/* A slash followed by more text means a nested path. */
+
+		slash = strchr(fl[i].wpath, '/');
+		if (NULL == slash || '\0' == slash[1]) {
+			fl[i].st.flags |= FLSTAT_TOP_DIR;
+			LOG4(sess, "%s: top-level", fl[i].wpath);
+		}
+	}
+}
+
+/*
+ * Decide whether an fts(3) entry belongs in a file list.
+ * Regular files, pre-order directories and symbolic links (dangling
+ * or not) are accepted.
+ * Cycles, unreadable directories, dot entries, errors, special files
+ * and entries that could not be stat'ed are warned about and skipped;
+ * any other entry kind is skipped silently.
+ * Where fts recorded an error code, errno is loaded from it before
+ * the warning is issued.
+ * Return zero to skip, non-zero to examine.
+ */
+static int
+flist_fts_check(struct sess *sess, FTSENT *ent)
+{
+
+	switch (ent->fts_info) {
+	case FTS_F:
+	case FTS_D:
+	case FTS_SL:
+	case FTS_SLNONE:
+		return 1;
+	case FTS_DC:
+		WARNX(sess, "%s: directory cycle", ent->fts_path);
+		break;
+	case FTS_DNR:
+		errno = ent->fts_errno;
+		WARN(sess, "%s: unreadable directory", ent->fts_path);
+		break;
+	case FTS_DOT:
+		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
+		break;
+	case FTS_ERR:
+		errno = ent->fts_errno;
+		WARN(sess, "%s", ent->fts_path);
+		break;
+	case FTS_DEFAULT:
+		WARNX(sess, "%s: skipping special", ent->fts_path);
+		break;
+	case FTS_NS:
+		errno = ent->fts_errno;
+		WARN(sess, "%s: could not stat", ent->fts_path);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Record in "f" the subset of "st" that the file list keeps: mode,
+ * owner and group identifiers, size and modification time.
+ */
+static void
+flist_copy_stat(struct flist *f, const struct stat *st)
+{
+
+	/* What the file is... */
+	f->st.mode = st->st_mode;
+	f->st.size = st->st_size;
+	f->st.mtime = st->st_mtime;
+
+	/* ...and who owns it. */
+	f->st.uid = st->st_uid;
+	f->st.gid = st->st_gid;
+}
+
+/*
+ * Release a file list of "sz" entries: the path and link strings of
+ * every entry, then the array itself.
+ * A NULL list is accepted and ignored.
+ */
+void
+flist_free(struct flist *f, size_t sz)
+{
+	struct flist	*cur, *end;
+
+	if (NULL == f)
+		return;
+
+	for (cur = f, end = f + sz; cur != end; cur++) {
+		free(cur->path);
+		free(cur->link);
+	}
+
+	free(f);
+}
+
+/*
+ * Serialise our file list (which may be zero-length) to the wire.
+ * Makes sure that the receiver isn't going to block on sending us
+ * return messages on the log channel.
+ * Each entry is written as: flag byte, name length, name bytes, file
+ * size, modification time, mode and, for symbolic links when links
+ * are being preserved, the link target's length and bytes.
+ * A single zero byte ends the list.
+ * The sizes of regular files are added to the session's total.
+ * Return zero on failure, non-zero on success.
+ */
+int
+flist_send(struct sess *sess, int fdin,
+    int fdout, const struct flist *fl, size_t flsz)
+{
+	size_t		 i, fnlen;
+	uint8_t		 flag;
+	const struct flist *f;
+	const char	*fn;
+
+	LOG2(sess, "sending file metadata list: %zu", flsz);
+
+	for (i = 0; i < flsz; i++) {
+		f = &fl[i];
+		fn = f->wpath;
+		fnlen = strlen(f->wpath);
+		assert(fnlen > 0);
+
+		/*
+		 * If applicable, unclog the read buffer.
+		 * This happens when the receiver has a lot of log
+		 * messages and all we're doing is sending our file list
+		 * without checking for messages.
+		 */
+
+		if (sess->mplex_reads &&
+		    io_read_check(sess, fdin) &&
+		    ! io_read_flush(sess, fdin)) {
+			ERRX1(sess, "io_read_flush");
+			return 0;
+		}
+
+		/*
+		 * For ease, make all of our filenames be "long"
+		 * regardless their actual length.
+		 * This also makes sure that we don't transmit a zero
+		 * byte unintentionally.
+		 */
+
+		flag = FLIST_NAME_LONG;
+
+		LOG3(sess, "%s: sending file metadata: "
+		    "size %jd, mtime %jd, mode %o",
+		    fn, (intmax_t)f->st.size,
+		    (intmax_t)f->st.mtime, f->st.mode);
+
+		/* Now write to the wire. */
+		/* FIXME: buffer this. */
+
+		if ( ! io_write_byte(sess, fdout, flag)) {
+			ERRX1(sess, "io_write_byte");
+			return 0;
+		} else if ( ! io_write_int(sess, fdout, fnlen)) {
+			ERRX1(sess, "io_write_int");
+			return 0;
+		} else if ( ! io_write_buf(sess, fdout, fn, fnlen)) {
+			ERRX1(sess, "io_write_buf");
+			return 0;
+		} else if ( ! io_write_long(sess, fdout, f->st.size)) {
+			ERRX1(sess, "io_write_long");
+			return 0;
+		} else if ( ! io_write_int(sess, fdout, f->st.mtime)) {
+			ERRX1(sess, "io_write_int");
+			return 0;
+		} else if ( ! io_write_int(sess, fdout, f->st.mode)) {
+			ERRX1(sess, "io_write_int");
+			return 0;
+		}
+
+		/* Optional link information. */
+
+		if (S_ISLNK(f->st.mode) &&
+		    sess->opts->preserve_links) {
+			fn = f->link;
+			fnlen = strlen(f->link);
+			if ( ! io_write_int(sess, fdout, fnlen)) {
+				ERRX1(sess, "io_write_int");
+				return 0;
+			}
+			if ( ! io_write_buf(sess, fdout, fn, fnlen)) {
+				ERRX1(sess, "io_write_buf");
+				return 0;
+			}
+		}
+
+		if (S_ISREG(f->st.mode))
+			sess->total_size += f->st.size;
+	}
+
+	/* A zero flag byte tells the peer the list is complete. */
+
+	if ( ! io_write_byte(sess, fdout, 0)) {
+		ERRX1(sess, "io_write_byte");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Read the filename of a file list.
+ * This is the most expensive part of the file list transfer, so a lot
+ * of attention has gone into transmitting as little as possible.
+ * Micro-optimisation, but whatever.
+ * "last" holds the previously received name (an empty string before
+ * the first entry) and is updated with the new one on success.
+ * Names that are empty, absolute or that backtrack with ".." are
+ * refused, as is a shared prefix longer than the previous name.
+ * Fills in "f" with the full path on success.
+ * On failure "f->path" may already be allocated: it is left for the
+ * caller to free.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_recv_name(struct sess *sess, int fd,
+    struct flist *f, uint8_t flags, char last[MAXPATHLEN])
+{
+	uint8_t		 bval;
+	size_t		 partial = 0;
+	size_t		 pathlen = 0, len;
+
+	/*
+	 * Read our filename.
+	 * If we have FLIST_NAME_SAME, we inherit some of the last
+	 * transmitted name.
+	 * If we have FLIST_NAME_LONG, then the string length is greater
+	 * than byte-size.
+	 */
+
+	if (FLIST_NAME_SAME & flags) {
+		if ( ! io_read_byte(sess, fd, &bval)) {
+			ERRX1(sess, "io_read_byte");
+			return 0;
+		}
+		partial = bval;
+
+		/*
+		 * The inherited prefix must lie within the previous
+		 * name.
+		 * Otherwise we'd copy whatever sits beyond its
+		 * terminator, NUL included, and the checks below would
+		 * only see a truncated name.
+		 */
+
+		if (partial > strlen(last)) {
+			ERRX(sess, "security violation: "
+			    "shared prefix exceeds last pathname");
+			return 0;
+		}
+	}
+
+	/* Get the (possibly-remaining) filename length. */
+
+	if (FLIST_NAME_LONG & flags) {
+		if ( ! io_read_size(sess, fd, &pathlen)) {
+			ERRX1(sess, "io_read_size");
+			return 0;
+		}
+	} else {
+		if ( ! io_read_byte(sess, fd, &bval)) {
+			ERRX1(sess, "io_read_byte");
+			return 0;
+		}
+		pathlen = bval;
+	}
+
+	/* Allocate our full filename length. */
+	/* FIXME: maximum pathname length. */
+
+	if (0 == (len = pathlen + partial)) {
+		ERRX(sess, "security violation: "
+		    "zero-length pathname");
+		return 0;
+	}
+
+	if (NULL == (f->path = malloc(len + 1))) {
+		ERR(sess, "malloc");
+		return 0;
+	}
+	f->path[len] = '\0';
+
+	if (FLIST_NAME_SAME & flags)
+		memcpy(f->path, last, partial);
+
+	if ( ! io_read_buf(sess, fd, f->path + partial, pathlen)) {
+		ERRX1(sess, "io_read_buf");
+		return 0;
+	}
+
+	if ('/' == f->path[0]) {
+		ERRX(sess, "security violation: "
+		    "absolute pathname: %s", f->path);
+		return 0;
+	}
+
+	/* Refuse ".." as a leading, inner, trailing or sole component. */
+
+	if (NULL != strstr(f->path, "/../") ||
+	    (len > 2 && 0 == strcmp(f->path + len - 3, "/..")) ||
+	    (len > 2 && 0 == strncmp(f->path, "../", 3)) ||
+	    0 == strcmp(f->path, "..")) {
+		ERRX(sess, "%s: security violation: "
+		    "backtracking pathname", f->path);
+		return 0;
+	}
+
+	/* Record our last path and construct our filename. */
+
+	strlcpy(last, f->path, MAXPATHLEN);
+	f->wpath = f->path;
+	return 1;
+}
+
+/*
+ * Make room for one more entry in a file list and count it in "*sz".
+ * When the list is at capacity "*max", it grows by FLIST_CHUNK_SIZE
+ * entries through recallocarray(3).
+ * On allocation failure nothing is changed.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_realloc(struct sess *sess,
+    struct flist **fl, size_t *sz, size_t *max)
+{
+	struct flist	*grown;
+
+	if (*sz + 1 > *max) {
+		grown = recallocarray(*fl, *max,
+		    *max + FLIST_CHUNK_SIZE, sizeof(struct flist));
+		if (NULL == grown) {
+			ERR(sess, "recallocarray");
+			return 0;
+		}
+		*fl = grown;
+		*max += FLIST_CHUNK_SIZE;
+	}
+
+	(*sz)++;
+	return 1;
+}
+
+/*
+ * Fill the entry "f" for the regular file or symbolic link "path",
+ * whose lstat(2) result is "st".
+ * The full path is duplicated for local use; the working path is the
+ * part after the last slash, or the whole path if there is none.
+ * For symbolic links the link target is read too.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_append(struct sess *sess, struct flist *f,
+    struct stat *st, const char *path)
+{
+	char	*slash;
+
+	f->path = strdup(path);
+	if (NULL == f->path) {
+		ERR(sess, "strdup");
+		return 0;
+	}
+
+	/* Only the filename part is transmitted to the receiver. */
+
+	slash = strrchr(f->path, '/');
+	f->wpath = (NULL != slash) ? slash + 1 : f->path;
+
+	/*
+	 * The receiver strips everything but the permission bits from
+	 * the mode, so there's no need to warn about it here.
+	 */
+
+	flist_copy_stat(f, st);
+
+	/* Anything but a symbolic link is complete at this point. */
+
+	if ( ! S_ISLNK(st->st_mode))
+		return 1;
+
+	f->link = symlink_read(sess, f->path);
+	if (NULL == f->link) {
+		ERRX1(sess, "symlink_read");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Receive a file list from the wire, filling in length "sz" (which may
+ * possibly be zero) and list "flp" on success.
+ * Entries are read until a zero flag byte arrives; each consists of
+ * the name, the size, the modification time and mode (either of which
+ * may be inherited from the previous entry) and, for symbolic links
+ * when links are being preserved, the link target.
+ * The finished list is sorted with flist_cmp() and its top-level
+ * directories are marked.
+ * The sizes of regular files are added to the session's total.
+ * On failure everything read so far is freed and "flp" and "sz" are
+ * set to NULL and zero.
+ * Return zero on failure, non-zero on success.
+ */
+int
+flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
+{
+ struct flist *fl = NULL;
+ struct flist *ff;
+ const struct flist *fflast = NULL;
+ size_t flsz = 0, flmax = 0, lsz;
+ uint8_t flag;
+ char last[MAXPATHLEN];
+ uint64_t lval; /* temporary values... */
+ int32_t ival;
+
+ /* No previous name yet. */
+ last[0] = '\0';
+
+ for (;;) {
+ /* A zero flag byte marks the end of the list. */
+ if ( ! io_read_byte(sess, fd, &flag)) {
+ ERRX1(sess, "io_read_byte");
+ goto out;
+ } else if (0 == flag)
+ break;
+
+ if ( ! flist_realloc(sess, &fl, &flsz, &flmax)) {
+ ERRX1(sess, "flist_realloc");
+ goto out;
+ }
+
+ /* "ff" is the new entry, "fflast" its predecessor (if any). */
+ ff = &fl[flsz - 1];
+ fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
+
+ /* Filename first. */
+
+ if ( ! flist_recv_name(sess, fd, ff, flag, last)) {
+ ERRX1(sess, "flist_recv_name");
+ goto out;
+ }
+
+ /* Read the file size. */
+
+ if ( ! io_read_ulong(sess, fd, &lval)) {
+ ERRX1(sess, "io_read_ulong");
+ goto out;
+ }
+ ff->st.size = lval;
+
+ /* Read the modification time. */
+
+ if ( ! (FLIST_TIME_SAME & flag)) {
+ if ( ! io_read_int(sess, fd, &ival)) {
+ ERRX1(sess, "io_read_int");
+ goto out;
+ }
+ ff->st.mtime = ival;
+ } else if (NULL == fflast) {
+ ERRX(sess, "same time without last entry");
+ goto out;
+ } else
+ ff->st.mtime = fflast->st.mtime;
+
+ /* Read the file mode. */
+
+ if ( ! (FLIST_MODE_SAME & flag)) {
+ if ( ! io_read_int(sess, fd, &ival)) {
+ ERRX1(sess, "io_read_int");
+ goto out;
+ }
+ ff->st.mode = ival;
+ } else if (NULL == fflast) {
+ ERRX(sess, "same mode without last entry");
+ goto out;
+ } else
+ ff->st.mode = fflast->st.mode;
+
+ /* Optionally read the link information. */
+
+ if (S_ISLNK(ff->st.mode) &&
+ sess->opts->preserve_links) {
+ if ( ! io_read_size(sess, fd, &lsz)) {
+ ERRX1(sess, "io_read_size");
+ goto out;
+ } else if (0 == lsz) {
+ ERRX(sess, "empty link name");
+ goto out;
+ }
+ /* calloc() zeroes the extra byte, terminating the target. */
+ ff->link = calloc(lsz + 1, 1);
+ if (NULL == ff->link) {
+ ERR(sess, "calloc");
+ goto out;
+ }
+ if ( ! io_read_buf(sess, fd, ff->link, lsz)) {
+ ERRX1(sess, "io_read_buf");
+ goto out;
+ }
+ }
+
+ LOG3(sess, "%s: received file metadata: "
+ "size %jd, mtime %jd, mode %o",
+ ff->path, (intmax_t)ff->st.size,
+ (intmax_t)ff->st.mtime, ff->st.mode);
+
+ if (S_ISREG(ff->st.mode))
+ sess->total_size += ff->st.size;
+ }
+
+ /* Remember to order the received list. */
+
+ LOG2(sess, "received file metadata list: %zu", flsz);
+ qsort(fl, flsz, sizeof(struct flist), flist_cmp);
+ flist_topdirs(sess, fl, flsz);
+ *sz = flsz;
+ *flp = fl;
+ return 1;
+out:
+ /* Entries counted in "flsz" own their strings: free them all. */
+ flist_free(fl, flsz);
+ *sz = 0;
+ *flp = NULL;
+ return 0;
+}
+
+/*
+ * Generate a flist possibly-recursively given a file root, which may
+ * also be a regular file or symlink.
+ * A regular file, or a symlink when links are being preserved, is
+ * appended as a single entry; a symlink without link preservation and
+ * any special file is skipped with a warning.
+ * A directory is walked with fts(3) and every accepted entry in it is
+ * appended.
+ * The root is unveil(2)ed for reading once it has been scanned.
+ * On success, augments the generated list in "fl" of length "sz" and
+ * capacity "max".
+ * On failure, entries appended so far stay in the list for the caller
+ * to free.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_gen_dirent(struct sess *sess, char *root,
+    struct flist **fl, size_t *sz, size_t *max)
+{
+	char		*cargv[2], *cp;
+	int		 rc = 0;
+	FTS		*fts;
+	FTSENT		*ent;
+	struct flist	*f;
+	size_t		 flsz = 0, stripdir;
+	struct stat	 st;
+
+	cargv[0] = root;
+	cargv[1] = NULL;
+
+	/*
+	 * If we're a file, then revert to the same actions we use for
+	 * the non-recursive scan.
+	 */
+
+	if (-1 == lstat(root, &st)) {
+		ERR(sess, "%s: lstat", root);
+		return 0;
+	}
+
+	if (S_ISLNK(st.st_mode) && ! sess->opts->preserve_links) {
+		WARNX(sess, "%s: skipping symlink", root);
+		return 1;
+	}
+
+	if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+		if ( ! flist_realloc(sess, fl, sz, max)) {
+			ERRX1(sess, "flist_realloc");
+			return 0;
+		}
+		f = &(*fl)[(*sz) - 1];
+
+		if ( ! flist_append(sess, f, &st, root)) {
+			ERRX1(sess, "flist_append");
+			return 0;
+		} else if (-1 == unveil(root, "r")) {
+			ERR(sess, "%s: unveil", root);
+			return 0;
+		}
+		return 1;
+	}
+
+	if ( ! S_ISDIR(st.st_mode)) {
+		WARNX(sess, "%s: skipping special", root);
+		return 1;
+	}
+
+	/*
+	 * If we end with a slash, it means that we're not supposed to
+	 * copy the directory part itself---only the contents.
+	 * So set "stripdir" to be what we take out.
+	 */
+
+	stripdir = strlen(root);
+	assert(stripdir > 0);
+	if ('/' != root[stripdir - 1])
+		stripdir = 0;
+
+	/*
+	 * If we're not stripping anything, then see if we need to strip
+	 * out the leading material in the path up to and including the
+	 * last directory component.
+	 */
+
+	if (0 == stripdir)
+		if (NULL != (cp = strrchr(root, '/')))
+			stripdir = cp - root + 1;
+
+	/*
+	 * If we're recursive, then we need to take down all of the
+	 * files and directory components, so use fts(3).
+	 * Copying the information file-by-file into the flstat.
+	 * We'll make sense of it in flist_send.
+	 */
+
+	if (NULL == (fts = fts_open(cargv, FTS_PHYSICAL, NULL))) {
+		ERR(sess, "fts_open");
+		return 0;
+	}
+
+	/*
+	 * errno is kept at zero whenever fts_read() is entered so that
+	 * a non-zero value after the loop can only stem from it.
+	 */
+
+	errno = 0;
+	while (NULL != (ent = fts_read(fts))) {
+		if ( ! flist_fts_check(sess, ent)) {
+			errno = 0;
+			continue;
+		}
+
+		/* We don't allow symlinks without -l. */
+
+		assert(NULL != ent->fts_statp);
+		if (S_ISLNK(ent->fts_statp->st_mode) &&
+		    ! sess->opts->preserve_links) {
+			WARNX(sess, "%s: skipping "
+			    "symlink", ent->fts_path);
+			/* As for the other skipped entries above. */
+			errno = 0;
+			continue;
+		}
+
+		/* Allocate a new file entry. */
+
+		if ( ! flist_realloc(sess, fl, sz, max)) {
+			ERRX1(sess, "flist_realloc");
+			goto out;
+		}
+		flsz++;
+		f = &(*fl)[*sz - 1];
+
+		/* Our path defaults to "." for the root. */
+
+		if ('\0' == ent->fts_path[stripdir]) {
+			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
+				ERR(sess, "asprintf");
+				f->path = NULL;
+				goto out;
+			}
+		} else {
+			if (NULL == (f->path = strdup(ent->fts_path))) {
+				ERR(sess, "strdup");
+				goto out;
+			}
+		}
+
+		f->wpath = f->path + stripdir;
+		flist_copy_stat(f, ent->fts_statp);
+
+		/* Optionally copy link information. */
+
+		if (S_ISLNK(ent->fts_statp->st_mode)) {
+			f->link = symlink_read(sess, f->path);
+			if (NULL == f->link) {
+				ERRX1(sess, "symlink_read");
+				goto out;
+			}
+		}
+
+		/* Reset errno for next fts_read() call. */
+		errno = 0;
+	}
+	if (errno) {
+		ERR(sess, "fts_read");
+		goto out;
+	} else if (-1 == unveil(root, "r")) {
+		ERR(sess, "%s: unveil", root);
+		goto out;
+	}
+
+	LOG3(sess, "generated %zu filenames: %s", flsz, root);
+	rc = 1;
+out:
+	fts_close(fts);
+	return rc;
+}
+
+/*
+ * Generate a flist recursively given the array of directories (or
+ * files, symlinks, doesn't matter) specified in argv (argc >0).
+ * "flp" and "sz" are pure outputs: whatever they hold on entry is
+ * overwritten without being freed.
+ * On success, stores the generated list in "flp" with length "sz",
+ * which may be zero.
+ * On failure the partial list is freed and "flp" and "sz" are set to
+ * NULL and zero.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_gen_dirs(struct sess *sess, size_t argc,
+    char **argv, struct flist **flp, size_t *sz)
+{
+	size_t		 i, max = 0;
+
+	/*
+	 * Start from a well-defined empty list: flist_realloc() grows
+	 * from "max", which starts at zero here, so the list and its
+	 * length must start out empty as well.
+	 */
+
+	*flp = NULL;
+	*sz = 0;
+
+	for (i = 0; i < argc; i++)
+		if ( ! flist_gen_dirent(sess, argv[i], flp, sz, &max))
+			break;
+
+	if (i == argc) {
+		LOG2(sess, "recursively generated %zu filenames", *sz);
+		return 1;
+	}
+
+	/*
+	 * Free up to the capacity, not the length: slots beyond the
+	 * length are zeroed by recallocarray() and harmless to free.
+	 */
+
+	ERRX1(sess, "flist_gen_dirent");
+	flist_free(*flp, max);
+	*flp = NULL;
+	*sz = 0;
+	return 0;
+}
+
+/*
+ * Generate list of files from the command-line argc (>0) and argv,
+ * without descending into anything.
+ * Empty arguments are ignored; directories, special files and (unless
+ * links are being preserved) symbolic links are skipped with a
+ * warning.
+ * Every accepted file is unveil(2)ed for reading.
+ * On success, stores the generated list in "flp" with length "sz",
+ * which may be zero.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+flist_gen_files(struct sess *sess, size_t argc,
+    char **argv, struct flist **flp, size_t *sz)
+{
+	struct flist	*list, *slot;
+	const char	*src;
+	size_t		 arg, used = 0;
+	struct stat	 st;
+
+	assert(argc);
+
+	/* One slot per argument is the most we can ever need. */
+
+	list = calloc(argc, sizeof(struct flist));
+	if (NULL == list) {
+		ERR(sess, "calloc");
+		return 0;
+	}
+
+	for (arg = 0; arg < argc; arg++) {
+		src = argv[arg];
+		if ('\0' == src[0])
+			continue;
+
+		if (-1 == lstat(src, &st)) {
+			ERR(sess, "%s: lstat", src);
+			goto fail;
+		}
+
+		/*
+		 * File type checks.
+		 * In non-recursive mode, we don't accept directories.
+		 * We also skip symbolic links without -l.
+		 * Beyond that, we only accept regular files.
+		 */
+
+		if (S_ISDIR(st.st_mode)) {
+			WARNX(sess, "%s: skipping directory", src);
+			continue;
+		}
+		if (S_ISLNK(st.st_mode) && ! sess->opts->preserve_links) {
+			WARNX(sess, "%s: skipping symlink", src);
+			continue;
+		}
+		if ( ! S_ISLNK(st.st_mode) && ! S_ISREG(st.st_mode)) {
+			WARNX(sess, "%s: skipping special", src);
+			continue;
+		}
+
+		slot = &list[used++];
+		assert(NULL != slot);
+
+		/* Add this file to our file-system worldview. */
+
+		if (-1 == unveil(src, "r")) {
+			ERR(sess, "%s: unveil", src);
+			goto fail;
+		}
+		if ( ! flist_append(sess, slot, &st, argv[arg])) {
+			ERRX1(sess, "flist_append");
+			goto fail;
+		}
+	}
+
+	LOG2(sess, "non-recursively generated %zu filenames", used);
+	*sz = used;
+	*flp = list;
+	return 1;
+fail:
+	flist_free(list, argc);
+	*sz = 0;
+	*flp = NULL;
+	return 0;
+}
+
+/*
+ * Generate a sorted, de-duplicated list of file metadata.
+ * In non-recursive mode (the default), we use only the files we're
+ * given.
+ * Otherwise, directories are recursively examined.
+ * Once scanning is over, further unveil(2) calls are locked out.
+ * Returns zero on failure, non-zero on success.
+ * On success, "fl" will need to be freed with flist_free().
+ * On failure nothing is left for the caller to free: "flp" and "sz"
+ * are set to NULL and zero.
+ */
+int
+flist_gen(struct sess *sess, size_t argc,
+    char **argv, struct flist **flp, size_t *sz)
+{
+	int	 rc;
+
+	assert(argc > 0);
+	rc = sess->opts->recursive ?
+	    flist_gen_dirs(sess, argc, argv, flp, sz) :
+	    flist_gen_files(sess, argc, argv, flp, sz);
+
+	/* After scanning, lock our file-system view. */
+
+	if (-1 == unveil(NULL, NULL)) {
+		ERR(sess, "unveil");
+
+		/*
+		 * A successful scan has handed us a list: don't leak
+		 * it.
+		 * A failed scan has already cleaned up after itself.
+		 */
+
+		if (rc) {
+			flist_free(*flp, *sz);
+			*flp = NULL;
+			*sz = 0;
+		}
+		return 0;
+	} else if ( ! rc)
+		return 0;
+
+	/* Deduplication compares neighbours, so sort first. */
+
+	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
+
+	if (flist_dedupe(sess, flp, sz)) {
+		flist_topdirs(sess, *flp, *sz);
+		return 1;
+	}
+
+	ERRX1(sess, "flist_dedupe");
+	flist_free(*flp, *sz);
+	*flp = NULL;
+	*sz = 0;
+	return 0;
+}
+
+/*
+ * Generate a list of files in root to delete that are within the
+ * top-level directories stipulated by "wfl".
+ * A file is listed when it is found beneath one of those directories
+ * under "root" but its path relative to "root" is not a working path
+ * in "wfl".
+ * Only handles symbolic links, directories, and regular files.
+ * Does nothing (successfully) unless we're recursive and "wfl" has at
+ * least one top-level directory.
+ * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
+ * on success.
+ * On success, "fl" will need to be freed with flist_free().
+ */
+int
+flist_gen_dels(struct sess *sess, const char *root,
+    struct flist **fl, size_t *sz,
+    const struct flist *wfl, size_t wflsz)
+{
+	char		**cargv = NULL;
+	int		  rc = 0, c;
+	FTS		 *fts = NULL;
+	FTSENT		 *ent;
+	struct flist	 *f;
+	size_t		  cargvs = 0, i, j, max = 0, stripdir;
+	ENTRY		  hent;
+	ENTRY		 *hentp;
+
+	*fl = NULL;
+	*sz = 0;
+
+	/* Only run this code when we're recursive. */
+
+	if ( ! sess->opts->recursive)
+		return 1;
+
+	/*
+	 * Gather up all top-level directories for scanning.
+	 * This is stipulated by rsync's --delete behaviour, where we
+	 * only delete things in the top-level directories given on the
+	 * command line.
+	 */
+
+	assert(wflsz > 0);
+	for (i = 0; i < wflsz; i++)
+		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
+			cargvs++;
+	if (0 == cargvs)
+		return 1;
+
+	if (NULL == (cargv = calloc(cargvs + 1, sizeof(char *)))) {
+		ERR(sess, "calloc");
+		return 0;
+	}
+
+	/*
+	 * If we're given just a "." as the first entry, that means
+	 * we're doing a relative copy with a trailing slash.
+	 * Special-case this just for the sake of simplicity.
+	 * Otherwise, look through all top-levels.
+	 */
+
+	if (wflsz && 0 == strcmp(wfl[0].wpath, ".")) {
+		assert(1 == cargvs);
+		assert(S_ISDIR(wfl[0].st.mode));
+		if (asprintf(&cargv[0], "%s/", root) < 0) {
+			ERR(sess, "asprintf");
+			cargv[0] = NULL;
+			goto out;
+		}
+		cargv[1] = NULL;
+	} else {
+		for (i = j = 0; i < wflsz; i++) {
+			if ( ! (FLSTAT_TOP_DIR & wfl[i].st.flags))
+				continue;
+			assert(S_ISDIR(wfl[i].st.mode));
+			assert(strcmp(wfl[i].wpath, "."));
+			c = asprintf(&cargv[j],
+			    "%s/%s", root, wfl[i].wpath);
+			if (c < 0) {
+				ERR(sess, "asprintf");
+				cargv[j] = NULL;
+				goto out;
+			}
+			LOG4(sess, "%s: will scan "
+			    "for deletions", cargv[j]);
+			j++;
+		}
+		assert(j == cargvs);
+		cargv[j] = NULL;
+	}
+
+	LOG2(sess, "delete from %zu directories", cargvs);
+
+	/*
+	 * Next, use the standard hcreate(3) hashtable interface to hash
+	 * all of the files that we want to synchronise.
+	 * This way, we'll be able to determine which files we want to
+	 * delete in O(n) time instead of O(n * search) time.
+	 * Plus, we can do the scan in-band and only allocate the files
+	 * we want to delete.
+	 */
+
+	if ( ! hcreate(wflsz)) {
+		ERR(sess, "hcreate");
+		goto out;
+	}
+
+	/*
+	 * Keys are private copies.
+	 * NOTE(review): keys the table has accepted are never freed
+	 * here; presumably hdestroy() releases them, as documented for
+	 * the BSD implementations -- confirm for other platforms.
+	 * A key the table did not take over is ours to free.
+	 */
+
+	for (i = 0; i < wflsz; i++) {
+		memset(&hent, 0, sizeof(ENTRY));
+		if (NULL == (hent.key = strdup(wfl[i].wpath))) {
+			ERR(sess, "strdup");
+			goto out;
+		}
+		if (NULL == (hentp = hsearch(hent, ENTER))) {
+			ERR(sess, "hsearch");
+			free(hent.key);
+			goto out;
+		} else if (hentp->key != hent.key) {
+			ERRX(sess, "%s: duplicate", wfl[i].wpath);
+			free(hent.key);
+			goto out;
+		}
+	}
+
+	/*
+	 * Now we're going to try to descend into all of the top-level
+	 * directories stipulated by the file list.
+	 * If the directories don't exist, it's ok.
+	 */
+
+	if (NULL == (fts = fts_open(cargv, FTS_PHYSICAL, NULL))) {
+		ERR(sess, "fts_open");
+		goto out;
+	}
+
+	/* Relative paths begin after "root" and its slash. */
+
+	stripdir = strlen(root) + 1;
+	errno = 0;
+	while (NULL != (ent = fts_read(fts))) {
+		if (FTS_NS == ent->fts_info)
+			continue;
+		if ( ! flist_fts_check(sess, ent)) {
+			errno = 0;
+			continue;
+		} else if (stripdir >= ent->fts_pathlen)
+			continue;
+
+		/* Look up in hashtable. */
+
+		memset(&hent, 0, sizeof(ENTRY));
+		hent.key = ent->fts_path + stripdir;
+		if (NULL != hsearch(hent, FIND))
+			continue;
+
+		/* Not found: we'll delete it. */
+
+		if ( ! flist_realloc(sess, fl, sz, &max)) {
+			ERRX1(sess, "flist_realloc");
+			goto out;
+		}
+		f = &(*fl)[*sz - 1];
+
+		if (NULL == (f->path = strdup(ent->fts_path))) {
+			ERR(sess, "strdup");
+			goto out;
+		}
+		f->wpath = f->path + stripdir;
+		assert(NULL != ent->fts_statp);
+		flist_copy_stat(f, ent->fts_statp);
+		errno = 0;
+	}
+
+	if (errno) {
+		ERR(sess, "fts_read");
+		goto out;
+	}
+
+	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
+	rc = 1;
+out:
+	if (NULL != fts)
+		fts_close(fts);
+	for (i = 0; i < cargvs; i++)
+		free(cargv[i]);
+	free(cargv);
+	hdestroy();
+
+	/*
+	 * Honour the contract on failure: hand back nothing rather
+	 * than a partly built list that the caller would leak.
+	 */
+
+	if ( ! rc) {
+		flist_free(*fl, *sz);
+		*fl = NULL;
+		*sz = 0;
+	}
+	return rc;
+}
+
+/*
+ * Delete all files and directories in "fl", resolved relative to the
+ * directory descriptor "root".
+ * The list is processed from its last entry to its first.
+ * NOTE(review): presumably this relies on the list being sorted, so
+ * that a directory's contents are removed before the directory itself
+ * -- confirm against the caller.
+ * If called with a zero-length "fl", does nothing.
+ * If dry_run is specified, simply write what would be done.
+ * Entries that no longer exist are not an error.
+ * Return zero on failure, non-zero on success.
+ */
+int
+flist_del(struct sess *sess, int root,
+    const struct flist *fl, size_t flsz)
+{
+	const struct flist	*victim;
+	size_t			 left;
+	int			 atflag;
+
+	if (0 == flsz)
+		return 1;
+
+	assert(sess->opts->del);
+	assert(sess->opts->recursive);
+
+	for (left = flsz; left > 0; left--) {
+		victim = &fl[left - 1];
+		LOG1(sess, "%s: deleting", victim->wpath);
+		if (sess->opts->dry_run)
+			continue;
+
+		assert(-1 != root);
+
+		/* Directories need the rmdir(2) flavour of unlinkat(2). */
+
+		atflag = S_ISDIR(victim->st.mode) ? AT_REMOVEDIR : 0;
+		if (-1 == unlinkat(root, victim->wpath, atflag) &&
+		    ENOENT != errno) {
+			ERR(sess, "%s: unlinkat", victim->wpath);
+			return 0;
+		}
+	}
+
+	return 1;
+}
diff --git a/usr.bin/rsync/hash.c b/usr.bin/rsync/hash.c
new file mode 100644
index 00000000000..97c12db25b1
--- /dev/null
+++ b/usr.bin/rsync/hash.c
@@ -0,0 +1,94 @@
+/* $Id: hash.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+
+#include <assert.h>
+#include <endian.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "extern.h"
+#include "md4.h"
+
+/*
+ * A fast 32-bit hash.
+ * Described in Tridgell's "Efficient Algorithms for Sorting and
+ * Synchronization" thesis and the "Rolling checksum" document.
+ * Bytes are read as signed values.
+ * The low 16 bits of the result are the running byte sum; the high 16
+ * bits are the low half of the sum of all intermediate byte sums.
+ */
+uint32_t
+hash_fast(const void *buf, size_t len)
+{
+	const signed char	*p = buf;
+	uint32_t		 lo = 0;	/* part of a(k, l) */
+	uint32_t		 hi = 0;	/* b(k, l) */
+	size_t			 pos = 0;
+
+	/*
+	 * Four bytes per round while more than four remain.
+	 * The explicit length check keeps "len - 4" from wrapping on
+	 * short inputs.
+	 */
+
+	while (len > 4 && pos < len - 4) {
+		hi += 4 * (lo + p[pos]) +
+		    3 * p[pos + 1] +
+		    2 * p[pos + 2] +
+		    p[pos + 3];
+		lo += p[pos + 0] +
+		    p[pos + 1] +
+		    p[pos + 2] +
+		    p[pos + 3];
+		pos += 4;
+	}
+
+	/* Whatever is left goes one byte at a time. */
+
+	while (pos < len) {
+		lo += p[pos++];
+		hi += lo;
+	}
+
+	/* s(k, l) = (eps % M) + 2^16 b(k, l) % M */
+
+	return (lo & 0xffff) + (hi << 16);
+}
+
+/*
+ * Slow MD4-based hash with trailing seed: the "len" bytes of "buf"
+ * are digested first, then the session seed (converted with
+ * htole32()).
+ * The digest is written to "md".
+ */
+void
+hash_slow(const void *buf, size_t len,
+    unsigned char *md, const struct sess *sess)
+{
+	MD4_CTX	 md4;
+	int32_t	 wireseed;
+
+	wireseed = htole32(sess->seed);
+
+	MD4_Init(&md4);
+	MD4_Update(&md4, buf, len);
+	MD4_Update(&md4, (unsigned char *)&wireseed, sizeof(wireseed));
+	MD4_Final(md, &md4);
+}
+
+/*
+ * Hash an entire file.
+ * This is similar to hash_slow() except the seed is hashed at the
+ * beginning of the sequence, not the end: the session seed (converted
+ * with htole32()) is digested first, then the "len" bytes of "buf".
+ * The digest is written to "md".
+ */
+void
+hash_file(const void *buf, size_t len,
+    unsigned char *md, const struct sess *sess)
+{
+	MD4_CTX	 md4;
+	int32_t	 wireseed;
+
+	wireseed = htole32(sess->seed);
+
+	MD4_Init(&md4);
+	MD4_Update(&md4, (unsigned char *)&wireseed, sizeof(wireseed));
+	MD4_Update(&md4, buf, len);
+	MD4_Final(md, &md4);
+}
diff --git a/usr.bin/rsync/io.c b/usr.bin/rsync/io.c
new file mode 100644
index 00000000000..630701dbb13
--- /dev/null
+++ b/usr.bin/rsync/io.c
@@ -0,0 +1,585 @@
+/* $Id: io.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <endian.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Poll "fd" for readable data with a zero timeout (never blocks).
+ * Returns -1 if poll(2) fails, zero if POLLIN is not set, and non-zero
+ * if it is.
+ */
+int
+io_read_check(struct sess *sess, int fd)
+{
+	struct pollfd	 pfd;
+	int		 rc;
+
+	pfd.fd = fd;
+	pfd.events = POLLIN;
+
+	rc = poll(&pfd, 1, 0);
+	if (rc < 0) {
+		ERR(sess, "poll");
+		return -1;
+	}
+
+	return pfd.revents & POLLIN;
+}
+
+/*
+ * Write buffer to non-blocking descriptor.
+ * Returns zero on failure, non-zero on success (zero or more bytes).
+ */
+static int
+io_write_nonblocking(struct sess *sess,
+	int fd, const void *buf, size_t bsz, size_t *sz)
+{
+	struct pollfd	 pfd;
+	ssize_t		 nw;
+
+	*sz = 0;
+
+	/* An empty buffer is trivially written. */
+
+	if (bsz == 0)
+		return 1;
+
+	/* Wait, without a timeout, for the descriptor to become writable. */
+
+	pfd.fd = fd;
+	pfd.events = POLLOUT;
+
+	if (poll(&pfd, 1, INFTIM) < 0) {
+		ERR(sess, "poll");
+		return 0;
+	}
+
+	/* Anything other than a clean POLLOUT is treated as failure. */
+
+	if (pfd.revents & (POLLERR|POLLNVAL)) {
+		ERRX(sess, "poll: bad fd");
+		return 0;
+	}
+	if (pfd.revents & POLLHUP) {
+		ERRX(sess, "poll: hangup");
+		return 0;
+	}
+	if (0 == (pfd.revents & POLLOUT)) {
+		ERRX(sess, "poll: unknown event");
+		return 0;
+	}
+
+	/* A single write(2): the caller loops if it was partial. */
+
+	nw = write(fd, buf, bsz);
+	if (nw < 0) {
+		ERR(sess, "write");
+		return 0;
+	}
+
+	*sz = nw;
+	return 1;
+}
+
+/*
+ * Blocking write of the full size of the buffer.
+ * Returns 0 on failure, non-zero on success (all bytes written).
+ */
+static int
+io_write_blocking(struct sess *sess,
+	int fd, const void *buf, size_t sz)
+{
+	const char	*cur = buf;
+	size_t		 left, done;
+
+	/* Keep issuing writes until every byte has been accepted. */
+
+	for (left = sz; left > 0; left -= done, cur += done) {
+		if ( ! io_write_nonblocking(sess, fd, cur, left, &done)) {
+			ERRX1(sess, "io_write_nonblocking");
+			return 0;
+		}
+		/* No progress on a non-empty buffer is an error. */
+		if (0 == done) {
+			ERRX(sess, "io_write_nonblocking: short write");
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Write "buf" of size "sz" to non-blocking descriptor.
+ * If the session multiplexes writes, the data is emitted as a series
+ * of frames, each preceded by a 4-byte little-endian tag whose top
+ * byte is 7 and whose low 24 bits are the frame's payload size.
+ * Otherwise the buffer is written as-is.
+ * Returns zero on failure, non-zero on success (all bytes written to
+ * the descriptor).
+ */
+int
+io_write_buf(struct sess *sess, int fd, const void *buf, size_t sz)
+{
+	const char	*cp = buf;
+	int32_t		 tag, tagbuf;
+	size_t		 wsz;
+	int		 c;
+
+	if ( ! sess->mplex_writes) {
+		c = io_write_blocking(sess, fd, buf, sz);
+		sess->total_write += sz;
+		return c;
+	}
+
+	while (sz > 0) {
+		/*
+		 * Clamp the frame to the 24-bit maximum.
+		 * Masking with 0xFFFFFF instead would yield a zero-sized
+		 * frame whenever "sz" is a multiple of 2^24, and the loop
+		 * would then never make progress.
+		 */
+		wsz = sz < 0xFFFFFF ? sz : 0xFFFFFF;
+		tag = (7 << 24) + wsz;
+		tagbuf = htole32(tag);
+		if ( ! io_write_blocking(sess, fd, &tagbuf, sizeof(tagbuf))) {
+			ERRX1(sess, "io_write_blocking");
+			return 0;
+		}
+		if ( ! io_write_blocking(sess, fd, cp, wsz)) {
+			ERRX1(sess, "io_write_blocking");
+			return 0;
+		}
+		sess->total_write += wsz;
+		sz -= wsz;
+		cp += wsz;
+	}
+
+	return 1;
+}
+
+/*
+ * Write "line" (NUL-terminated) followed by a newline.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_write_line(struct sess *sess, int fd, const char *line)
+{
+
+	/* The text itself, without its terminating NUL... */
+
+	if ( ! io_write_buf(sess, fd, line, strlen(line))) {
+		ERRX1(sess, "io_write_buf");
+		return 0;
+	}
+
+	/* ...followed by the line terminator. */
+
+	if ( ! io_write_byte(sess, fd, '\n')) {
+		ERRX1(sess, "io_write_byte");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Read buffer from non-blocking descriptor.
+ * Returns zero on failure, non-zero on success (zero or more bytes).
+ */
+static int
+io_read_nonblocking(struct sess *sess,
+	int fd, void *buf, size_t bsz, size_t *sz)
+{
+	struct pollfd	 pfd;
+	ssize_t		 nr;
+
+	*sz = 0;
+
+	/* A zero-sized request is trivially satisfied. */
+
+	if (bsz == 0)
+		return 1;
+
+	/* Wait, without a timeout, for data (or a hangup) on the descriptor. */
+
+	pfd.fd = fd;
+	pfd.events = POLLIN;
+
+	if (poll(&pfd, 1, INFTIM) < 0) {
+		ERR(sess, "poll");
+		return 0;
+	}
+
+	if (pfd.revents & (POLLERR|POLLNVAL)) {
+		ERRX(sess, "poll: bad fd");
+		return 0;
+	}
+	if (0 == (pfd.revents & (POLLIN|POLLHUP))) {
+		ERRX(sess, "poll: unknown event");
+		return 0;
+	}
+
+	/* End-of-file is a failure: the caller always expects data. */
+
+	nr = read(fd, buf, bsz);
+	if (nr < 0) {
+		ERR(sess, "read");
+		return 0;
+	}
+	if (nr == 0) {
+		ERRX(sess, "unexpected end of file");
+		return 0;
+	}
+
+	*sz = nr;
+	return 1;
+}
+
+/*
+ * Blocking read of the full size of the buffer.
+ * This can be called from either the error type message or a regular
+ * message---or for that matter, multiplexed or not.
+ * Returns 0 on failure, non-zero on success (all bytes read).
+ */
+static int
+io_read_blocking(struct sess *sess,
+	int fd, void *buf, size_t sz)
+{
+	char	*cur = buf;
+	size_t	 left, got;
+
+	/* Keep reading until the whole request has been filled. */
+
+	for (left = sz; left > 0; left -= got, cur += got) {
+		if ( ! io_read_nonblocking(sess, fd, cur, left, &got)) {
+			ERRX1(sess, "io_read_nonblocking");
+			return 0;
+		}
+		/* No progress on a non-empty request is an error. */
+		if (0 == got) {
+			ERRX(sess, "io_read_nonblocking: short read");
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * When we do a lot of writes in a row (such as when the sender emits
+ * the file list), the server might be sending us multiplexed log
+ * messages.
+ * If it sends too many, it clogs the socket.
+ * This function looks into the read buffer and clears out any log
+ * messages pending.
+ * If called when there are valid data reads available, this function
+ * does nothing.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_read_flush(struct sess *sess, int fd)
+{
+ int32_t tagbuf, tag;
+ char mpbuf[1024];
+
+ /* Payload of the current frame is still unread: nothing to do. */
+
+ if (sess->mplex_read_remain)
+ return 1;
+
+ /*
+ * First, read the 4-byte multiplex tag.
+ * The tag is a little-endian 32-bit value: its top 8 bits are
+ * the tag identifier (7 for normal data, !7 for out-of-band
+ * data), its low 24 bits are the remaining data size.
+ */
+
+ if ( ! io_read_blocking(sess, fd, &tagbuf, sizeof(tagbuf))) {
+ ERRX1(sess, "io_read_blocking");
+ return 0;
+ }
+ tag = le32toh(tagbuf);
+ sess->mplex_read_remain = tag & 0xFFFFFF;
+ tag >>= 24;
+ /* Regular data: leave mplex_read_remain set for the caller to consume. */
+ if (7 == tag)
+ return 1;
+
+ /* Out-of-band message: normalise the identifier relative to 7. */
+
+ tag -= 7;
+
+ if (sess->mplex_read_remain > sizeof(mpbuf)) {
+ ERRX(sess, "multiplex buffer overflow");
+ return 0;
+ } else if (0 == sess->mplex_read_remain)
+ return 1;
+
+ /* Read the whole message and drop one trailing newline, if any. */
+
+ if ( ! io_read_blocking(sess, fd,
+ mpbuf, sess->mplex_read_remain)) {
+ ERRX1(sess, "io_read_blocking");
+ return 0;
+ }
+ if ('\n' == mpbuf[sess->mplex_read_remain - 1])
+ mpbuf[--sess->mplex_read_remain] = '\0';
+
+ /*
+ * Always print the server's messages, as the server
+ * will control its own log levelling.
+ */
+
+ LOG0(sess, "%.*s", (int)sess->mplex_read_remain, mpbuf);
+ /* The message has been consumed in full. */
+ sess->mplex_read_remain = 0;
+
+ /*
+ * I only know that a tag of one means an error.
+ * This means that we should exit.
+ * NOTE(review): an empty message returns success above before
+ * this check is reached, whatever its tag.
+ */
+
+ if (1 == tag) {
+ ERRX1(sess, "error from remote host");
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Read buffer from non-blocking descriptor, possibly in multiplex read
+ * mode.
+ * Returns zero on failure, non-zero on success (all bytes read from
+ * the descriptor).
+ */
+int
+io_read_buf(struct sess *sess, int fd, void *buf, size_t sz)
+{
+	char	*dst = buf;
+	size_t	 want;
+	int	 ok;
+
+	/* Without multiplexing, the request maps directly onto the stream. */
+
+	if ( ! sess->mplex_reads) {
+		assert(0 == sess->mplex_read_remain);
+		ok = io_read_blocking(sess, fd, buf, sz);
+		sess->total_read += sz;
+		return ok;
+	}
+
+	while (sz > 0) {
+		/*
+		 * No frame payload pending: process the next tag, which
+		 * either announces more regular data or is a log message
+		 * consumed by io_read_flush() itself.
+		 */
+
+		if (0 == sess->mplex_read_remain) {
+			if ( ! io_read_flush(sess, fd)) {
+				ERRX1(sess, "io_read_flush");
+				return 0;
+			}
+			continue;
+		}
+
+		/*
+		 * Take the lesser of what the current frame still
+		 * holds and what we were asked for.
+		 */
+
+		want = sz;
+		if (sess->mplex_read_remain < want)
+			want = sess->mplex_read_remain;
+
+		if ( ! io_read_blocking(sess, fd, dst, want)) {
+			ERRX1(sess, "io_read_blocking");
+			return 0;
+		}
+
+		dst += want;
+		sz -= want;
+		sess->mplex_read_remain -= want;
+		sess->total_read += want;
+	}
+
+	return 1;
+}
+
+/*
+ * Write a 64-bit value.
+ * Values in [0, INT32_MAX] are sent as a single 32-bit integer; all
+ * others are sent as the 32-bit integer INT32_MAX followed by the full
+ * value in 64-bit little-endian form.
+ * NOTE(review): INT32_MAX itself takes the short form, but
+ * io_read_long() treats a leading INT32_MAX as the long-form marker;
+ * confirm the intended encoding of that one value.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_write_long(struct sess *sess, int fd, int64_t val)
+{
+	int64_t	 wire;
+
+	/* Short form. */
+
+	if (val >= 0 && val <= INT32_MAX)
+		return io_write_int(sess, fd, (int32_t)val);
+
+	/* Long form: marker first, then the 64-bit payload. */
+
+	if ( ! io_write_int(sess, fd, INT32_MAX)) {
+		ERRX(sess, "io_write_int");
+		return 0;
+	}
+
+	wire = htole64(val);
+	if ( ! io_write_buf(sess, fd, &wire, sizeof(wire))) {
+		ERRX(sess, "io_write_buf");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Write a 32-bit integer in little-endian byte order.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_write_int(struct sess *sess, int fd, int32_t val)
+{
+	const int32_t	 wire = htole32(val);
+
+	if (io_write_buf(sess, fd, &wire, sizeof(wire)))
+		return 1;
+
+	ERRX(sess, "io_write_buf");
+	return 0;
+}
+
+/*
+ * A simple assertion-protected memory copy from the input "val" of size
+ * "valsz" into our buffer "buf", full size "buflen", position "bufpos".
+ * Increases our "bufpos" appropriately.
+ * This has no return value, but will assert() if the size of the buffer
+ * is insufficient for the new data.
+ */
+void
+io_buffer_buf(struct sess *sess, void *buf,
+	size_t *bufpos, size_t buflen, const void *val, size_t valsz)
+{
+	const size_t	 off = *bufpos;
+
+	/* The new data must fit in what is left of the buffer. */
+
+	assert(off + valsz <= buflen);
+	memcpy((char *)buf + off, val, valsz);
+	*bufpos = off + valsz;
+}
+
+/*
+ * Converts "val" to LE prior to io_buffer_buf().
+ */
+void
+io_buffer_int(struct sess *sess, void *buf,
+	size_t *bufpos, size_t buflen, int32_t val)
+{
+	int32_t	 wire;
+
+	/* Integers are stored in the buffer in little-endian order. */
+
+	wire = htole32(val);
+	io_buffer_buf(sess, buf, bufpos, buflen, &wire, sizeof(wire));
+}
+
+/*
+ * Read a 64-bit value with io_read_long() and store it in "val" as an
+ * unsigned quantity.
+ * A negative value on the wire is an error: "val" is left untouched.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_read_ulong(struct sess *sess, int fd, uint64_t *val)
+{
+	int64_t	 oval;
+
+	if ( ! io_read_long(sess, fd, &oval)) {
+		ERRX(sess, "io_read_long");
+		return 0;
+	} else if (oval < 0) {
+		/* Must fail: "*val" has not been assigned. */
+		ERRX(sess, "io_read_ulong: negative value");
+		return 0;
+	}
+
+	*val = oval;
+	return 1;
+}
+
+/*
+ * Read a 64-bit value: a 32-bit integer which, if it is not INT32_MAX,
+ * is the value itself; otherwise it is followed by the value in 64-bit
+ * little-endian form.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_read_long(struct sess *sess, int fd, int64_t *val)
+{
+	int32_t	 first;
+	int64_t	 wide;
+
+	if ( ! io_read_int(sess, fd, &first)) {
+		ERRX(sess, "io_read_int");
+		return 0;
+	}
+
+	if (INT32_MAX == first) {
+		/* Marker seen: the real value follows as 64 bits. */
+		if ( ! io_read_buf(sess, fd, &wide, sizeof(wide))) {
+			ERRX(sess, "io_read_buf");
+			return 0;
+		}
+		*val = le64toh(wide);
+	} else
+		*val = first;
+
+	return 1;
+}
+
+/*
+ * One thing we often need to do is read a size_t.
+ * These are transmitted as int32_t, so make sure that the value
+ * transmitted is not out of range.
+ * FIXME: I assume that size_t can handle int32_t's max.
+ */
+int
+io_read_size(struct sess *sess, int fd, size_t *val)
+{
+	int32_t	 wire;
+
+	if ( ! io_read_int(sess, fd, &wire)) {
+		ERRX(sess, "io_read_int");
+		return 0;
+	}
+
+	/* A size can never be negative. */
+
+	if (wire < 0) {
+		ERRX(sess, "io_read_size: negative value");
+		return 0;
+	}
+
+	*val = wire;
+	return 1;
+}
+
+/*
+ * Read a 32-bit little-endian integer into "val" in host byte order.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_read_int(struct sess *sess, int fd, int32_t *val)
+{
+	int32_t	 wire;
+
+	if (io_read_buf(sess, fd, &wire, sizeof(wire))) {
+		*val = le32toh(wire);
+		return 1;
+	}
+
+	ERRX(sess, "io_read_buf");
+	return 0;
+}
+
+/*
+ * Copies "valsz" from "buf", full size "bufsz" at position "bufpos",
+ * into "val".
+ * Calls assert() if the source doesn't have enough data.
+ * Increases "bufpos" to the new position.
+ */
+void
+io_unbuffer_buf(struct sess *sess, const void *buf,
+	size_t *bufpos, size_t bufsz, void *val, size_t valsz)
+{
+	const size_t	 off = *bufpos;
+
+	/* The source must still hold at least "valsz" bytes. */
+
+	assert(off + valsz <= bufsz);
+	memcpy(val, (const char *)buf + off, valsz);
+	*bufpos = off + valsz;
+}
+
+/*
+ * Calls io_unbuffer_buf() and converts from LE.
+ */
+void
+io_unbuffer_int(struct sess *sess, const void *buf,
+	size_t *bufpos, size_t bufsz, int32_t *val)
+{
+	int32_t	 wire;
+
+	/* Pull four bytes out of the buffer, then fix the byte order. */
+
+	io_unbuffer_buf(sess, buf, bufpos, bufsz, &wire, sizeof(wire));
+	*val = le32toh(wire);
+}
+
+/*
+ * Calls io_unbuffer_int() and stores the result as a size.
+ * Returns zero (leaving "val" untouched) if the integer is negative,
+ * non-zero otherwise.
+ */
+int
+io_unbuffer_size(struct sess *sess, const void *buf,
+	size_t *bufpos, size_t bufsz, size_t *val)
+{
+	int32_t	 wire;
+
+	io_unbuffer_int(sess, buf, bufpos, bufsz, &wire);
+
+	if (wire >= 0) {
+		*val = wire;
+		return 1;
+	}
+
+	ERRX(sess, "io_unbuffer_size: negative value");
+	return 0;
+}
+
+/*
+ * Read a single byte into "val".
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_read_byte(struct sess *sess, int fd, uint8_t *val)
+{
+
+	if (io_read_buf(sess, fd, val, sizeof(uint8_t)))
+		return 1;
+
+	ERRX(sess, "io_read_buf");
+	return 0;
+}
+
+/*
+ * Write the single byte "val".
+ * Returns zero on failure, non-zero on success.
+ */
+int
+io_write_byte(struct sess *sess, int fd, uint8_t val)
+{
+
+	if (io_write_buf(sess, fd, &val, sizeof(uint8_t)))
+		return 1;
+
+	ERRX(sess, "io_write_buf");
+	return 0;
+}
+
diff --git a/usr.bin/rsync/log.c b/usr.bin/rsync/log.c
new file mode 100644
index 00000000000..093264e3bb4
--- /dev/null
+++ b/usr.bin/rsync/log.c
@@ -0,0 +1,194 @@
+/* $Id: log.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <errno.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "extern.h"
+
+/*
+ * Log a message at level "level", starting at zero, which corresponds
+ * to the current verbosity level opts->verbose (whose verbosity starts
+ * at one).
+ */
+void
+rsync_log(struct sess *sess, const char *fname,
+	size_t line, int level, const char *fmt, ...)
+{
+	va_list	 ap;
+	char	*msg = NULL;
+	int	 rc;
+
+	/* Too detailed for the configured verbosity: stay silent. */
+
+	if (level + 1 > sess->opts->verbose)
+		return;
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		rc = vasprintf(&msg, fmt, ap);
+		va_end(ap);
+		if (rc < 0)
+			return;
+	}
+
+	/*
+	 * Levels above zero are prefixed with the source location;
+	 * level zero (or below) prints the bare message, if any.
+	 */
+
+	if (level > 0)
+		fprintf(stderr, "%s:%zu%s%s\n", fname, line,
+		    msg != NULL ? ": " : "",
+		    msg != NULL ? msg : "");
+	else if (msg != NULL)
+		fprintf(stderr, "%s\n", msg);
+
+	free(msg);
+}
+
+/*
+ * This reports an error---not a warning.
+ * However, it is not like errx(3) in that it does not exit.
+ */
+void
+rsync_errx(struct sess *sess, const char *fname,
+	size_t line, const char *fmt, ...)
+{
+	va_list		 ap;
+	char		*msg = NULL;
+	const char	*sep = "", *text = "";
+	int		 rc;
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		rc = vasprintf(&msg, fmt, ap);
+		va_end(ap);
+		if (rc < 0)
+			return;
+	}
+
+	/* Only separate location and message when there is a message. */
+
+	if (msg != NULL) {
+		sep = ": ";
+		text = msg;
+	}
+
+	fprintf(stderr, "%s:%zu: error%s%s\n", fname, line, sep, text);
+	free(msg);
+}
+
+/*
+ * This reports an error---not a warning.
+ * However, it is not like err(3) in that it does not exit.
+ */
+void
+rsync_err(struct sess *sess, const char *fname,
+	size_t line, const char *fmt, ...)
+{
+	const int	 saved = errno;	/* before anything can clobber it */
+	va_list		 ap;
+	char		*msg = NULL;
+	const char	*sep = "", *text = "";
+	int		 rc;
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		rc = vasprintf(&msg, fmt, ap);
+		va_end(ap);
+		if (rc < 0)
+			return;
+	}
+
+	if (msg != NULL) {
+		sep = ": ";
+		text = msg;
+	}
+
+	/* Same as rsync_errx(), with the errno description appended. */
+
+	fprintf(stderr, "%s:%zu: error%s%s: %s\n", fname, line,
+	    sep, text, strerror(saved));
+	free(msg);
+}
+
+/*
+ * Prints a non-terminal error message, that is, when reporting on the
+ * chain of functions from which the actual warning occurred.
+ */
+void
+rsync_errx1(struct sess *sess, const char *fname,
+	size_t line, const char *fmt, ...)
+{
+	va_list		 ap;
+	char		*msg = NULL;
+	const char	*sep = "", *text = "";
+	int		 rc;
+
+	/* Call-chain messages are shown only when verbose. */
+
+	if (sess->opts->verbose < 1)
+		return;
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		rc = vasprintf(&msg, fmt, ap);
+		va_end(ap);
+		if (rc < 0)
+			return;
+	}
+
+	if (msg != NULL) {
+		sep = ": ";
+		text = msg;
+	}
+
+	fprintf(stderr, "%s:%zu: error%s%s\n", fname, line, sep, text);
+	free(msg);
+}
+
+/*
+ * Prints a warning message.
+ */
+void
+rsync_warnx(struct sess *sess, const char *fname,
+	size_t line, const char *fmt, ...)
+{
+	va_list		 ap;
+	char		*msg = NULL;
+	const char	*sep = "", *text = "";
+	int		 rc;
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		rc = vasprintf(&msg, fmt, ap);
+		va_end(ap);
+		if (rc < 0)
+			return;
+	}
+
+	/* Only separate location and message when there is a message. */
+
+	if (msg != NULL) {
+		sep = ": ";
+		text = msg;
+	}
+
+	fprintf(stderr, "%s:%zu: warning%s%s\n", fname, line, sep, text);
+	free(msg);
+}
+
+/*
+ * Prints a warning with an errno.
+ * It uses a level detector for when to inhibit printing.
+ */
+void
+rsync_warn(struct sess *sess, int level,
+	const char *fname, size_t line, const char *fmt, ...)
+{
+	const int	 saved = errno;	/* before anything can clobber it */
+	va_list		 ap;
+	char		*msg = NULL;
+	const char	*sep = "", *text = "";
+	int		 rc;
+
+	/* Inhibit the warning unless verbosity has reached "level". */
+
+	if (level > sess->opts->verbose)
+		return;
+
+	if (fmt != NULL) {
+		va_start(ap, fmt);
+		rc = vasprintf(&msg, fmt, ap);
+		va_end(ap);
+		if (rc < 0)
+			return;
+	}
+
+	if (msg != NULL) {
+		sep = ": ";
+		text = msg;
+	}
+
+	fprintf(stderr, "%s:%zu: warning%s%s: %s\n", fname, line,
+	    sep, text, strerror(saved));
+	free(msg);
+}
diff --git a/usr.bin/rsync/main.c b/usr.bin/rsync/main.c
new file mode 100644
index 00000000000..871922f2c15
--- /dev/null
+++ b/usr.bin/rsync/main.c
@@ -0,0 +1,453 @@
+/* $Id: main.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+
+#include <assert.h>
+#include <err.h>
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Release a "struct fargs" and the strings it holds.
+ * Accepts NULL, in which case it does nothing.
+ */
+static void
+fargs_free(struct fargs *p)
+{
+	size_t	 n;
+
+	if (p == NULL)
+		return;
+
+	/* Each source string first, then the array that held them. */
+
+	if (p->sources != NULL) {
+		for (n = 0; n != p->sourcesz; n++)
+			free(p->sources[n]);
+	}
+	free(p->sources);
+
+	free(p->sink);
+	free(p->host);
+	free(p);
+}
+
+/*
+ * A remote host has a colon before the first path separator.
+ * This works for rsh remote hosts (host:/foo/bar), implicit rsync
+ * remote hosts (host::/foo/bar), and explicit (rsync://host/foo).
+ * Return zero if local, non-zero if remote.
+ */
+static int
+fargs_is_remote(const char *v)
+{
+
+	/* Whichever of ':' and '/' comes first decides the answer. */
+
+	for ( ; *v != '\0'; v++) {
+		if (*v == ':')
+			return 1;
+		if (*v == '/')
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Test whether a remote host is specifically an rsync daemon.
+ * Return zero if not, non-zero if so.
+ */
+static int
+fargs_is_daemon(const char *v)
+{
+	const char	*sep;
+
+	/* Explicit form: rsync://..., scheme matched case-insensitively. */
+
+	if (strncasecmp(v, "rsync://", 8) == 0)
+		return 1;
+
+	/* Implicit form: a double colon before any path separator. */
+
+	sep = v + strcspn(v, ":/");
+	if (sep[0] != ':')
+		return 0;
+
+	return sep[1] == ':';
+}
+
+/*
+ * Take the command-line filenames (e.g., rsync foo/ bar/ baz/) and
+ * determine our operating mode.
+ * For example, if the first argument is a remote file, this means that
+ * we're going to transfer from the remote to the local.
+ * We also make sure that the arguments are consistent, that is, if
+ * we're going to transfer from the local to the remote, that no
+ * filenames for the local transfer indicate remote hosts.
+ * Always returns the parsed and sanitised options.
+ */
+static struct fargs *
+fargs_parse(size_t argc, char *argv[])
+{
+ struct fargs *f = NULL;
+ char *cp;
+ size_t i, j, len = 0;
+
+ /* Allocations. */
+
+ if (NULL == (f = calloc(1, sizeof(struct fargs))))
+ err(EXIT_FAILURE, "calloc");
+
+ /* Every argument but the last is a source; the last is the sink. */
+
+ f->sourcesz = argc - 1;
+ if (NULL == (f->sources = calloc(f->sourcesz, sizeof(char *))))
+ err(EXIT_FAILURE, "calloc");
+
+ for (i = 0; i < argc - 1; i++)
+ if (NULL == (f->sources[i] = strdup(argv[i])))
+ err(EXIT_FAILURE, "strdup");
+
+ /* The loop leaves "i" at the index of the final argument. */
+
+ if (NULL == (f->sink = strdup(argv[i])))
+ err(EXIT_FAILURE, "strdup");
+
+ /*
+ * Test files for its locality.
+ * If the last is a remote host, then we're sending from the
+ * local to the remote host ("sender" mode).
+ * If the first, remote to local ("receiver" mode).
+ * If neither, a local transfer in sender style.
+ */
+
+ f->mode = FARGS_SENDER;
+
+ if (fargs_is_remote(f->sink)) {
+ f->mode = FARGS_SENDER;
+ if (NULL == (f->host = strdup(f->sink)))
+ err(EXIT_FAILURE, "strdup");
+ }
+
+ if (fargs_is_remote(f->sources[0])) {
+ if (NULL != f->host)
+ errx(EXIT_FAILURE, "both source and "
+ "destination cannot be remote files");
+ f->mode = FARGS_RECEIVER;
+ if (NULL == (f->host = strdup(f->sources[0])))
+ err(EXIT_FAILURE, "strdup");
+ }
+
+ /*
+ * f->host currently holds a full copy of the remote argument.
+ * Cut it down in place to the bare host name; f->module, when
+ * set, points into the same allocation.
+ */
+
+ if (NULL != f->host) {
+ if (0 == strncasecmp(f->host, "rsync://", 8)) {
+ /* rsync://host/module[/path] */
+ f->remote = 1;
+ /* Shift out the 8-byte scheme; the count includes the NUL. */
+ len = strlen(f->host) - 8 + 1;
+ memmove(f->host, f->host + 8, len);
+ if (NULL == (cp = strchr(f->host, '/')))
+ errx(EXIT_FAILURE, "rsync protocol "
+ "requires a module name");
+ *cp++ = '\0';
+ f->module = cp;
+ if (NULL != (cp = strchr(f->module, '/')))
+ *cp = '\0';
+ } else {
+ /* host:[/path] */
+ cp = strchr(f->host, ':');
+ assert(NULL != cp);
+ *cp++ = '\0';
+ if (':' == *cp) {
+ /* host::module[/path] */
+ f->remote = 1;
+ f->module = ++cp;
+ cp = strchr(f->module, '/');
+ if (NULL != cp)
+ *cp = '\0';
+ }
+ }
+ /* From here on, "len" is the length of the bare host name. */
+ if (0 == (len = strlen(f->host)))
+ errx(EXIT_FAILURE, "empty remote host");
+ if (f->remote && 0 == strlen(f->module))
+ errx(EXIT_FAILURE, "empty remote module");
+ }
+
+ /* Make sure we have the same "hostspec" for all files. */
+
+ if ( ! f->remote) {
+ if (FARGS_SENDER == f->mode)
+ for (i = 0; i < f->sourcesz; i++) {
+ if ( ! fargs_is_remote(f->sources[i]))
+ continue;
+ errx(EXIT_FAILURE, "remote file in "
+ "list of local sources: %s",
+ f->sources[i]);
+ }
+ if (FARGS_RECEIVER == f->mode)
+ for (i = 0; i < f->sourcesz; i++) {
+ if (fargs_is_remote(f->sources[i]) &&
+ ! fargs_is_daemon(f->sources[i]))
+ continue;
+ if (fargs_is_daemon(f->sources[i]))
+ errx(EXIT_FAILURE, "remote "
+ "daemon in list of "
+ "remote sources: %s",
+ f->sources[i]);
+ errx(EXIT_FAILURE, "local file in "
+ "list of remote sources: %s",
+ f->sources[i]);
+ }
+ } else {
+ if (FARGS_RECEIVER != f->mode)
+ errx(EXIT_FAILURE, "sender mode for remote "
+ "daemon receivers not yet supported");
+ for (i = 0; i < f->sourcesz; i++) {
+ if (fargs_is_daemon(f->sources[i]))
+ continue;
+ errx(EXIT_FAILURE, "non-remote daemon file "
+ "in list of remote daemon sources: "
+ "%s", f->sources[i]);
+ }
+ }
+
+ /*
+ * If we're not remote and a sender, strip our hostname.
+ * Then exit if we're a sender or a local connection.
+ */
+
+ if ( ! f->remote) {
+ if (NULL == f->host)
+ return f;
+ if (FARGS_SENDER == f->mode) {
+ assert(NULL != f->host);
+ assert(len > 0);
+ j = strlen(f->sink);
+ /* Drop "host:" from the sink; the count includes the NUL. */
+ memmove(f->sink, f->sink + len + 1, j - len);
+ return f;
+ } else if (FARGS_RECEIVER != f->mode)
+ return f;
+ }
+
+ /*
+ * Now strip the hostnames from the remote host.
+ * rsync://host/module/path -> module/path
+ * host::module/path -> module/path
+ * host:path -> path
+ * Also make sure that the remote hosts are the same.
+ * Each memmove() count below includes the terminating NUL.
+ */
+
+ assert(NULL != f->host);
+ assert(len > 0);
+
+ for (i = 0; i < f->sourcesz; i++) {
+ cp = f->sources[i];
+ j = strlen(cp);
+ if (f->remote &&
+ 0 == strncasecmp(cp, "rsync://", 8)) {
+ /* rsync://path */
+ cp += 8;
+ if (strncmp(cp, f->host, len) ||
+ ('/' != cp[len] && '\0' != cp[len]))
+ errx(EXIT_FAILURE, "different remote "
+ "host: %s", f->sources[i]);
+ /*
+ * NOTE(review): for a bare "rsync://host" (no
+ * trailing slash) the count below is zero, so
+ * the source is left unstripped; confirm whether
+ * that input should be rejected.
+ */
+ memmove(f->sources[i],
+ f->sources[i] + len + 8 + 1,
+ j - len - 8);
+ } else if (f->remote && 0 == strncmp(cp, "::", 2)) {
+ /* ::path */
+ memmove(f->sources[i],
+ f->sources[i] + 2, j - 1);
+ } else if (f->remote) {
+ /* host::path */
+ if (strncmp(cp, f->host, len) ||
+ (':' != cp[len] && '\0' != cp[len]))
+ errx(EXIT_FAILURE, "different remote "
+ "host: %s", f->sources[i]);
+ memmove(f->sources[i],
+ f->sources[i] + len + 2,
+ j - len - 1);
+ } else if (':' == cp[0]) {
+ /* :path */
+ memmove(f->sources[i], f->sources[i] + 1, j);
+ } else {
+ /* host:path */
+ if (strncmp(cp, f->host, len) ||
+ (':' != cp[len] && '\0' != cp[len]))
+ errx(EXIT_FAILURE, "different remote "
+ "host: %s", f->sources[i]);
+ memmove(f->sources[i],
+ f->sources[i] + len + 1, j - len);
+ }
+ }
+
+ return f;
+}
+
+/*
+ * Program entry point.
+ * Parses options, then runs as the server (--server), connects to an
+ * rsync daemon over a socket, or forks a child and runs the client
+ * against it over a socket pair.
+ * pledge(2) is re-issued with a narrower promise set at each stage.
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int
+main(int argc, char *argv[])
+{
+ struct opts opts;
+ pid_t child;
+ int fds[2], flags, c, st;
+ struct fargs *fargs;
+ /* Long options either set a flag in "opts" or return 1 (--rsync-path). */
+ struct option lopts[] = {
+ { "delete", no_argument, &opts.del, 1 },
+ { "rsync-path", required_argument, NULL, 1 },
+ { "sender", no_argument, &opts.sender, 1 },
+ { "server", no_argument, &opts.server, 1 },
+ { NULL, 0, NULL, 0 }};
+
+ /* Global pledge. */
+
+ if (-1 == pledge("dns inet unveil exec stdio rpath wpath cpath proc fattr", NULL))
+ err(EXIT_FAILURE, "pledge");
+
+ memset(&opts, 0, sizeof(struct opts));
+
+ for (;;) {
+ c = getopt_long(argc, argv, "e:lnprtv", lopts, NULL);
+ if (-1 == c)
+ break;
+ switch (c) {
+ case 'e':
+ /* Option and its argument are accepted but ignored. */
+ break;
+ case 'l':
+ opts.preserve_links = 1;
+ break;
+ case 'n':
+ opts.dry_run = 1;
+ break;
+ case 'p':
+ opts.preserve_perms = 1;
+ break;
+ case 'r':
+ opts.recursive = 1;
+ break;
+ case 't':
+ opts.preserve_times = 1;
+ break;
+ case 'v':
+ opts.verbose++;
+ break;
+ case 0:
+ /* Non-NULL flag values (e.g., --sender). */
+ break;
+ case 1:
+ opts.rsync_path = optarg;
+ break;
+ default:
+ goto usage;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /* FIXME: reference implementation rsync accepts this. */
+
+ if (argc < 2)
+ goto usage;
+
+ /*
+ * This is what happens when we're started with the "hidden"
+ * --server option, which is invoked for the rsync on the remote
+ * host by the parent.
+ */
+
+ if (opts.server) {
+ if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL))
+ err(EXIT_FAILURE, "pledge");
+ c = rsync_server(&opts, (size_t)argc, argv);
+ return c ? EXIT_SUCCESS : EXIT_FAILURE;
+ }
+
+ /*
+ * Now we know that we're the client on the local machine
+ * invoking rsync(1).
+ * At this point, we need to start the client and server
+ * initiation logic.
+ * The client is what we continue running on this host; the
+ * server is what we'll use to connect to the remote and
+ * invoke rsync with the --server option.
+ */
+
+ fargs = fargs_parse(argc, argv);
+ assert(NULL != fargs);
+
+ /*
+ * If we're contacting an rsync:// daemon, then we don't need to
+ * fork, because we won't start a server ourselves.
+ * Route directly into the socket code, in that case.
+ */
+
+ if (fargs->remote) {
+ assert(FARGS_RECEIVER == fargs->mode);
+ if (-1 == pledge("dns inet unveil stdio rpath wpath cpath fattr", NULL))
+ err(EXIT_FAILURE, "pledge");
+ c = rsync_socket(&opts, fargs);
+ fargs_free(fargs);
+ return c ? EXIT_SUCCESS : EXIT_FAILURE;
+ }
+
+ /* Drop the dns/inet possibility. */
+
+ if (-1 == pledge("unveil exec stdio rpath wpath cpath proc fattr", NULL))
+ err(EXIT_FAILURE, "pledge");
+
+ /* Create a bidirectional socket and start our child. */
+
+ flags = SOCK_STREAM | SOCK_NONBLOCK;
+
+ if (-1 == socketpair(AF_UNIX, flags, 0, fds))
+ err(EXIT_FAILURE, "socketpair");
+
+ if (-1 == (child = fork())) {
+ close(fds[0]);
+ close(fds[1]);
+ err(EXIT_FAILURE, "fork");
+ }
+
+ /* Drop the fork possibility. */
+
+ if (-1 == pledge("unveil exec stdio rpath wpath cpath fattr", NULL))
+ err(EXIT_FAILURE, "pledge");
+
+ /* The child keeps fds[1] and needs only exec and stdio. */
+
+ if (0 == child) {
+ close(fds[0]);
+ fds[0] = -1;
+ if (-1 == pledge("exec stdio", NULL))
+ err(EXIT_FAILURE, "pledge");
+ rsync_child(&opts, fds[1], fargs);
+ /* NOTREACHED */
+ }
+
+ /* The parent keeps fds[0] and gives up exec. */
+
+ close(fds[1]);
+ fds[1] = -1;
+ if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL))
+ err(EXIT_FAILURE, "pledge");
+ c = rsync_client(&opts, fds[0], fargs);
+ fargs_free(fargs);
+
+ /*
+ * If the client has an error and exits, the server may be
+ * sitting around waiting to get data while we waitpid().
+ * So close the connection here so that they don't hang.
+ */
+
+ if ( ! c) {
+ close(fds[0]);
+ fds[0] = -1;
+ }
+
+ /* Succeed only if the child also exited cleanly with EXIT_SUCCESS. */
+
+ if (-1 == waitpid(child, &st, 0))
+ err(EXIT_FAILURE, "waitpid");
+ if ( ! (WIFEXITED(st) && EXIT_SUCCESS == WEXITSTATUS(st)))
+ c = 0;
+
+ if (-1 != fds[0])
+ close(fds[0]);
+ return c ? EXIT_SUCCESS : EXIT_FAILURE;
+usage:
+ fprintf(stderr, "usage: %s [-lnprtv] "
+ "[--delete] [--rsync-path=prog] src ... dst\n",
+ getprogname());
+ return EXIT_FAILURE;
+}
diff --git a/usr.bin/rsync/md4.c b/usr.bin/rsync/md4.c
new file mode 100644
index 00000000000..3641f03ff64
--- /dev/null
+++ b/usr.bin/rsync/md4.c
@@ -0,0 +1,265 @@
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD4 Message-Digest Algorithm (RFC 1320).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md4
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001. No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * (This is a heavily cut-down "BSD license".)
+ *
+ * This differs from Colin Plumb's older public domain implementation in that
+ * no exactly 32-bit integer data type is required (any 32-bit or wider
+ * unsigned integer data type will do), there's no compile-time endianness
+ * configuration, and the function prototypes match OpenSSL's. No code from
+ * Colin Plumb's implementation has been reused; this comment merely compares
+ * the properties of the two independent implementations.
+ *
+ * The primary goals of this implementation are portability and ease of use.
+ * It is meant to be fast, but not as fast as possible. Some known
+ * optimizations are not included to reduce source code size and avoid
+ * compile-time configuration.
+ */
+#include <string.h>
+
+#include "md4.h"
+
+/*
+ * The basic MD4 functions.
+ *
+ * F and G are optimized compared to their RFC 1320 definitions, with the
+ * optimization for F borrowed from Colin Plumb's MD5 implementation.
+ */
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) (((x) & ((y) | (z))) | ((y) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+/*
+ * The MD4 transformation for all three rounds.
+ *
+ * STEP adds f(b, c, d) + x into a, then rotates a left by s bits; the
+ * 0xffffffff mask keeps the rotate correct when MD4_u32plus is wider than
+ * 32 bits.
+ *
+ * Note that STEP expands to two complete statements, trailing semicolon
+ * included: it is invoked without a semicolon and must not be used as the
+ * body of an unbraced if/else/loop.  Its arguments are evaluated more than
+ * once.
+ */
+#define STEP(f, a, b, c, d, x, s) \
+ (a) += f((b), (c), (d)) + (x); \
+ (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s))));
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them in a
+ * properly aligned word in host byte order.
+ *
+ * The check for little-endian architectures that tolerate unaligned memory
+ * accesses is just an optimization. Nothing will break if it fails to detect
+ * a suitable architecture.
+ *
+ * Unfortunately, this optimization may be a C strict aliasing rules violation
+ * if the caller's data buffer has effective type that cannot be aliased by
+ * MD4_u32plus. In practice, this problem may occur if these MD4 routines are
+ * inlined into a calling function, or with future and dangerously advanced
+ * link-time optimizations. For the time being, keeping these MD4 routines in
+ * their own translation unit avoids the problem.
+ *
+ * Both variants expect a local named "ptr" (the current input position) at
+ * the expansion site.  The portable variant additionally expects "ctx" and
+ * caches each decoded word in ctx->block[], which is what GET re-reads in
+ * rounds 2 and 3; in the direct-access variant GET simply re-reads the
+ * input through SET.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+ (*(MD4_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+ SET(n)
+#else
+#define SET(n) \
+ (ctx->block[(n)] = \
+ (MD4_u32plus)ptr[(n) * 4] | \
+ ((MD4_u32plus)ptr[(n) * 4 + 1] << 8) | \
+ ((MD4_u32plus)ptr[(n) * 4 + 2] << 16) | \
+ ((MD4_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+ (ctx->block[(n)])
+#endif
+
+/*
+ * This processes one or more 64-byte data blocks, but does NOT update the bit
+ * counters. There are no alignment requirements.
+ *
+ * "size" must be a non-zero multiple of 64: the loop below is a do/while
+ * that consumes one block before testing, subtracts 64 per block and only
+ * stops when size reaches exactly zero.
+ *
+ * The chaining state is loaded from ctx->a..d, updated once per block, and
+ * stored back at the end.  Returns a pointer to the first byte after the
+ * data that was processed.
+ */
+static const void *body(MD4_CTX *ctx, const void *data, unsigned long size)
+{
+ const unsigned char *ptr;
+ MD4_u32plus a, b, c, d;
+ MD4_u32plus saved_a, saved_b, saved_c, saved_d;
+ const MD4_u32plus ac1 = 0x5a827999, ac2 = 0x6ed9eba1; /* additive constants for rounds 2 and 3 */
+
+ ptr = (const unsigned char *)data;
+
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+ d = ctx->d;
+
+ do {
+ saved_a = a;
+ saved_b = b;
+ saved_c = c;
+ saved_d = d;
+
+/* Round 1 */
+ STEP(F, a, b, c, d, SET(0), 3)
+ STEP(F, d, a, b, c, SET(1), 7)
+ STEP(F, c, d, a, b, SET(2), 11)
+ STEP(F, b, c, d, a, SET(3), 19)
+ STEP(F, a, b, c, d, SET(4), 3)
+ STEP(F, d, a, b, c, SET(5), 7)
+ STEP(F, c, d, a, b, SET(6), 11)
+ STEP(F, b, c, d, a, SET(7), 19)
+ STEP(F, a, b, c, d, SET(8), 3)
+ STEP(F, d, a, b, c, SET(9), 7)
+ STEP(F, c, d, a, b, SET(10), 11)
+ STEP(F, b, c, d, a, SET(11), 19)
+ STEP(F, a, b, c, d, SET(12), 3)
+ STEP(F, d, a, b, c, SET(13), 7)
+ STEP(F, c, d, a, b, SET(14), 11)
+ STEP(F, b, c, d, a, SET(15), 19)
+
+/* Round 2 */
+ STEP(G, a, b, c, d, GET(0) + ac1, 3)
+ STEP(G, d, a, b, c, GET(4) + ac1, 5)
+ STEP(G, c, d, a, b, GET(8) + ac1, 9)
+ STEP(G, b, c, d, a, GET(12) + ac1, 13)
+ STEP(G, a, b, c, d, GET(1) + ac1, 3)
+ STEP(G, d, a, b, c, GET(5) + ac1, 5)
+ STEP(G, c, d, a, b, GET(9) + ac1, 9)
+ STEP(G, b, c, d, a, GET(13) + ac1, 13)
+ STEP(G, a, b, c, d, GET(2) + ac1, 3)
+ STEP(G, d, a, b, c, GET(6) + ac1, 5)
+ STEP(G, c, d, a, b, GET(10) + ac1, 9)
+ STEP(G, b, c, d, a, GET(14) + ac1, 13)
+ STEP(G, a, b, c, d, GET(3) + ac1, 3)
+ STEP(G, d, a, b, c, GET(7) + ac1, 5)
+ STEP(G, c, d, a, b, GET(11) + ac1, 9)
+ STEP(G, b, c, d, a, GET(15) + ac1, 13)
+
+/* Round 3 */
+ STEP(H, a, b, c, d, GET(0) + ac2, 3)
+ STEP(H, d, a, b, c, GET(8) + ac2, 9)
+ STEP(H, c, d, a, b, GET(4) + ac2, 11)
+ STEP(H, b, c, d, a, GET(12) + ac2, 15)
+ STEP(H, a, b, c, d, GET(2) + ac2, 3)
+ STEP(H, d, a, b, c, GET(10) + ac2, 9)
+ STEP(H, c, d, a, b, GET(6) + ac2, 11)
+ STEP(H, b, c, d, a, GET(14) + ac2, 15)
+ STEP(H, a, b, c, d, GET(1) + ac2, 3)
+ STEP(H, d, a, b, c, GET(9) + ac2, 9)
+ STEP(H, c, d, a, b, GET(5) + ac2, 11)
+ STEP(H, b, c, d, a, GET(13) + ac2, 15)
+ STEP(H, a, b, c, d, GET(3) + ac2, 3)
+ STEP(H, d, a, b, c, GET(11) + ac2, 9)
+ STEP(H, c, d, a, b, GET(7) + ac2, 11)
+ STEP(H, b, c, d, a, GET(15) + ac2, 15)
+
+ a += saved_a;
+ b += saved_b;
+ c += saved_c;
+ d += saved_d;
+
+ ptr += 64;
+ } while (size -= 64);
+
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+ ctx->d = d;
+
+ return ptr;
+}
+
+/*
+ * Put ctx into its starting state: no input consumed yet and the four
+ * chaining words set to their initial values.
+ */
+void MD4_Init(MD4_CTX *ctx)
+{
+	/* Nothing has been hashed so far. */
+	ctx->hi = 0;
+	ctx->lo = 0;
+
+	/* Initial chaining values. */
+	ctx->d = 0x10325476;
+	ctx->c = 0x98badcfe;
+	ctx->b = 0xefcdab89;
+	ctx->a = 0x67452301;
+}
+
+/*
+ * Feed "size" bytes starting at "data" into the hash.
+ *
+ * ctx->lo keeps the low 29 bits of the running byte count and ctx->hi the
+ * bits above that.  Input that does not fill a whole 64-byte block is
+ * parked in ctx->buffer until a later call (or MD4_Final) completes it.
+ *
+ * A zero-length update is a no-op and returns before "data" is touched,
+ * so callers hashing an empty object may pass a NULL pointer; handing
+ * NULL to memcpy() is undefined even for a length of zero.
+ */
+void MD4_Update(MD4_CTX *ctx, const void *data, unsigned long size)
+{
+	MD4_u32plus saved_lo;
+	unsigned long used, available;
+
+	if (size == 0)
+		return;
+
+	/* Advance the byte counters, carrying out of the 29-bit low word. */
+	saved_lo = ctx->lo;
+	if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+		ctx->hi++;
+	ctx->hi += size >> 29;
+
+	/* Bytes already waiting in the partial-block buffer. */
+	used = saved_lo & 0x3f;
+
+	if (used) {
+		available = 64 - used;
+
+		/* Still not a full block: just stash the input. */
+		if (size < available) {
+			memcpy(&ctx->buffer[used], data, size);
+			return;
+		}
+
+		/* Top up the buffered block and process it. */
+		memcpy(&ctx->buffer[used], data, available);
+		data = (const unsigned char *)data + available;
+		size -= available;
+		body(ctx, ctx->buffer, 64);
+	}
+
+	/* Process all remaining whole blocks straight from the input. */
+	if (size >= 64) {
+		data = body(ctx, data, size & ~(unsigned long)0x3f);
+		size &= 0x3f;
+	}
+
+	/* Keep the tail (possibly empty) for the next call. */
+	memcpy(ctx->buffer, data, size);
+}
+
+/*
+ * OUT stores the low 32 bits of src at dst as four bytes, least
+ * significant byte first.  It expands to four complete statements
+ * (trailing semicolon included), so it is invoked without a semicolon.
+ */
+#define OUT(dst, src) \
+	(dst)[0] = (unsigned char)(src); \
+	(dst)[1] = (unsigned char)((src) >> 8); \
+	(dst)[2] = (unsigned char)((src) >> 16); \
+	(dst)[3] = (unsigned char)((src) >> 24);
+
+/*
+ * Finish the hash: pad the buffered input, append the message length,
+ * run the last block(s), write the 16-byte digest to "result" and wipe
+ * the context.
+ */
+void MD4_Final(unsigned char *result, MD4_CTX *ctx)
+{
+	unsigned long fill, room;
+
+	/* Append the mandatory 0x80 marker after the buffered bytes. */
+	fill = ctx->lo & 0x3f;
+	ctx->buffer[fill] = 0x80;
+	fill++;
+
+	room = 64 - fill;
+
+	/*
+	 * The length field needs the last 8 bytes of a block; if they are
+	 * not free, zero-fill and flush this block and start a fresh one.
+	 */
+	if (room < 8) {
+		memset(ctx->buffer + fill, 0, room);
+		body(ctx, ctx->buffer, 64);
+		fill = 0;
+		room = 64;
+	}
+
+	/* Zero everything up to the length field. */
+	memset(ctx->buffer + fill, 0, room - 8);
+
+	/* Convert the byte count to a bit count and store it. */
+	ctx->lo <<= 3;
+	OUT(ctx->buffer + 56, ctx->lo)
+	OUT(ctx->buffer + 60, ctx->hi)
+
+	body(ctx, ctx->buffer, 64);
+
+	/* Emit the chaining state as the digest. */
+	OUT(result, ctx->a)
+	OUT(result + 4, ctx->b)
+	OUT(result + 8, ctx->c)
+	OUT(result + 12, ctx->d)
+
+	/* Leave nothing behind in the context. */
+	memset(ctx, 0, sizeof(*ctx));
+}
diff --git a/usr.bin/rsync/md4.h b/usr.bin/rsync/md4.h
new file mode 100644
index 00000000000..8a6a0021ca7
--- /dev/null
+++ b/usr.bin/rsync/md4.h
@@ -0,0 +1,49 @@
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
+ * MD4 Message-Digest Algorithm (RFC 1320).
+ *
+ * Homepage:
+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md4
+ *
+ * Author:
+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
+ *
+ * This software was written by Alexander Peslyak in 2001. No copyright is
+ * claimed, and the software is hereby placed in the public domain.
+ * In case this attempt to disclaim copyright and place the software in the
+ * public domain is deemed null and void, then the software is
+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * See md4.c for more information.
+ */
+
+#ifndef MD4_H
+#define MD4_H
+
+#define MD4_DIGEST_LENGTH 16 /* bytes written to "result" by MD4_Final() */
+
+/* Any 32-bit or wider unsigned integer data type will do */
+typedef unsigned int MD4_u32plus;
+
+typedef struct {
+ MD4_u32plus lo, hi; /* running byte count: low 29 bits in lo, the rest in hi */
+ MD4_u32plus a, b, c, d; /* chaining state, emitted as the digest */
+ unsigned char buffer[64]; /* input not yet forming a whole 64-byte block */
+ MD4_u32plus block[16]; /* decoded words of the current block (portable SET/GET path) */
+} MD4_CTX;
+
+__BEGIN_DECLS /* NOTE(review): not defined by this header; presumably relies on the includer pulling in <sys/cdefs.h> first -- confirm */
+
+extern void MD4_Init(MD4_CTX *ctx);
+extern void MD4_Update(MD4_CTX *ctx, const void *data, unsigned long size);
+extern void MD4_Final(unsigned char *result, MD4_CTX *ctx);
+
+__END_DECLS
+
+#endif
diff --git a/usr.bin/rsync/mkpath.c b/usr.bin/rsync/mkpath.c
new file mode 100644
index 00000000000..8dc44e544b5
--- /dev/null
+++ b/usr.bin/rsync/mkpath.c
@@ -0,0 +1,77 @@
+/* $OpenBSD: mkpath.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "extern.h"
+
+/*
+ * Code taken directly from mkdir(1).
+ *
+ * mkpath -- create directories.
+ *	path - path
+ *
+ * Walk "path" one component at a time and make sure that every prefix
+ * exists as a directory, creating the missing ones with mode 0777 (as
+ * modified by the umask).
+ *
+ * The buffer is temporarily NUL-terminated at each component boundary,
+ * so "path" must be writable.  On success it is left exactly as it was
+ * passed in; on failure it may be left truncated at the component that
+ * failed.
+ *
+ * Returns 0 on success, or -1 after reporting the error through ERR().
+ */
+int
+mkpath(struct sess *sess, char *path)
+{
+	struct stat sb;
+	char *slash;
+	int done = 0;
+
+	slash = path;
+
+	while (!done) {
+		/* Skip separators, then the component that follows them. */
+		slash += strspn(slash, "/");
+		slash += strcspn(slash, "/");
+
+		/* Cut the string after this component. */
+		done = (*slash == '\0');
+		*slash = '\0';
+
+		if (stat(path, &sb) == -1) {
+			if (errno != ENOENT) {
+				ERR(sess, "%s: stat", path);
+				return (-1);
+			}
+			/* Somebody else creating it meanwhile is fine. */
+			if (mkdir(path, 0777) == -1 && errno != EEXIST) {
+				ERR(sess, "%s: mkdir", path);
+				return (-1);
+			}
+		} else if (!S_ISDIR(sb.st_mode)) {
+			errno = ENOTDIR;
+			ERR(sess, "%s: stat", path);
+			return (-1);
+		}
+
+		/*
+		 * Put the separator back, but only where there was one:
+		 * on the last component "slash" points at the string's
+		 * terminator, which must stay a NUL.
+		 */
+		if (!done)
+			*slash = '/';
+	}
+
+	return (0);
+}
+
diff --git a/usr.bin/rsync/receiver.c b/usr.bin/rsync/receiver.c
new file mode 100644
index 00000000000..8bc4779274e
--- /dev/null
+++ b/usr.bin/rsync/receiver.c
@@ -0,0 +1,341 @@
+/* $Id: receiver.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+enum pfdt { /* slots in the pollfd array polled by rsync_receiver() */
+ PFD_SENDER_IN = 0, /* input from the sender */
+ PFD_UPLOADER_IN, /* uploader input from a local file */
+ PFD_DOWNLOADER_IN, /* downloader input from a local file */
+ PFD_SENDER_OUT, /* output to the sender */
+ PFD__MAX /* number of slots: sizes the array and the poll() count */
+};
+
+/*
+ * Run the receiving side of a transfer.
+ * Reads the file list (and the sender's I/O error word) from "fdin",
+ * creates the destination "root" unless in dry-run mode, restricts the
+ * file-system view to it with unveil, performs any deletions, and then
+ * drives the uploader and downloader from a poll loop until the
+ * downloader reports the end of phase 1.
+ * It finishes with the phase acknowledgement and statistics exchange
+ * over "fdin"/"fdout".
+ * Returns 1 on success (this includes a client that received an empty
+ * file list) and 0 on failure; everything acquired here is released
+ * before returning.
+ * Pledges: unveil, rpath, cpath, wpath, stdio, fattr.
+ * Pledges (dry-run): -cpath, -wpath, -fattr.
+ */
+int
+rsync_receiver(struct sess *sess,
+ int fdin, int fdout, const char *root)
+{
+ struct flist *fl = NULL, *dfl = NULL;
+ size_t i, flsz = 0, dflsz = 0, excl;
+ char *tofree;
+ int rc = 0, dfd = -1, phase = 0, c;
+ int32_t ioerror;
+ struct pollfd pfd[PFD__MAX];
+ struct download *dl = NULL;
+ struct upload *ul = NULL;
+ mode_t oumask;
+
+ if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL)) {
+ ERR(sess, "pledge");
+ goto out;
+ }
+
+ /* Client sends zero-length exclusions. */
+
+ if ( ! sess->opts->server &&
+ ! io_write_int(sess, fdout, 0)) {
+ ERRX1(sess, "io_write_int");
+ goto out;
+ }
+
+ if (sess->opts->server && sess->opts->del) {
+ if ( ! io_read_size(sess, fdin, &excl)) {
+ ERRX1(sess, "io_read_size");
+ goto out;
+ } else if (0 != excl) {
+ ERRX(sess, "exclusion list is non-empty");
+ goto out;
+ }
+ }
+
+ /*
+ * Start by receiving the file list and our mystery number.
+ * These we're going to be touching on our local system.
+ */
+
+ if ( ! flist_recv(sess, fdin, &fl, &flsz)) {
+ ERRX1(sess, "flist_recv");
+ goto out;
+ }
+
+ /* The IO error is sent after the file list. */
+
+ if ( ! io_read_int(sess, fdin, &ioerror)) {
+ ERRX1(sess, "io_read_int");
+ goto out;
+ } else if (0 != ioerror) {
+ ERRX1(sess, "io_error is non-zero");
+ goto out;
+ }
+
+ if (0 == flsz && ! sess->opts->server) {
+ WARNX(sess, "receiver has empty file list: exiting");
+ rc = 1;
+ goto out;
+ } else if ( ! sess->opts->server)
+ LOG1(sess, "Transfer starting: %zu files", flsz);
+
+ LOG2(sess, "%s: receiver destination", root);
+
+ /*
+ * Create the path for our destination directory, if we're not
+ * in dry-run mode (which would otherwise crash w/the pledge).
+ * This uses our current umask: we might set the permissions on
+ * this directory in post_dir().
+ * mkpath() edits its argument in place, so it is handed a
+ * private copy of the (const) root.
+ */
+
+ if ( ! sess->opts->dry_run) {
+ if (NULL == (tofree = strdup(root))) {
+ ERR(sess, "strdup");
+ goto out;
+ } else if (mkpath(sess, tofree) < 0) {
+ ERRX1(sess, "%s: mkpath", root);
+ free(tofree);
+ goto out;
+ }
+ free(tofree);
+ }
+
+ /*
+ * Disable umask() so we can set permissions fully.
+ * Then open the directory iff we're not in dry_run.
+ * NOTE(review): the previous umask is kept in oumask and handed
+ * to upload_alloc() below; it is not restored in this function,
+ * on either the success or the error path.
+ */
+
+ oumask = umask(0);
+
+ if ( ! sess->opts->dry_run) {
+ dfd = open(root, O_RDONLY | O_DIRECTORY, 0);
+ if (-1 == dfd) {
+ ERR(sess, "%s: open", root);
+ goto out;
+ }
+ }
+
+ /*
+ * Begin by conditionally getting all files we have currently
+ * available in our destination.
+ * XXX: THIS IS A BUG IN OPENBSD 6.4.
+ * For newer version of OpenBSD, this is safe to put after the
+ * unveil.
+ * NOTE(review): the error message below says "flist_gen_local"
+ * although the function being called is flist_gen_dels().
+ */
+
+ if (sess->opts->del &&
+ sess->opts->recursive &&
+ ! flist_gen_dels(sess, root, &dfl, &dflsz, fl, flsz)) {
+ ERRX1(sess, "flist_gen_local");
+ goto out;
+ }
+
+ /*
+ * Make our entire view of the file-system be limited to what's
+ * in the root directory.
+ * This prevents us from accidentally (or "under the influence")
+ * writing into other parts of the file-system.
+ */
+
+ if (-1 == unveil(root, "rwc")) {
+ ERR(sess, "%s: unveil", root);
+ goto out;
+ } else if (-1 == unveil(NULL, NULL)) {
+ ERR(sess, "%s: unveil", root);
+ goto out;
+ }
+
+ /* If we have a local set, go for the deletion. */
+
+ if ( ! flist_del(sess, dfd, dfl, dflsz)) {
+ ERRX1(sess, "flist_del");
+ goto out;
+ }
+
+ /* Initialise poll events to listen from the sender. */
+
+ pfd[PFD_SENDER_IN].fd = fdin;
+ pfd[PFD_UPLOADER_IN].fd = -1;
+ pfd[PFD_DOWNLOADER_IN].fd = -1;
+ pfd[PFD_SENDER_OUT].fd = fdout;
+
+ pfd[PFD_SENDER_IN].events = POLLIN;
+ pfd[PFD_UPLOADER_IN].events = POLLIN;
+ pfd[PFD_DOWNLOADER_IN].events = POLLIN;
+ pfd[PFD_SENDER_OUT].events = POLLOUT;
+
+ ul = upload_alloc(sess, dfd, fdout,
+ CSUM_LENGTH_PHASE1, fl, flsz, oumask);
+ if (NULL == ul) {
+ ERRX1(sess, "upload_alloc");
+ goto out;
+ }
+
+ dl = download_alloc(sess, fdin, fl, flsz, dfd);
+ if (NULL == dl) {
+ ERRX1(sess, "download_alloc");
+ goto out;
+ }
+
+ LOG2(sess, "%s: ready for phase 1 data", root);
+
+ for (;;) {
+ if (-1 == (c = poll(pfd, PFD__MAX, INFTIM))) {
+ ERR(sess, "poll");
+ goto out;
+ }
+
+ for (i = 0; i < PFD__MAX; i++)
+ if (pfd[i].revents & (POLLERR|POLLNVAL)) {
+ ERRX(sess, "poll: bad fd");
+ goto out;
+ } else if (pfd[i].revents & POLLHUP) {
+ ERRX(sess, "poll: hangup");
+ goto out;
+ }
+
+ /*
+ * If we have a read event and we're multiplexing, we
+ * might just have error messages in the pipe.
+ * It's important to flush these out so that we don't
+ * clog the pipe.
+ * Unset our polling status if there's nothing that
+ * remains in the pipe.
+ */
+
+ if (sess->mplex_reads &&
+ (POLLIN & pfd[PFD_SENDER_IN].revents)) {
+ if ( ! io_read_flush(sess, fdin)) {
+ ERRX1(sess, "io_read_flush");
+ goto out;
+ } else if (0 == sess->mplex_read_remain)
+ pfd[PFD_SENDER_IN].revents &= ~POLLIN;
+ }
+
+
+ /*
+ * We run the uploader if we have files left to examine
+ * (i < flsz) or if we have a file that we've opened and
+ * is ready to mmap.
+ * The uploader is handed the addresses of its two poll
+ * descriptors.
+ */
+
+ if ((POLLIN & pfd[PFD_UPLOADER_IN].revents) ||
+ (POLLOUT & pfd[PFD_SENDER_OUT].revents)) {
+ c = rsync_uploader(ul,
+ &pfd[PFD_UPLOADER_IN].fd,
+ sess, &pfd[PFD_SENDER_OUT].fd);
+ if (c < 0) {
+ ERRX1(sess, "rsync_uploader");
+ goto out;
+ }
+ }
+
+ /*
+ * We need to run the downloader when we either have
+ * read events from the sender or an asynchronous local
+ * open is ready.
+ * XXX: we don't disable PFD_SENDER_IN like with the
+ * uploader because we might stop getting error
+ * messages, which will otherwise clog up the pipes.
+ * A zero return from the downloader ends phase 1 and
+ * leaves the poll loop.
+ */
+
+ if ((POLLIN & pfd[PFD_SENDER_IN].revents) ||
+ (POLLIN & pfd[PFD_DOWNLOADER_IN].revents)) {
+ c = rsync_downloader(dl, sess,
+ &pfd[PFD_DOWNLOADER_IN].fd);
+ if (c < 0) {
+ ERRX1(sess, "rsync_downloader");
+ goto out;
+ } else if (0 == c) {
+ assert(0 == phase);
+ phase++;
+ LOG2(sess, "%s: receiver ready "
+ "for phase 2 data", root);
+ break;
+ }
+
+ /*
+ * FIXME: if we have any errors during the
+ * download, most notably files getting out of
+ * sync between the sender and the receiver, then
+ * here we should bump our checksum length and
+ * go into the second phase.
+ */
+ }
+ }
+
+ /*
+ * Properly close us out by progressing through the phases:
+ * write -1 and expect -1 back as the acknowledgement.
+ */
+
+ if (1 == phase) {
+ if ( ! io_write_int(sess, fdout, -1)) {
+ ERRX1(sess, "io_write_int");
+ goto out;
+ } else if ( ! io_read_int(sess, fdin, &ioerror)) {
+ ERRX1(sess, "io_read_int");
+ goto out;
+ } else if (-1 != ioerror) {
+ ERRX(sess, "expected phase ack");
+ goto out;
+ }
+ }
+
+ /*
+ * Now all of our transfers are complete, so we can fix up our
+ * directory permissions.
+ */
+
+ if ( ! rsync_uploader_tail(ul, sess)) {
+ ERRX1(sess, "rsync_uploader_tail");
+ goto out;
+ }
+
+ /* Process server statistics and say good-bye. */
+
+ if ( ! sess_stats_recv(sess, fdin)) {
+ ERRX1(sess, "sess_stats_recv");
+ goto out;
+ } else if ( ! io_write_int(sess, fdout, -1)) {
+ ERRX1(sess, "io_write_int");
+ goto out;
+ }
+
+ LOG2(sess, "receiver finished updating");
+ rc = 1;
+out:
+ if (-1 != dfd)
+ close(dfd);
+ upload_free(ul);
+ download_free(dl);
+ flist_free(fl, flsz);
+ flist_free(dfl, dflsz);
+ return rc;
+}
diff --git a/usr.bin/rsync/rsync.1 b/usr.bin/rsync/rsync.1
new file mode 100644
index 00000000000..1a727ec776e
--- /dev/null
+++ b/usr.bin/rsync/rsync.1
@@ -0,0 +1,213 @@
+.\" $OpenBSD: rsync.1,v 1.1 2019/02/10 23:18:28 benno Exp $
+.\"
+.\" Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: February 10 2019 $
+.Dt RSYNC 1
+.Os
+.Sh NAME
+.Nm rsync
+.Nd synchronise local and remote files
+.Sh SYNOPSIS
+.Nm rsync
+.Op Fl lnprtv
+.Op Fl -delete
+.Op Fl -rsync-path Ar prog
+.Ar source ...
+.Ar directory
+.Sh DESCRIPTION
+The
+.Nm
+utility synchronises files in the destination
+.Ar directory
+with one or more
+.Ar source
+files.
+Either the
+.Ar source
+or the destination
+.Ar directory
+may be remote,
+but not both.
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl l
+Transfer symbolic links.
+The link is transferred as a standalone file: if the link's target does
+not exist on the destination, the link will be broken.
+.It Fl n
+Dry-run mode.
+Does not actually modify the destination.
+.It Fl p
+Set destination file or directory permissions to match the source when
+it is updated.
+.It Fl r
+If
+.Ar source
+designates a directory, synchronise the directory and the entire subtree
+connected at that point.
+If
+.Ar source
+ends with a slash, only the subtree is synchronised, not the root
+directory.
+If
+.Ar source
+is a file, this has no effect.
+.It Fl t
+Set destination file and directory modification time to match the source
+when it is updated or created.
+.It Fl v
+Increase verbosity.
+Specify once for files being transferred, twice for specific status,
+thrice for per-file transfer information, and four times for per-file
+breakdowns.
+.It Fl -delete
+Delete files in
+.Ar directory
+not found in
+.Ar source
+directories.
+Only applicable with
+.Fl r .
+.It Fl -rsync-path Ar prog
+Run
+.Ar prog
+on the remote host instead of the default
+.Ar rsync .
+.El
+.Pp
+A remote
+.Ar source
+or
+.Ar directory
+has syntax
+.Ar host:path
+for connecting via
+.Xr ssh 1 ,
+or
+.Ar rsync://host/path
+or
+.Ar host::path
+for connecting to a remote daemon.
+Subsequent to the first remote
+.Ar source ,
+the host may be dropped to become just
+.Ar :path
+or
+.Ar ::path .
+.Pp
+For connecting to a remote daemon with
+.Ar rsync://host
+or
+.Ar host::path ,
+the first path component is interpreted as a
+.Qq module :
+.Ar host::module/path .
+This only applies to the first
+.Ar source
+invocation; subsequent to that, the module should not be specified.
+.Pp
+By default, new destination files and directories are given the current
+time and the source file permissions.
+Updated files retain their existing permissions.
+It is an error if updated files have their file types change (e.g.,
+updating a directory with a file).
+.Pp
+At this time,
+.Ar source
+may only consist of regular files, directories
+.Pq only with Fl r ,
+or symbolic links
+.Pq only with Fl l .
+The destination
+.Ar directory
+must be a directory and is created if not found.
+.Pp
+.Nm
+is compatible with the GPL-licensed
+.Xr rsync 1
+protocol version 27.
+.\" The following requests should be uncommented and used where appropriate.
+.\" .Sh CONTEXT
+.\" For section 9 functions only.
+.\" .Sh RETURN VALUES
+.\" For sections 2, 3, and 9 function return values only.
+.\" .Sh ENVIRONMENT
+.\" For sections 1, 6, 7, and 8 only.
+.\" .Sh FILES
+.\" .Sh EXIT STATUS
+.\" For sections 1, 6, and 8 only.
+.Sh EXAMPLES
+All examples use
+.Fl t
+so that destination files inherit the source time.
+If not changed, subsequent invocations of
+.Nm
+will then consider the file up to date and not transfer block hashes.
+.Pp
+To update the out-of-date remote files
+.Pa host:dest/bar
+and
+.Pa host:dest/baz
+with the local
+.Pa ../src/bar
+and
+.Pa ../src/baz :
+.Pp
+.Dl % rsync -t ../src/bar ../src/baz host:dest
+.Pp
+To update the out-of-date local files
+.Pa bar
+and
+.Pa baz
+with the remote files
+.Pa host:src/bar
+and
+.Pa host:src/baz :
+.Pp
+.Dl % rsync -t host:src/bar :src/baz \&.
+.Pp
+To update the out-of-date local files
+.Pa ../dest/bar
+and
+.Pa ../dest/baz
+with
+.Pa bar
+and
+.Pa baz :
+.Pp
+.Dl % rsync -t bar baz ../dest
+.Pp
+To update the out-of-date remote files in
+.Pa host:dest
+on a remote host running
+.Nm
+with the local host running
+.Xr rsync 1 :
+.Pp
+.Dl % rsync --rsync-path rsync -t ../dest/* host:dest
+.\" .Sh DIAGNOSTICS
+.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only.
+.\" .Sh ERRORS
+.\" For sections 2, 3, 4, and 9 errno settings only.
+.Sh SEE ALSO
+.Xr ssh 1 ,
+.Xr rsync 5 ,
+.Xr rsyncd 5
+.\" .Sh STANDARDS
+.\" .Sh HISTORY
+.\" .Sh AUTHORS
+.\" .Sh CAVEATS
+.\" .Sh BUGS
diff --git a/usr.bin/rsync/rsync.5 b/usr.bin/rsync/rsync.5
new file mode 100644
index 00000000000..5c56bc25528
--- /dev/null
+++ b/usr.bin/rsync/rsync.5
@@ -0,0 +1,469 @@
+.\" $OpenBSD: rsync.5,v 1.1 2019/02/10 23:18:28 benno Exp $
+.\"
+.\" Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: February 10 2019 $
+.Dt RSYNC 5
+.Os
+.Sh NAME
+.Nm rsync
+.Nd rsync wire protocol
+.Sh DESCRIPTION
+The
+.Nm
+protocol described in this document relates to the BSD-licensed
+.Xr openrsync 1 ,
+a re-implementation of the GPL-licensed reference utility
+.Xr rsync 1 .
+It is compatible with version 27 of the reference.
+.Pp
+In this document, the
+.Qq client process
+refers to the utility as run on the operator's local computer.
+The
+.Qq server process
+is run either on the local or remote computer, depending upon the
+command-line given file locations.
+.Pp
+There are a number of options in the protocol that are dictated by command-line
+flags.
+These will be noted as
+.Fl n
+for dry-run,
+.Fl l
+for links,
+.Fl r
+for recursion,
+.Fl v
+for verbose, and
+.Fl -delete
+for deletion (before).
+.Ss Data types
+The binary protocol encodes all data in little-endian format.
+Integers are signed 32-bit, shorts are signed 16-bit, bytes are unsigned
+8-bit.
+A long is variable-length.
+For values less than the maximum integer, the value is transmitted and
+read as a 32-bit integer.
+For values greater, the value is transmitted first as a maximum integer,
+then a 64-bit signed integer.
+.Pp
+There are three types of checksums: long (slow), short (fast), and
+whole-file.
+The fast checksum is a derivative of Adler-32.
+The slow checksum is MD4,
+made over the checksum seed first (serialised in little-endian format),
+then the data.
+The whole-file applies MD4 to the file first, then the checksum seed at
+the end (also serialised in little-endian format).
+.Ss Multiplexing
+Most
+.Nm
+transmissions are wrapped in a multiplexing envelope protocol.
+It is composed as follows:
+.Pp
+.Bl -enum -compact
+.It
+envelope header (4 bytes)
+.It
+envelope payload (arbitrary length)
+.El
+.Pp
+The first byte of the envelope header consists of a tag.
+If the tag is 7, the payload is normal data.
+Otherwise, the payload is out-of-band server messages.
+If the tag is 1, it is an error on the sender's part and must trigger an
+exit.
+This limits message payloads to 24-bit integer size,
+.Li 0x00ffffff .
+.Pp
+The only data not using this envelope are the initial handshake between
+client and server.
+.Ss File list
+A central part of the protocol is the file list, which is generated by
+the sender.
+It consists of all files that must be sent to the receiver, either
+explicitly as given or recursively generated.
+.Pp
+The file list itself consists of filenames and attributes (mode, time,
+size, etc.).
+Filenames must be relative to the destination root and not be absolute
+or contain backtracking.
+So if a file is given to the sender as
+.Pa ../../foo/bar ,
+it must be sent as
+.Pa foo/bar .
+.Pp
+The file list should be cleaned of inappropriate files prior to sending.
+For example, if
+.Fl l
+is not specified, symbolic links may be omitted.
+Directory entries without
+.Fl r
+may also be omitted.
+Duplicates may be omitted.
+.Pp
+The receiver
+.Em must not
+assume that the file list is clean.
+It should not omit inappropriate files from the file list (which would
+affect the indexing), but may omit them during processing.
+.Pp
+Prior to being sent from sender to receiver, and upon being received, the
+file list must be lexicographically sorted such as with
+.Xr strcmp 3 .
+Subsequent references to the file are by index in the sorted list.
+.Ss Client process
+The client can operate in sender or receiver mode depending upon the
+command-line source and destination.
+.Pp
+If the destination directory (sink) is remote, the client is in sender
+mode: the client will push its data to the server.
+If the source file is remote, it is in receiver mode: the server pushes
+to the client.
+If neither is remote, the client operates in sender mode.
+These are all mutually exclusive.
+.Pp
+When the client starts, regardless of its mode, it first handshakes the
+server.
+This exchange is
+.Em not
+multiplexed.
+.Pp
+.Bl -enum -compact
+.It
+send local version (integer)
+.It
+receive remote version (integer)
+.It
+receive random seed (integer)
+.El
+.Pp
+Following this, the client multiplexes when reading from the server.
+Transmissions sent from client to server are not multiplexed.
+It then enters the
+.Sx Update exchange
+protocol.
+.Ss Server process
+The server can operate in sender or receiver mode depending upon how the
+client starts the server.
+This may be directly from the parent process (when invoked for local
+files) or indirectly via a remote shell.
+.Pp
+When in sender mode, the server pushes data to the client.
+(This is equivalent to receiver mode for the client.)
+In receiver mode, the opposite is true.
+.Pp
+When the server starts, regardless of the mode, it first handshakes the
+client.
+This exchange is
+.Em not
+multiplexed.
+.Pp
+.Bl -enum -compact
+.It
+send local version (integer)
+.It
+receive remote version (integer)
+.It
+send random seed (integer)
+.El
+.Pp
+Following this, the server multiplexes when writing to the client.
+(Transmissions received from the client are not multiplexed.)
+It then enters the
+.Sx Update exchange
+protocol.
+.Ss Update exchange
+When the client or server is in sender mode, it begins by conditionally
+sending the exclusion list.
+At this time, this is always empty.
+.Pp
+.Bl -enum -compact
+.It
+if
+.Fl -delete
+and the client, exclusion list zero (integer)
+.El
+.Pp
+It then sends the
+.Sx File list .
+Prior to being sent, the file list should be lexicographically sorted.
+.Pp
+.Bl -enum -compact
+.It
+status byte (byte)
+.It
+inherited filename length (optional, byte)
+.It
+filename length (integer or byte)
+.It
+file (byte array)
+.It
+file length (long)
+.It
+file modification time (optional, time_t, integer)
+.It
+file mode (optional, mode_t, integer)
+.It
+if a symbolic link and
+.Fl l ,
+the link target's length (integer)
+.It
+if a symbolic link and
+.Fl l ,
+the link target (byte array)
+.El
+.Pp
+The status byte may consist of the following bits and determines which
+of the optional fields are transmitted.
+.Pp
+.Bl -tag -compact -width Ds
+.It 0x02
+Do not send the file mode: it is a repeat of the last file's mode.
+.It 0x20
+Inherit some of the prior file name.
+Enables the inherited filename length transmission.
+.It 0x40
+Use full integer length for file name.
+Otherwise, use only the byte length.
+.It 0x80
+Do not send the file modification time: it is a repeat of the last
+file's.
+.El
+.Pp
+If the status byte is zero, the file-list has terminated.
+The sender then sends any IO error values, which for
+.Xr openrsync 1
+is always zero.
+.Pp
+.Bl -enum -compact
+.It
+constant zero (integer)
+.El
+.Pp
+The server sender then reads the exclusion list, which is always zero.
+.Pp
+.Bl -enum -compact
+.It
+if server, constant zero (integer)
+.El
+.Pp
+Following that, the sender receives data regarding the receiver's copy
+of the file list contents.
+This data is not ordered in any way.
+Each of these requests starts as follows:
+.Pp
+.Bl -enum -compact
+.It
+file index or -1 to signal a change of phase (integer)
+.El
+.Pp
+The phase starts in phase 1, then proceeds to phase 2, and phase 3
+signals an end of transmission (no subsequent blocks).
+If a phase change occurs, the sender must write back the -1 constant
+integer value and increment its phase state.
+.Pp
+Blocks are read as follows:
+.Pp
+.Bl -enum -compact
+.It
+block index (integer)
+.El
+.Pp
+In
+.Pq Fl n
+mode, the sender may immediately write back the index (integer) to skip
+the following.
+.Pp
+.Bl -enum -compact
+.It
+number of blocks (integer)
+.It
+block length in the file (integer)
+.It
+long checksum length (integer)
+.It
+terminal (remainder) block length (integer)
+.El
+.Pp
+And for each block:
+.Pp
+.Bl -enum -compact
+.It
+short checksum (integer)
+.It
+long checksum (bytes of checksum length)
+.El
+.Pp
+The client then compares the two files, block by block, and updates the
+server with mismatches as follows.
+.Pp
+.Bl -enum -compact
+.It
+file index (integer)
+.It
+number of blocks (integer)
+.It
+block length (integer)
+.It
+long checksum length (integer)
+.It
+remainder block length (integer)
+.El
+.Pp
+Then for each block:
+.Pp
+.Bl -enum -compact
+.It
+data chunk size (integer)
+.It
+data chunk (bytes)
+.It
+block index subsequent to chunk or zero for finished (integer)
+.El
+.Pp
+Following this sequence, the sender sends the following:
+.Pp
+.Bl -enum -compact
+.It
+whole-file long checksum (16 bytes)
+.El
+.Pp
+The sender then either handles the next queued file or, if the receiver
+has written a phase change, the phase change step.
+.Pp
+If the sender is the server and
+.Fl v
+has been specified, the sender must send statistics.
+.Pp
+.Bl -enum -compact
+.It
+total bytes read (long)
+.It
+total bytes written (long)
+.It
+total size of files (long)
+.El
+.Pp
+Finally, the sender must read a final constant-value integer.
+.Pp
+.Bl -enum -compact
+.It
+end-of-sequence -1 value (integer)
+.El
+.Pp
+If in receiver mode, the inverse above (write instead of read, read
+instead of write) is performed.
+.Pp
+The receiver begins by conditionally writing, then reading, the
+exclusion list count, which is always zero.
+.Pp
+.Bl -enum -compact
+.It
+if client, send zero (integer)
+.It
+if receiver and
+.Fl -delete ,
+read zero (integer)
+.El
+.Pp
+The receiver then proceeds with reading the
+.Sx File list
+as already
+defined.
+Following the list, the receiver reads the IO error, which must be zero.
+.Pp
+.Bl -enum -compact
+.It
+constant zero (integer)
+.El
+.Pp
+The receiver must then sort the file names lexicographically.
+.Pp
+If there are no files in the file list at this time, the receiver must
+exit prior to sending per-file data.
+It then proceeds with the file blocks.
+.Pp
+For file blocks, the receiver must look at each file that is not up to
+date (a file is up to date if it has the same file size and timestamp)
+and send it to the server.
+Symbolic links and directory entries are never sent to the server.
+.Pp
+After the second phase has completed and prior to writing the
+end-of-data signal, the client receiver reads statistics.
+This is only performed with
+.Pq Fl v .
+.Pp
+.Bl -enum -compact
+.It
+total bytes read (long)
+.It
+total bytes written (long)
+.It
+total size of files (long)
+.El
+.Pp
+Finally, the receiver must send the constant end-of-sequence marker.
+.Pp
+.Bl -enum -compact
+.It
+end-of-sequence -1 value (integer)
+.El
+.Ss Sender and receiver asynchrony
+The sender and receiver need not work in lockstep.
+The receiver may send file update requests as quickly as it parses them,
+and respond to the sender's update notices on demand.
+Similarly, the sender may read as many update requests as it can, and
+service them in any order it wishes.
+.Pp
+The sender and receiver synchronise state only at the end of phase.
+.Pp
+The reference
+.Xr rsync 1
+takes advantage of this with a two-process receiver, one for sending
+update requests (the generator) and another for receiving.
+.Xr openrsync 1
+uses an event-loop model instead.
+.\" .Sh CONTEXT
+.\" For section 9 functions only.
+.\" .Sh RETURN VALUES
+.\" For sections 2, 3, and 9 function return values only.
+.\" .Sh ENVIRONMENT
+.\" For sections 1, 6, 7, and 8 only.
+.\" .Sh FILES
+.\" .Sh EXIT STATUS
+.\" For sections 1, 6, and 8 only.
+.\" .Sh EXAMPLES
+.\" .Sh DIAGNOSTICS
+.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only.
+.\" .Sh ERRORS
+.\" For sections 2, 3, 4, and 9 errno settings only.
+.Sh SEE ALSO
+.Xr openrsync 1 ,
+.Xr rsync 1 ,
+.Xr rsyncd 5
+.\" .Sh STANDARDS
+.\" .Sh HISTORY
+.\" .Sh AUTHORS
+.\" .Sh CAVEATS
+.Sh BUGS
+Time values are sent as 32-bit integers.
+.Pp
+When in server mode
+.Em and
+when communicating to a client with a newer protocol (>27), the phase
+change integer (-1) acknowledgement must be sent twice by the sender.
+This is probably a bug in the reference implementation.
diff --git a/usr.bin/rsync/rsyncd.5 b/usr.bin/rsync/rsyncd.5
new file mode 100644
index 00000000000..d2e18fbac2e
--- /dev/null
+++ b/usr.bin/rsync/rsyncd.5
@@ -0,0 +1,135 @@
+.\" $OpenBSD: rsyncd.5,v 1.1 2019/02/10 23:18:28 benno Exp $
+.\"
+.\" Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: February 10 2019 $
+.Dt RSYNCD 5
+.Os
+.Sh NAME
+.Nm rsyncd
+.Nd rsyncd wire protocol
+.Sh DESCRIPTION
+The
+.Nm
+protocol described in this document relates to the BSD-licensed
+.Xr openrsync 1 ,
+a re-implementation of the GPL-licensed reference utility
+.Xr rsync 1 .
+It is compatible with version 27 of the reference.
+.Pp
+The
+.Nm
+protocol is an envelope protocol for
+.Xr rsync 5
+between a client and an rsync://-capable server.
+It provides a means to exchange capabilities information prior to file
+transfer.
+.Pp
+In this document,
+.Qq client
+refers to the
+.Xr openrsync 1
+utility making the request.
+It follows that
+.Qq server
+refers to the daemon servicing the request.
+.Pp
+A connection between a client and server consists of a host, a module, and
+zero or more paths.
+.Pp
+.Dl openrsync rsync://host/module/path1 rsync://host/path2... dest
+.Pp
+At this time, operating in sender mode (with the rsync:// host receiving
+information) is not described in this document.
+.Ss Data types
+These are the same as in
+.Xr rsync 5 .
+A newline is always a standalone \en.
+.Ss Client process
+After initialising a connection, the client and server exchange the
+following information, in order.
+This portion of the process is
+.Em not
+multiplexed.
+.Pp
+.Bl -enum -compact
+.It
+client sends requested module followed by newline
+.It
+server responds with preamble followed by standalone newline
+.El
+.Pp
+The requested module must have non-zero length.
+The preamble consists of a sequence of lines.
+Each line either contains free-form text sent by the server as a
+.Qq motd
+.Pq message of the day
+or a command:
+.Pp
+.Dl @RSYNCD: command\en
+.Pp
+The only supported command is the server protocol specification:
+.Pp
+.Dl @RSYNCD: xx[.yy]\en
+.Pp
+The optional component is the submodule, which may be discarded.
+The version may only be specified once.
+Both the motd and commands end in the special termination command:
+.Pp
+.Dl @RSYNCD: OK\en
+.Pp
+Following that, the client must send the command-line arguments that
+would otherwise be used to start an
+.Xr openrsync 1
+server.
+Each argument must be specified on its own line, e.g.,
+.Pp
+.Dl --server\en--sender\en-r\en-t\en.\enpath1\enpath2
+.Pp
+This must be followed by a standalone newline.
+.Pp
+If the server does not understand or accept any of the command-line
+arguments, it will exit at this point.
+.Pp
+Following this, the client must read the integer-length session checksum
+seed.
+Multiplexing is subsequently enabled.
+.Pp
+The sequence that follows is stipulated in
+.Xr rsync 5
+following the handshake.
+.\" The following requests should be uncommented and used where appropriate.
+.\" .Sh CONTEXT
+.\" For section 9 functions only.
+.\" .Sh RETURN VALUES
+.\" For sections 2, 3, and 9 function return values only.
+.\" .Sh ENVIRONMENT
+.\" For sections 1, 6, 7, and 8 only.
+.\" .Sh FILES
+.\" .Sh EXIT STATUS
+.\" For sections 1, 6, and 8 only.
+.\" .Sh EXAMPLES
+.\" .Sh DIAGNOSTICS
+.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only.
+.\" .Sh ERRORS
+.\" For sections 2, 3, 4, and 9 errno settings only.
+.Sh SEE ALSO
+.Xr openrsync 1 ,
+.Xr rsync 5
+.\" .Sh STANDARDS
+.\" .Sh HISTORY
+.\" .Sh AUTHORS
+.\" .Sh CAVEATS
+.\" .Sh BUGS
diff --git a/usr.bin/rsync/sender.c b/usr.bin/rsync/sender.c
new file mode 100644
index 00000000000..362c6c93ecb
--- /dev/null
+++ b/usr.bin/rsync/sender.c
@@ -0,0 +1,227 @@
+/* $Id: sender.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * The sender manages the read-only source files and sends data to the
+ * receiver as requested.
+ * It runs as either the client or the server (sess->opts->server).
+ * First it sends its list of files, generated from "argv" (of length
+ * "argc"), then it waits for the receiver to request updates to
+ * individual files, reading requests from "fdin" and writing responses
+ * to "fdout".
+ * Returns zero on failure, non-zero on success.
+ *
+ * Pledges: stdio, rpath, unveil.
+ */
+int
+rsync_sender(struct sess *sess, int fdin,
+	int fdout, size_t argc, char **argv)
+{
+	struct flist	*fl = NULL;
+	size_t		 flsz = 0, phase = 0, excl;
+	int		 rc = 0, c;
+	int32_t		 idx;
+	struct blkset	*blks = NULL;
+
+	if (-1 == pledge("unveil stdio rpath", NULL)) {
+		ERR(sess, "pledge");
+		return 0;
+	}
+
+	/*
+	 * Generate the list of files we want to send from our
+	 * command-line input.
+	 * This will also remove all invalid files.
+	 */
+
+	if ( ! flist_gen(sess, argc, argv, &fl, &flsz)) {
+		ERRX1(sess, "flist_gen");
+		goto out;
+	}
+
+	/* Client sends zero-length exclusions if deleting. */
+
+	if ( ! sess->opts->server && sess->opts->del &&
+	     ! io_write_int(sess, fdout, 0)) {
+		ERRX1(sess, "io_write_int");
+		goto out;
+	}
+
+	/*
+	 * Then the file list in any mode.
+	 * Finally, the IO error (always zero for us).
+	 */
+
+	if ( ! flist_send(sess, fdin, fdout, fl, flsz)) {
+		ERRX1(sess, "flist_send");
+		goto out;
+	} else if ( ! io_write_int(sess, fdout, 0)) {
+		ERRX1(sess, "io_write_int");
+		goto out;
+	}
+
+	/* Exit if we're the server with zero files. */
+
+	if (0 == flsz && sess->opts->server) {
+		WARNX(sess, "sender has empty file list: exiting");
+		rc = 1;
+		goto out;
+	} else if ( ! sess->opts->server)
+		LOG1(sess, "Transfer starting: %zu files", flsz);
+
+	/*
+	 * If we're the server, read our exclusion list.
+	 * This is always 0 for now.
+	 */
+
+	if (sess->opts->server) {
+		if ( ! io_read_size(sess, fdin, &excl)) {
+			ERRX1(sess, "io_read_size");
+			goto out;
+		} else if (0 != excl) {
+			ERRX1(sess, "exclusion list is non-empty");
+			goto out;
+		}
+	}
+
+	/*
+	 * We have two phases: the first has a two-byte checksum, the
+	 * second has a full 16-byte checksum.
+	 */
+
+	LOG2(sess, "sender transmitting phase 1 data");
+
+	for (;;) {
+		if ( ! io_read_int(sess, fdin, &idx)) {
+			ERRX1(sess, "io_read_int");
+			goto out;
+		}
+
+		/*
+		 * If we receive an invalid index (-1), then we're
+		 * either promoted to the second phase or it's time to
+		 * exit, depending upon which phase we're in.
+		 * The -1 is echoed back as the acknowledgement.
+		 */
+
+		if (-1 == idx) {
+			if ( ! io_write_int(sess, fdout, idx)) {
+				ERRX1(sess, "io_write_int");
+				goto out;
+			}
+
+			/* FIXME: I don't understand this ack. */
+
+			if (sess->opts->server && sess->rver > 27)
+				if ( ! io_write_int(sess, fdout, idx)) {
+					ERRX1(sess, "io_write_int");
+					goto out;
+				}
+
+			/* Second -1 received: leave the request loop. */
+
+			if (phase++)
+				break;
+			LOG2(sess, "sender transmitting phase 2 data");
+			continue;
+		}
+
+		/* Validate index and file type. */
+
+		if (idx < 0 || (uint32_t)idx >= flsz) {
+			ERRX(sess, "file index out of bounds: "
+				"invalid %" PRId32 " out of %zu",
+				idx, flsz);
+			goto out;
+		} else if (S_ISDIR(fl[idx].st.mode)) {
+			ERRX(sess, "blocks requested for "
+				"directory: %s", fl[idx].path);
+			goto out;
+		} else if (S_ISLNK(fl[idx].st.mode)) {
+			ERRX(sess, "blocks requested for "
+				"symlink: %s", fl[idx].path);
+			goto out;
+		} else if ( ! S_ISREG(fl[idx].st.mode)) {
+			ERRX(sess, "blocks requested for "
+				"special: %s", fl[idx].path);
+			goto out;
+		}
+
+		if ( ! sess->opts->server)
+			LOG1(sess, "%s", fl[idx].wpath);
+
+		/* Dry-run doesn't do anything but echo the index. */
+
+		if (sess->opts->dry_run) {
+			if ( ! io_write_int(sess, fdout, idx)) {
+				ERRX1(sess, "io_write_int");
+				goto out;
+			}
+			continue;
+		}
+
+		/*
+		 * The receiver will now send us its view of the file.
+		 * It does so by cutting a file into a series of blocks
+		 * and checksumming each block.
+		 * We can then compare the blocks in our file and those
+		 * in theirs, and send them blocks they're missing or
+		 * don't have.
+		 */
+
+		blks = blk_recv(sess, fdin, fl[idx].path);
+		if (NULL == blks) {
+			ERRX1(sess, "blk_recv");
+			goto out;
+		} else if ( ! blk_recv_ack(sess, fdout, blks, idx)) {
+			ERRX1(sess, "blk_recv_ack");
+			goto out;
+		}
+
+		/*
+		 * Release the block set and clear the pointer right
+		 * away so the shared exit path never frees it twice.
+		 */
+
+		c = blk_match(sess, fdout, blks, fl[idx].path);
+		blkset_free(blks);
+		blks = NULL;
+
+		if ( ! c) {
+			ERRX1(sess, "blk_match");
+			goto out;
+		}
+	}
+
+	if ( ! sess_stats_send(sess, fdout)) {
+		ERRX1(sess, "sess_stats_send");
+		goto out;
+	}
+
+	/* Final "goodbye" message. */
+
+	if ( ! io_read_int(sess, fdin, &idx)) {
+		ERRX1(sess, "io_read_int");
+		goto out;
+	} else if (-1 != idx) {
+		ERRX(sess, "read incorrect update complete ack");
+		goto out;
+	}
+
+	LOG2(sess, "sender finished updating");
+	rc = 1;
+out:
+	/* Non-NULL only if we bailed out between blk_recv and its free. */
+	if (NULL != blks)
+		blkset_free(blks);
+	flist_free(fl, flsz);
+	return rc;
+}
diff --git a/usr.bin/rsync/server.c b/usr.bin/rsync/server.c
new file mode 100644
index 00000000000..8ce49f0867f
--- /dev/null
+++ b/usr.bin/rsync/server.c
@@ -0,0 +1,162 @@
+/* $Id: server.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Add O_NONBLOCK to the file status flags of "fd", keeping the flags
+ * that are already set.
+ * Returns zero on failure (after logging), non-zero on success.
+ */
+static int
+fcntl_nonblock(struct sess *sess, int fd)
+{
+	int	 flags;
+
+	flags = fcntl(fd, F_GETFL, 0);
+	if (-1 == flags) {
+		ERR(sess, "fcntl: F_GETFL");
+		return 0;
+	}
+
+	if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
+		ERR(sess, "fcntl: F_SETFL");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * The server (remote) side of the system.
+ * This performs the protocol handshake over standard input and output,
+ * then moves into receiver or sender mode depending upon the options
+ * and the arguments given it by the remote shell.
+ * Returns zero on failure, non-zero on success.
+ *
+ * Pledges: unveil rpath, cpath, wpath, stdio, fattr.
+ *
+ * Pledges (dry-run): -cpath, -wpath, -fattr.
+ * Pledges (!preserve_times): -fattr.
+ */
+int
+rsync_server(const struct opts *opts, size_t argc, char *argv[])
+{
+	struct sess	 sess;
+	int		 fdin = STDIN_FILENO,
+			 fdout = STDOUT_FILENO, c = 0;
+
+	memset(&sess, 0, sizeof(struct sess));
+	sess.opts = opts;
+
+	/* Begin by making descriptors non-blocking. */
+
+	if ( ! fcntl_nonblock(&sess, fdin) ||
+	     ! fcntl_nonblock(&sess, fdout)) {
+		ERRX1(&sess, "fcntl_nonblock");
+		goto out;
+	}
+
+	/* Standard rsync preamble, server side. */
+
+	sess.lver = RSYNC_PROTOCOL;
+	sess.seed = arc4random();
+
+	if ( ! io_read_int(&sess, fdin, &sess.rver)) {
+		ERRX1(&sess, "io_read_int");
+		goto out;
+	} else if ( ! io_write_int(&sess, fdout, sess.lver)) {
+		ERRX1(&sess, "io_write_int");
+		goto out;
+	} else if ( ! io_write_int(&sess, fdout, sess.seed)) {
+		ERRX1(&sess, "io_write_int");
+		goto out;
+	}
+
+	/* Everything written after the preamble is multiplexed. */
+
+	sess.mplex_writes = 1;
+
+	if (sess.rver < sess.lver) {
+		ERRX(&sess, "remote protocol is older "
+			"than our own (%" PRId32 " < %" PRId32 "): "
+			"this is not supported",
+			sess.rver, sess.lver);
+		goto out;
+	}
+
+	LOG2(&sess, "server detected client version %" PRId32
+		", server version %" PRId32 ", seed %" PRId32,
+		sess.rver, sess.lver, sess.seed);
+
+	if (sess.opts->sender) {
+		LOG2(&sess, "server starting sender");
+
+		/*
+		 * At this time, I always get a period as the first
+		 * argument of the command line.
+		 * Let's make it a requirement until I figure out when
+		 * that differs.
+		 *   rsync [flags] "." <source> <...>
+		 *
+		 * Check argc before touching argv[0]: with an empty
+		 * vector the string comparison would read past it and
+		 * the decrement below would wrap the unsigned count.
+		 */
+
+		if (0 == argc || strcmp(argv[0], ".")) {
+			ERRX(&sess, "first argument must "
+				"be a standalone period");
+			goto out;
+		}
+		argv++;
+		argc--;
+		if (0 == argc) {
+			ERRX(&sess, "must have arguments");
+			goto out;
+		}
+
+		if ( ! rsync_sender(&sess, fdin, fdout, argc, argv)) {
+			ERRX1(&sess, "rsync_sender");
+			goto out;
+		}
+	} else {
+		LOG2(&sess, "server starting receiver");
+
+		/*
+		 * I don't understand why this calling convention
+		 * exists, but we must adhere to it.
+		 *   rsync [flags] "." <destination>
+		 */
+
+		if (2 != argc) {
+			ERRX(&sess, "server receiver mode "
+				"requires two argument");
+			goto out;
+		} else if (strcmp(argv[0], ".")) {
+			ERRX(&sess, "first argument must "
+				"be a standalone period");
+			goto out;
+		}
+
+		if ( ! rsync_receiver(&sess, fdin, fdout, argv[1])) {
+			ERRX1(&sess, "rsync_receiver");
+			goto out;
+		}
+	}
+
+#if 0
+	/* Probably the EOF. */
+	if (io_read_check(&sess, fdin))
+		WARNX(&sess, "data remains in read pipe");
+#endif
+
+	c = 1;
+out:
+	return c;
+}
diff --git a/usr.bin/rsync/session.c b/usr.bin/rsync/session.c
new file mode 100644
index 00000000000..8ba1ebb0d38
--- /dev/null
+++ b/usr.bin/rsync/session.c
@@ -0,0 +1,161 @@
+/* $Id: session.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/param.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Scale a byte count to the largest unit (GB, MB, KB or plain B) that
+ * it reaches.
+ * Stores the unit suffix in "unit" and the number of decimals to print
+ * in "prec" (3, 2, 1 or 0 respectively); returns the scaled value.
+ */
+static double
+stats_scale(uint64_t bytes, const char **unit, int *prec)
+{
+
+	if (bytes >= 1024 * 1024 * 1024) {
+		*unit = "GB";
+		*prec = 3;
+		return bytes / (1024.0 * 1024.0 * 1024.0);
+	}
+	if (bytes >= 1024 * 1024) {
+		*unit = "MB";
+		*prec = 2;
+		return bytes / (1024.0 * 1024.0);
+	}
+	if (bytes >= 1024) {
+		*unit = "KB";
+		*prec = 1;
+		return bytes / 1024.0;
+	}
+
+	*unit = "B";
+	*prec = 0;
+	return bytes;
+}
+
+/*
+ * Log the read, written and total file-size byte counts in a
+ * human-readable fashion (with GB, MB, etc. suffixes).
+ * Must only be called in verbose mode; prints nothing when running as
+ * the server.
+ * NOTE(review): the read count is printed under the "sent" label and
+ * the written count under "read" -- confirm this is intended.
+ */
+static void
+stats_log(struct sess *sess,
+	uint64_t tread, uint64_t twrite, uint64_t tsize)
+{
+	const char	*runit, *wunit, *sunit;
+	int		 rprec, wprec, sprec;
+	double		 rval, wval, sval;
+
+	assert(sess->opts->verbose);
+	if (sess->opts->server)
+		return;
+
+	rval = stats_scale(tread, &runit, &rprec);
+	wval = stats_scale(twrite, &wunit, &wprec);
+	sval = stats_scale(tsize, &sunit, &sprec);
+
+	LOG1(sess, "Transfer complete: "
+		"%.*lf %s sent, "
+		"%.*lf %s read, "
+		"%.*lf %s file size",
+		rprec, rval, runit,
+		wprec, wval, wunit,
+		sprec, sval, sunit);
+}
+
+/*
+ * End-of-transmission statistics for the sending side.
+ * Does nothing unless verbose mode is on.
+ * As the server, write the session's read, written and file-size
+ * totals (in that order) to "fd"; the totals are then handed to
+ * stats_log(), which prints only when not the server.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+sess_stats_send(struct sess *sess, int fd)
+{
+	uint64_t	 totals[3];
+	size_t		 i;
+
+	if ( ! sess->opts->verbose)
+		return 1;
+
+	/* Wire order: read, written, size. */
+
+	totals[0] = sess->total_read;
+	totals[1] = sess->total_write;
+	totals[2] = sess->total_size;
+
+	if (sess->opts->server)
+		for (i = 0; i < 3; i++)
+			if ( ! io_write_long(sess, fd, totals[i])) {
+				ERRX1(sess, "io_write_long");
+				return 0;
+			}
+
+	stats_log(sess, totals[0], totals[1], totals[2]);
+	return 1;
+}
+
+/*
+ * At the end of the transmission, read the statistics sent by the
+ * peer and log them.
+ * Only do this (1) if we're in verbose mode and (2) if we're not the
+ * server; otherwise return success without reading anything.
+ * The first value read is kept as our written count and the second as
+ * our read count (presumably because the peer reports them from its
+ * own point of view -- confirm against the sending side).
+ * Return zero on failure, non-zero on success.
+ */
+int
+sess_stats_recv(struct sess *sess, int fd)
+{
+	uint64_t	 nread, nwritten, nsize;
+
+	if (sess->opts->server || ! sess->opts->verbose)
+		return 1;
+
+	if ( ! io_read_ulong(sess, fd, &nwritten) ||
+	     ! io_read_ulong(sess, fd, &nread) ||
+	     ! io_read_ulong(sess, fd, &nsize)) {
+		ERRX1(sess, "io_read_ulong");
+		return 0;
+	}
+
+	stats_log(sess, nread, nwritten, nsize);
+	return 1;
+}
+
diff --git a/usr.bin/rsync/socket.c b/usr.bin/rsync/socket.c
new file mode 100644
index 00000000000..ca28e172b17
--- /dev/null
+++ b/usr.bin/rsync/socket.c
@@ -0,0 +1,435 @@
+/* $Id: socket.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <netdb.h>
+#include <poll.h>
+#include <resolv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Defines a resolved IP address for the host.
+ * There can be many, IPV4 or IPV6.
+ * inet_resolve() fills in one entry per usable address and
+ * inet_connect() consumes a single entry.
+ */
+struct source {
+	int family; /* PF_INET or PF_INET6; passed to socket(2) */
+	char ip[INET6_ADDRSTRLEN]; /* address formatted by inet_ntop(3) */
+	struct sockaddr_storage sa; /* socket address passed to connect(2) */
+	socklen_t salen; /* number of bytes of "sa" in use */
+};
+
+/*
+ * Try to connect to one resolved address of "host".
+ * A descriptor already held in "sd" is closed first; "sd" then
+ * receives the new socket, which is switched to non-blocking mode once
+ * the (blocking) connect has succeeded.
+ * Return <0 on failure, 0 on try another address, >0 on success.
+ */
+static int
+inet_connect(struct sess *sess, int *sd,
+	const struct source *src, const char *host)
+{
+	int	 fl;
+
+	if (*sd != -1)
+		close(*sd);
+
+	LOG2(sess, "trying: %s, %s", src->ip, host);
+
+	*sd = socket(src->family, SOCK_STREAM, 0);
+	if (*sd == -1) {
+		ERR(sess, "socket");
+		return -1;
+	}
+
+	/*
+	 * The connect is deliberately blocking: there is nothing else
+	 * to do while waiting for it to finish.
+	 * A refused or unreachable peer is not fatal, as the caller may
+	 * have further addresses to try.
+	 */
+
+	if (connect(*sd, (const struct sockaddr *)&src->sa,
+	    src->salen) == -1) {
+		if (errno != ECONNREFUSED && errno != EHOSTUNREACH) {
+			ERR(sess, "connect");
+			return -1;
+		}
+		WARNX(sess, "connect refused: %s, %s", src->ip, host);
+		return 0;
+	}
+
+	/* From here on the descriptor is used in non-blocking mode. */
+
+	fl = fcntl(*sd, F_GETFL, 0);
+	if (fl == -1 || fcntl(*sd, F_SETFL, fl | O_NONBLOCK) == -1) {
+		ERR(sess, "fcntl");
+		return -1;
+	}
+
+	return 1;
+}
+
+/*
+ * Resolve the socket addresses for host, both in IPV4 and IPV6.
+ * The lookup is made for port 873 and only AF_INET and AF_INET6
+ * results are kept.
+ * Once completed, the "dns" pledge may be dropped.
+ * On success, returns an array that the caller must free() and sets
+ * "sz" to its number of entries.
+ * Returns NULL on failure ("sz" is always zero, in this case).
+ */
+static struct source *
+inet_resolve(struct sess *sess, const char *host, size_t *sz)
+{
+	struct addrinfo	 hints, *res0, *res;
+	struct sockaddr	*sa;
+	struct source	*src = NULL;
+	size_t		 i, srcsz = 0;
+	int		 error;
+
+	*sz = 0;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_DGRAM; /* DUMMY */
+
+	/* Announce the lookup before making it: it may take a while. */
+
+	LOG2(sess, "resolving: %s", host);
+
+	error = getaddrinfo(host, "873", &hints, &res0);
+
+	if (error == EAI_AGAIN || error == EAI_NONAME) {
+		ERRX(sess, "DNS resolve error: %s: %s",
+		    host, gai_strerror(error));
+		return NULL;
+	} else if (error) {
+		ERRX(sess, "DNS parse error: %s: %s",
+		    host, gai_strerror(error));
+		return NULL;
+	}
+
+	/* Allocate for all available addresses. */
+
+	for (res = res0; NULL != res; res = res->ai_next)
+		if (res->ai_family == AF_INET ||
+		    res->ai_family == AF_INET6)
+			srcsz++;
+
+	if (0 == srcsz) {
+		ERRX(sess, "no addresses resolved: %s", host);
+		freeaddrinfo(res0);
+		return NULL;
+	}
+
+	src = calloc(srcsz, sizeof(struct source));
+	if (NULL == src) {
+		/* Report with errno, as other allocation failures do. */
+		ERR(sess, "calloc");
+		freeaddrinfo(res0);
+		return NULL;
+	}
+
+	for (i = 0, res = res0; NULL != res; res = res->ai_next) {
+		if (res->ai_family != AF_INET &&
+		    res->ai_family != AF_INET6)
+			continue;
+
+		assert(i < srcsz);
+
+		/* Copy the socket address. */
+
+		src[i].salen = res->ai_addrlen;
+		memcpy(&src[i].sa, res->ai_addr, src[i].salen);
+
+		/* Format as a string, too. */
+
+		sa = res->ai_addr;
+		if (AF_INET == res->ai_family) {
+			src[i].family = PF_INET;
+			inet_ntop(AF_INET,
+			    &(((struct sockaddr_in *)sa)->sin_addr),
+			    src[i].ip, INET6_ADDRSTRLEN);
+		} else {
+			src[i].family = PF_INET6;
+			inet_ntop(AF_INET6,
+			    &(((struct sockaddr_in6 *)sa)->sin6_addr),
+			    src[i].ip, INET6_ADDRSTRLEN);
+		}
+
+		LOG2(sess, "DNS resolved: %s: %s", host, src[i].ip);
+		i++;
+	}
+
+	freeaddrinfo(res0);
+	*sz = srcsz;
+	return src;
+}
+
+/*
+ * Process one line of the rsyncd preamble.
+ * A line not starting with "@RSYNCD: " is free-form text and is simply
+ * logged.
+ * Otherwise the remainder is either "OK", which ends the preamble, or
+ * the remote protocol version, whose major number is stored in
+ * sess->rver.
+ * The "host" argument is not used.
+ * Return <0 on failure, 0 on try more lines, >0 on finished.
+ */
+static int
+protocol_line(struct sess *sess, const char *host, const char *cp)
+{
+	static const char	 prefix[] = "@RSYNCD: ";
+	const size_t		 prefixsz = sizeof(prefix) - 1;
+	int			 major, minor;
+
+	if (strncmp(cp, prefix, prefixsz) != 0) {
+		LOG0(sess, "%s", cp);
+		return 0;
+	}
+
+	for (cp += prefixsz; isspace((unsigned char)*cp); cp++)
+		continue;
+
+	/* @RSYNCD: OK indicates that we're finished. */
+
+	if (strcmp(cp, "OK") == 0)
+		return 1;
+
+	/*
+	 * All that remains is the version, either as "x.y" or as "x".
+	 * A single scan covers both: at least one conversion means that
+	 * the major number was read, and the minor number is ignored.
+	 */
+
+	if (sscanf(cp, "%d.%d", &major, &minor) < 1) {
+		ERRX(sess, "rsyncd protocol error: unknown command");
+		return -1;
+	}
+
+	sess->rver = major;
+	return 0;
+}
+
+/*
+ * Act as a client talking to an rsync daemon over a socket.
+ * Resolves and connects to f->host, exchanges the "@RSYNCD" preamble
+ * for f->module, sends the command-line arguments, reads the seed and
+ * then runs the receiver over the connected socket.
+ * Returns zero on failure, non-zero on success.
+ *
+ * Pledges: dns, inet, unveil, rpath, cpath, wpath, stdio, fattr.
+ *
+ * Pledges (dry-run): -cpath, -wpath, -fattr.
+ * Pledges (!preserve_times): -fattr.
+ */
+int
+rsync_socket(const struct opts *opts, const struct fargs *f)
+{
+	struct sess	  sess;
+	struct source	 *src = NULL;
+	size_t		  i, srcsz = 0;
+	int		  sd = -1, rc = 0, c;
+	char		**args, buf[BUFSIZ];
+	uint8_t		  byte;
+
+	memset(&sess, 0, sizeof(struct sess));
+	sess.lver = RSYNC_PROTOCOL;
+	sess.opts = opts;
+
+	assert(NULL != f->host);
+	assert(NULL != f->module);
+
+	if (NULL == (args = fargs_cmdline(&sess, f))) {
+		ERRX1(&sess, "fargs_cmdline");
+		return 0;
+	}
+
+	/* Resolve all IP addresses from the host. */
+
+	if (NULL == (src = inet_resolve(&sess, f->host, &srcsz))) {
+		ERRX1(&sess, "inet_resolve");
+		free(args);
+		return 0;
+	}
+
+	/* Drop the DNS pledge. */
+
+	if (-1 == pledge("inet unveil rpath cpath wpath stdio fattr", NULL)) {
+		ERR(&sess, "pledge");
+		goto out;
+	}
+
+	/*
+	 * Iterate over all addresses, trying to connect.
+	 * When we succeed, then continue using the connected socket.
+	 */
+
+	assert(srcsz);
+	for (i = 0; i < srcsz; i++) {
+		c = inet_connect(&sess, &sd, &src[i], f->host);
+		if (c < 0) {
+			ERRX1(&sess, "inet_connect");
+			goto out;
+		} else if (c > 0)
+			break;
+	}
+
+	/* Drop the inet pledge. */
+
+	if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL)) {
+		ERR(&sess, "pledge");
+		goto out;
+	}
+
+	/* Running off the end means that no address accepted us. */
+
+	if (i == srcsz) {
+		ERRX(&sess, "cannot connect to host: %s", f->host);
+		goto out;
+	}
+
+	/* Initiate with the rsyncd version and module request. */
+
+	LOG2(&sess, "connected: %s, %s", src[i].ip, f->host);
+
+	(void)snprintf(buf, sizeof(buf), "@RSYNCD: %d", sess.lver);
+	if ( ! io_write_line(&sess, sd, buf)) {
+		ERRX1(&sess, "io_write_line");
+		goto out;
+	}
+
+	LOG2(&sess, "requesting module: %s, %s", f->module, f->host);
+
+	if ( ! io_write_line(&sess, sd, f->module)) {
+		ERRX1(&sess, "io_write_line");
+		goto out;
+	}
+
+	/*
+	 * Now we read the server's response, byte-by-byte, one newline
+	 * terminated at a time, limited to BUFSIZ line length.
+	 * For this protocol version, this consists of either @RSYNCD
+	 * followed by some text (just "ok" and the remote version) or
+	 * the message of the day.
+	 */
+
+	for (;;) {
+		for (i = 0; i < sizeof(buf); i++) {
+			if ( ! io_read_byte(&sess, sd, &byte)) {
+				ERRX1(&sess, "io_read_byte");
+				goto out;
+			}
+			if ('\n' == (buf[i] = byte))
+				break;
+		}
+		if (i == sizeof(buf)) {
+			ERRX(&sess, "line buffer overrun");
+			goto out;
+		} else if (0 == i)
+			continue;
+
+		/*
+		 * The rsyncd protocol isn't very clear as to whether we
+		 * get a CRLF or not: I don't actually see this being
+		 * transmitted over the wire.
+		 */
+
+		assert(i > 0);
+		buf[i] = '\0';
+		if ('\r' == buf[i - 1])
+			buf[i - 1] = '\0';
+
+		if ((c = protocol_line(&sess, f->host, buf)) < 0) {
+			ERRX1(&sess, "protocol_line");
+			goto out;
+		} else if (c > 0)
+			break;
+	}
+
+	/*
+	 * Now we've exchanged all of our protocol information.
+	 * We want to send our command-line arguments over the wire,
+	 * each with a newline termination.
+	 * Use the same arguments when invoking the server, but leave
+	 * off the binary name(s).
+	 * Emit a standalone newline afterward.
+	 */
+
+	if (FARGS_RECEIVER == f->mode || FARGS_SENDER == f->mode)
+		i = 3; /* ssh host rsync... */
+	else
+		i = 1; /* rsync... */
+
+	for ( ; NULL != args[i]; i++)
+		if ( ! io_write_line(&sess, sd, args[i])) {
+			ERRX1(&sess, "io_write_line");
+			goto out;
+		}
+
+	/* Name the function that actually failed in the error trace. */
+
+	if ( ! io_write_byte(&sess, sd, '\n')) {
+		ERRX1(&sess, "io_write_byte");
+		goto out;
+	}
+
+	/*
+	 * All data after this point is going to be multiplexed, so turn
+	 * on the multiplexer for our reads and writes.
+	 */
+
+	/* Protocol exchange: get the random seed. */
+
+	if ( ! io_read_int(&sess, sd, &sess.seed)) {
+		ERRX1(&sess, "io_read_int");
+		goto out;
+	}
+
+	/* Now we've completed the handshake. */
+
+	if (sess.rver < sess.lver) {
+		ERRX(&sess, "remote protocol is older "
+		    "than our own (%" PRId32 " < %" PRId32 "): "
+		    "this is not supported",
+		    sess.rver, sess.lver);
+		goto out;
+	}
+
+	sess.mplex_reads = 1;
+	LOG2(&sess, "read multiplexing enabled");
+
+	LOG2(&sess, "socket detected client version %" PRId32
+	    ", server version %" PRId32 ", seed %" PRId32,
+	    sess.lver, sess.rver, sess.seed);
+
+	assert(FARGS_RECEIVER == f->mode);
+
+	LOG2(&sess, "client starting receiver: %s", f->host);
+	if ( ! rsync_receiver(&sess, sd, sd, f->sink)) {
+		ERRX1(&sess, "rsync_receiver");
+		goto out;
+	}
+
+#if 0
+	/* Probably the EOF. */
+	if (io_read_check(&sess, sd))
+		WARNX(&sess, "data remains in read pipe");
+#endif
+
+	rc = 1;
+out:
+	/* Common exit: release everything acquired above. */
+	free(src);
+	free(args);
+	if (-1 != sd)
+		close(sd);
+	return rc;
+}
diff --git a/usr.bin/rsync/symlinks.c b/usr.bin/rsync/symlinks.c
new file mode 100644
index 00000000000..b85d3866fc9
--- /dev/null
+++ b/usr.bin/rsync/symlinks.c
@@ -0,0 +1,102 @@
+/* $Id: symlinks.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/param.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Read the target of the symbolic link at "path" with readlink(2).
+ * The buffer starts at MAXPATHLEN bytes and is doubled for as long as
+ * the result fills it completely, as the target may then have been
+ * truncated.
+ * Returns NULL on failure (including an empty link) or a
+ * NUL-terminated buffer otherwise.
+ * The buffer must be passed to free() by the caller.
+ */
+char *
+symlink_read(struct sess *sess, const char *path)
+{
+	char	*link = NULL, *grown;
+	size_t	 cap = MAXPATHLEN;
+	ssize_t	 len;
+
+	for (;;) {
+		/* One extra byte for the terminating NUL. */
+
+		grown = realloc(link, cap + 1);
+		if (grown == NULL) {
+			ERR(sess, "realloc");
+			break;
+		}
+		link = grown;
+
+		len = readlink(path, link, cap);
+		if (len == -1) {
+			ERR(sess, "%s: readlink", path);
+			break;
+		}
+		if (len == 0) {
+			ERRX(sess, "%s: empty link", path);
+			break;
+		}
+		if ((size_t)len < cap) {
+			link[len] = '\0';
+			return link;
+		}
+
+		cap *= 2;
+	}
+
+	/* All failures end up here. */
+
+	free(link);
+	return NULL;
+}
+
+/*
+ * Read the target of the symbolic link at "path", relative to the
+ * directory descriptor "fd", with readlinkat(2).
+ * The buffer starts at MAXPATHLEN bytes and is doubled for as long as
+ * the result fills it completely, as the target may then have been
+ * truncated.
+ * Returns NULL on failure (including an empty link) or a
+ * NUL-terminated buffer otherwise.
+ * The buffer must be passed to free() by the caller.
+ */
+char *
+symlinkat_read(struct sess *sess, int fd, const char *path)
+{
+	char	*link = NULL, *grown;
+	size_t	 cap = MAXPATHLEN;
+	ssize_t	 len;
+
+	for (;;) {
+		/* One extra byte for the terminating NUL. */
+
+		grown = realloc(link, cap + 1);
+		if (grown == NULL) {
+			ERR(sess, "realloc");
+			break;
+		}
+		link = grown;
+
+		len = readlinkat(fd, path, link, cap);
+		if (len == -1) {
+			ERR(sess, "%s: readlinkat", path);
+			break;
+		}
+		if (len == 0) {
+			ERRX(sess, "%s: empty link", path);
+			break;
+		}
+		if ((size_t)len < cap) {
+			link[len] = '\0';
+			return link;
+		}
+
+		cap *= 2;
+	}
+
+	/* All failures end up here. */
+
+	free(link);
+	return NULL;
+}
diff --git a/usr.bin/rsync/uploader.c b/usr.bin/rsync/uploader.c
new file mode 100644
index 00000000000..5017d6b2571
--- /dev/null
+++ b/usr.bin/rsync/uploader.c
@@ -0,0 +1,772 @@
+/* $Id: uploader.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * States of the uploader, as stored in struct upload and advanced by
+ * rsync_uploader().
+ */
+enum uploadst {
+	UPLOAD_FIND_NEXT = 0, /* find next to upload to sender */
+	UPLOAD_WRITE_LOCAL, /* wait to write to sender */
+	UPLOAD_READ_LOCAL, /* wait to read from local file */
+	UPLOAD_FINISHED /* nothing more to do in phase */
+};
+
+/*
+ * Used to keep track of data flowing from the receiver to the sender.
+ * This is managed by the receiver process.
+ * Allocated by upload_alloc() and released by upload_free().
+ */
+struct upload {
+	enum uploadst state; /* current state of the uploader */
+	char *buf; /* if not NULL, pending upload */
+	size_t bufsz; /* size of buf (bytes of the pending upload) */
+	size_t bufmax; /* maximum size of buf (allocated capacity) */
+	size_t bufpos; /* position in buf (bytes already written) */
+	size_t idx; /* current transfer index (into fl) */
+	mode_t oumask; /* umask for creating files */
+	int rootfd; /* destination directory */
+	size_t csumlen; /* checksum length */
+	int fdout; /* write descriptor to sender */
+	const struct flist *fl; /* file list (not freed by upload_free) */
+	size_t flsz; /* size of file list */
+	int *newdir; /* non-zero if mkdir'd (one entry per fl entry) */
+};
+
+/*
+ * Log a directory entry, making sure the logged path ends in a slash
+ * so that the operator can tell it is a directory.
+ * Nothing is logged when running as the server.
+ */
+static void
+log_dir(struct sess *sess, const struct flist *f)
+{
+	size_t		 len;
+	const char	*slash;
+
+	if (sess->opts->server)
+		return;
+
+	len = strlen(f->path);
+	assert(len > 0);
+
+	/* Only append a slash if the path doesn't already have one. */
+
+	slash = f->path[len - 1] == '/' ? "" : "/";
+	LOG1(sess, "%s%s", f->path, slash);
+}
+
+/*
+ * Log a symbolic link as "path -> target" so that the operator can
+ * tell it is a link.
+ * Nothing is logged when running as the server.
+ */
+static void
+log_link(struct sess *sess, const struct flist *f)
+{
+
+	if (sess->opts->server)
+		return;
+
+	LOG1(sess, "%s -> %s", f->path, f->link);
+}
+
+/*
+ * Log the path of a file.
+ * Nothing is logged when running as the server.
+ */
+static void
+log_file(struct sess *sess, const struct flist *f)
+{
+
+	if (sess->opts->server)
+		return;
+
+	LOG1(sess, "%s", f->path);
+}
+
+/*
+ * Prepare the overall block set's metadata for a file of size "sz".
+ * The block length is BLOCK_SIZE_MIN unless the file is at least
+ * BLOCK_SIZE_MIN squared bytes long, in which case it is the square
+ * root of the file size, rounded up to an integer and then up to a
+ * multiple of eight (there's no reason for the latter except that
+ * rsync does it).
+ * The block count is the number of whole blocks plus one more for any
+ * remainder; a file shorter than one block is a single block whose
+ * remainder is the whole file.
+ */
+static void
+init_blkset(struct blkset *p, off_t sz)
+{
+
+	/* Start from the minimum and grow for large files only. */
+
+	p->len = BLOCK_SIZE_MIN;
+	if (sz >= (BLOCK_SIZE_MIN * BLOCK_SIZE_MIN)) {
+		p->len = ceil(sqrt(sz));
+		if ((p->len % 8) != 0)
+			p->len += 8 - (p->len % 8);
+	}
+
+	p->size = sz;
+	p->blksz = sz / p->len;
+
+	if (p->blksz == 0)
+		p->rem = sz;
+	else
+		p->rem = sz % p->len;
+
+	/* A trailing partial block counts as a block, too. */
+
+	if (p->rem != 0)
+		p->blksz++;
+}
+
+/*
+ * For each block, prepare the block's metadata.
+ * "idx" is the block's index within "set" and "offs" its byte offset
+ * into "map", the mapped file from which both checksums are computed.
+ */
+static void
+init_blk(struct blk *p, const struct blkset *set, off_t offs,
+	size_t idx, const void *map, const struct sess *sess)
+{
+	const void	*data;
+
+	assert(MAP_FAILED != map);
+
+	/* Arithmetic on "void *" is not standard C: go via "char *". */
+
+	data = (const char *)map + offs;
+
+	/*
+	 * Every block has the set's block length except a short last
+	 * block.
+	 * The last block is only short if there is a remainder: when
+	 * the file size is an exact multiple of the block length, the
+	 * remainder is zero and the last block is a full one.
+	 */
+
+	p->idx = idx;
+	if (idx + 1 == set->blksz && set->rem != 0)
+		p->len = set->rem;
+	else
+		p->len = set->len;
+	p->offs = offs;
+
+	p->chksum_short = hash_fast(data, p->len);
+	hash_slow(data, p->len, p->chksum_long, sess);
+}
+
+/*
+ * Create or update the symbolic link for the current file list entry
+ * beneath the destination directory.
+ * Does nothing but warn if links are not being preserved, and nothing
+ * but log in dry-run mode.
+ * Return <0 on failure 0 on success.
+ */
+static int
+pre_link(struct upload *p, struct sess *sess)
+{
+	int rc, newlink = 0;
+	char *b;
+	struct stat st;
+	struct timespec tv[2];
+	const struct flist *f;
+
+	f = &p->fl[p->idx];
+	assert(S_ISLNK(f->st.mode));
+
+	if ( ! sess->opts->preserve_links) {
+		WARNX(sess, "%s: ignoring symlink", f->path);
+		return 0;
+	} else if (sess->opts->dry_run) {
+		log_link(sess, f);
+		return 0;
+	}
+
+	/*
+	 * See if the symlink already exists.
+	 * Anything that exists but isn't a symlink is an error, as is
+	 * any fstatat() failure other than the path not existing.
+	 */
+
+	assert(-1 != p->rootfd);
+	rc = fstatat(p->rootfd, f->path, &st, AT_SYMLINK_NOFOLLOW);
+	if (-1 != rc && ! S_ISLNK(st.st_mode)) {
+		WARNX(sess, "%s: not a symlink", f->path);
+		return -1;
+	} else if (-1 == rc && ENOENT != errno) {
+		WARN(sess, "%s: fstatat", f->path);
+		return -1;
+	}
+
+	/*
+	 * If the symbolic link already exists, then make sure that it
+	 * points to the correct place.
+	 * FIXME: does symlinkat() set permissions on the link using the
+	 * destination file or the default umask?
+	 * Do we need a fchmod in here as well?
+	 */
+
+	if (-1 == rc) {
+		/* No such path: create the link from scratch. */
+		LOG3(sess, "%s: creating "
+			"symlink: %s", f->path, f->link);
+		if (-1 == symlinkat(f->link, p->rootfd, f->path)) {
+			WARN(sess, "%s: symlinkat", f->path);
+			return -1;
+		}
+		newlink = 1;
+	} else {
+		/* Existing link: replace it only if the target differs. */
+		b = symlinkat_read(sess, p->rootfd, f->path);
+		if (NULL == b) {
+			ERRX1(sess, "%s: symlinkat_read", f->path);
+			return -1;
+		}
+		if (strcmp(f->link, b)) {
+			/*
+			 * Release and clear the old target now, so that
+			 * the error returns below don't leak it and the
+			 * trailing free() is a no-op.
+			 */
+			free(b);
+			b = NULL;
+			LOG3(sess, "%s: updating "
+				"symlink: %s", f->path, f->link);
+			if (-1 == unlinkat(p->rootfd, f->path, 0)) {
+				WARN(sess, "%s: unlinkat", f->path);
+				return -1;
+			}
+			if (-1 == symlinkat(f->link, p->rootfd, f->path)) {
+				WARN(sess, "%s: symlinkat", f->path);
+				return -1;
+			}
+			newlink = 1;
+		}
+		free(b);
+	}
+
+	/*
+	 * Optionally preserve times/perms on the symlink.
+	 * The access time is set to the current time and the
+	 * modification time to the one in the file list.
+	 */
+
+	if (sess->opts->preserve_times) {
+		tv[0].tv_sec = time(NULL);
+		tv[0].tv_nsec = 0;
+		tv[1].tv_sec = f->st.mtime;
+		tv[1].tv_nsec = 0;
+		rc = utimensat(p->rootfd,
+			f->path, tv, AT_SYMLINK_NOFOLLOW);
+		if (-1 == rc) {
+			ERR(sess, "%s: utimensat", f->path);
+			return -1;
+		}
+		LOG4(sess, "%s: updated symlink date", f->path);
+	}
+
+	/*
+	 * FIXME: if newlink is set because we updated the symlink, we
+	 * want to carry over the permissions from the last.
+	 */
+
+	if (newlink || sess->opts->preserve_perms) {
+		rc = fchmodat(p->rootfd, f->path,
+			f->st.mode, AT_SYMLINK_NOFOLLOW);
+		if (-1 == rc) {
+			ERR(sess, "%s: fchmodat", f->path);
+			return -1;
+		}
+		LOG4(sess, "%s: updated symlink mode", f->path);
+	}
+
+	log_link(sess, f);
+	return 0;
+}
+
+/*
+ * Make sure the directory for the current file list entry exists
+ * beneath the destination directory, creating it if it's missing.
+ * New directories are made with mode 0777 less the saved umask and are
+ * flagged in "newdir"; an existing directory is left untouched here.
+ * Does nothing but warn if not recursive, and nothing but log in
+ * dry-run mode.
+ * Return <0 on failure 0 on success.
+ */
+static int
+pre_dir(const struct upload *p, struct sess *sess)
+{
+	const struct flist	*f = &p->fl[p->idx];
+	struct stat		 st;
+	int			 rc;
+
+	assert(S_ISDIR(f->st.mode));
+
+	if (!sess->opts->recursive) {
+		WARNX(sess, "%s: ignoring directory", f->path);
+		return 0;
+	}
+	if (sess->opts->dry_run) {
+		log_dir(sess, f);
+		return 0;
+	}
+
+	assert(p->rootfd != -1);
+	rc = fstatat(p->rootfd, f->path, &st, AT_SYMLINK_NOFOLLOW);
+
+	if (rc != -1) {
+		if (!S_ISDIR(st.st_mode)) {
+			WARNX(sess, "%s: not a directory", f->path);
+			return -1;
+		}
+
+		/*
+		 * FIXME: we should fchmod the permissions here as well,
+		 * as we may locally have shut down writing into the
+		 * directory and that doesn't work.
+		 */
+
+		LOG3(sess, "%s: updating directory", f->path);
+		return 0;
+	}
+
+	/* Only a missing path is acceptable from here on. */
+
+	if (errno != ENOENT) {
+		WARN(sess, "%s: fstatat", f->path);
+		return -1;
+	}
+
+	/*
+	 * Make the directory with default permissions (using our old
+	 * umask, which we've since unset): the mode proper is adjusted
+	 * afterward in case it's u-w or something.
+	 */
+
+	LOG3(sess, "%s: creating directory", f->path);
+	if (mkdirat(p->rootfd, f->path, 0777 & ~p->oumask) == -1) {
+		WARN(sess, "%s: mkdirat", f->path);
+		return -1;
+	}
+
+	p->newdir[p->idx] = 1;
+	log_dir(sess, f);
+	return 0;
+}
+
+/*
+ * Set the modification time and mode of the directory at "idx" in the
+ * file list.
+ * The time is set for directories flagged as new, or when preserving
+ * times and the time differs; the mode likewise for new directories,
+ * or when preserving permissions and the mode differs.
+ * Does nothing if not recursive or in dry-run mode.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+post_dir(struct sess *sess, const struct upload *u, size_t idx)
+{
+	const struct flist	*f = &u->fl[idx];
+	struct timespec		 tv[2];
+	struct stat		 st;
+	int			 isnew;
+
+	assert(S_ISDIR(f->st.mode));
+
+	/* pre_dir() has already warned about ignored directories. */
+
+	if (!sess->opts->recursive || sess->opts->dry_run)
+		return 1;
+
+	if (fstatat(u->rootfd, f->path, &st, AT_SYMLINK_NOFOLLOW) == -1) {
+		ERR(sess, "%s: fstatat", f->path);
+		return 0;
+	}
+	if (!S_ISDIR(st.st_mode)) {
+		WARNX(sess, "%s: not a directory", f->path);
+		return 0;
+	}
+
+	isnew = u->newdir[idx];
+
+	/*
+	 * Access time becomes the current time, modification time the
+	 * one from the file list.
+	 */
+
+	if (isnew || (sess->opts->preserve_times &&
+	    st.st_mtime != f->st.mtime)) {
+		tv[0].tv_sec = time(NULL);
+		tv[0].tv_nsec = 0;
+		tv[1].tv_sec = f->st.mtime;
+		tv[1].tv_nsec = 0;
+		if (utimensat(u->rootfd, f->path, tv, 0) == -1) {
+			ERR(sess, "%s: utimensat", f->path);
+			return 0;
+		}
+		LOG4(sess, "%s: updated date", f->path);
+	}
+
+	if (isnew || (sess->opts->preserve_perms &&
+	    st.st_mode != f->st.mode)) {
+		if (fchmodat(u->rootfd, f->path, f->st.mode, 0) == -1) {
+			ERR(sess, "%s: fchmodat", f->path);
+			return 0;
+		}
+		LOG4(sess, "%s: updated mode", f->path);
+	}
+
+	return 1;
+}
+
+/*
+ * Try to open the file at the current index for reading, storing the
+ * descriptor in "filefd".
+ * A file that does not exist is not an error: "filefd" is then -1.
+ * In dry-run mode the file is only logged and its index written to
+ * the sender straight away.
+ * Return <0 on failure, 0 on success w/nothing to be done, >0 on
+ * success and the file needs attention.
+ */
+static int
+pre_file(const struct upload *p, int *filefd, struct sess *sess)
+{
+	const struct flist	*f = &p->fl[p->idx];
+
+	assert(S_ISREG(f->st.mode));
+
+	if (sess->opts->dry_run) {
+		log_file(sess, f);
+		if (io_write_int(sess, p->fdout, p->idx))
+			return 0;
+		ERRX1(sess, "io_write_int");
+		return -1;
+	}
+
+	/*
+	 * For non dry-run cases, the acknowledgement is written later
+	 * in rsync_uploader() because we need to wait for the open to
+	 * complete.
+	 * If openat() fails with ENOENT, there's a fast-path between
+	 * here and the write function, so we won't do any blocking
+	 * between now and then.
+	 */
+
+	*filefd = openat(p->rootfd, f->path,
+	    O_RDONLY | O_NOFOLLOW | O_NONBLOCK, 0);
+
+	if (*filefd == -1 && errno != ENOENT) {
+		ERR(sess, "%s: openat", f->path);
+		return -1;
+	}
+
+	return 1;
+}
+
+/*
+ * Allocate an uploader object, starting in the UPLOAD_FIND_NEXT state,
+ * along with its zeroed per-file "newdir" flags.
+ * The file list "fl" is referenced, not copied.
+ * Returns NULL on failure or the pointer otherwise.
+ * On success, upload_free() must be called with the allocated pointer.
+ */
+struct upload *
+upload_alloc(struct sess *sess, int rootfd, int fdout,
+	size_t clen, const struct flist *fl, size_t flsz, mode_t msk)
+{
+	struct upload	*up;
+
+	up = calloc(1, sizeof(struct upload));
+	if (up == NULL) {
+		ERR(sess, "calloc");
+		return NULL;
+	}
+
+	/* One flag per file list entry. */
+
+	up->newdir = calloc(flsz, sizeof(int));
+	if (up->newdir == NULL) {
+		ERR(sess, "calloc");
+		free(up);
+		return NULL;
+	}
+
+	up->state = UPLOAD_FIND_NEXT;
+	up->fl = fl;
+	up->flsz = flsz;
+	up->rootfd = rootfd;
+	up->fdout = fdout;
+	up->csumlen = clen;
+	up->oumask = msk;
+	return up;
+}
+
+/*
+ * Release an uploader object along with its "newdir" flags and its
+ * upload buffer.
+ * Passing a NULL to this function is ok.
+ */
+void
+upload_free(struct upload *p)
+{
+
+	if (p != NULL) {
+		free(p->buf);
+		free(p->newdir);
+		free(p);
+	}
+}
+
+/*
+ * Iterates through all available files and conditionally gets the file
+ * ready for processing to check whether it's up to date.
+ * If not up to date or empty, sends file information to the sender.
+ * "fileinfd" and "fileoutfd" are the descriptors the caller polls for
+ * reading and writing: they are set to -1 here whenever the respective
+ * direction should not be polled.
+ * If returns 0, we've processed all files there are to process.
+ * If returns >0, we're waiting for POLLIN or POLLOUT data.
+ * Otherwise returns <0, which is an error.
+ */
+int
+rsync_uploader(struct upload *u, int *fileinfd,
+	struct sess *sess, int *fileoutfd)
+{
+	struct blkset	 blk;
+	struct stat	 st;
+	void		*map, *bufp;
+	size_t		 i, mapsz, pos, sz;
+	off_t		 offs;
+	int		 c;
+
+	/* This should never get called. */
+
+	assert(UPLOAD_FINISHED != u->state);
+
+	/*
+	 * If we have an upload in progress, then keep writing until the
+	 * buffer has been fully written.
+	 * We must only have the output file descriptor working and also
+	 * have a valid buffer to write.
+	 */
+
+	if (UPLOAD_WRITE_LOCAL == u->state) {
+		assert(NULL != u->buf);
+		assert(-1 != *fileoutfd);
+		assert(-1 == *fileinfd);
+
+		/*
+		 * Unfortunately, we need to chunk these: if we're
+		 * the server side of things, then we're multiplexing
+		 * output and need to wrap this in chunks.
+		 * This is a major deficiency of rsync.
+		 * FIXME: add a "fast-path" mode that simply dumps out
+		 * the buffer non-blocking if we're not mplexing.
+		 */
+
+		if (u->bufpos < u->bufsz) {
+			sz = MAX_CHUNK < (u->bufsz - u->bufpos) ?
+				MAX_CHUNK : (u->bufsz - u->bufpos);
+			c = io_write_buf(sess, u->fdout,
+				u->buf + u->bufpos, sz);
+			if (0 == c) {
+				/* Name the function that failed. */
+				ERRX1(sess, "io_write_buf");
+				return -1;
+			}
+			u->bufpos += sz;
+			if (u->bufpos < u->bufsz)
+				return 1;
+		}
+
+		/*
+		 * Let the UPLOAD_FIND_NEXT state handle things if we
+		 * finish, as we'll need to write a POLLOUT message and
+		 * not have a writable descriptor yet.
+		 */
+
+		u->state = UPLOAD_FIND_NEXT;
+		u->idx++;
+		return 1;
+	}
+
+	/*
+	 * If we invoke the uploader without a file currently open, then
+	 * we iterate through til the next available regular file and
+	 * start the opening process.
+	 * This means we must have the output file descriptor working.
+	 */
+
+	if (UPLOAD_FIND_NEXT == u->state) {
+		assert(-1 == *fileinfd);
+		assert(-1 != *fileoutfd);
+
+		for ( ; u->idx < u->flsz; u->idx++) {
+			if (S_ISDIR(u->fl[u->idx].st.mode))
+				c = pre_dir(u, sess);
+			else if (S_ISLNK(u->fl[u->idx].st.mode))
+				c = pre_link(u, sess);
+			else if (S_ISREG(u->fl[u->idx].st.mode))
+				c = pre_file(u, fileinfd, sess);
+			else
+				c = 0;
+
+			if (c < 0)
+				return -1;
+			else if (c > 0)
+				break;
+		}
+
+		/*
+		 * Whether we've finished writing files or not, we
+		 * disable polling on the output channel.
+		 */
+
+		*fileoutfd = -1;
+		if (u->idx == u->flsz) {
+			assert(-1 == *fileinfd);
+			if ( ! io_write_int(sess, u->fdout, -1)) {
+				ERRX1(sess, "io_write_int");
+				return -1;
+			}
+			u->state = UPLOAD_FINISHED;
+			LOG4(sess, "uploader: finished");
+			return 0;
+		}
+
+		/*
+		 * Go back to the event loop, if necessary.
+		 * Without an open input file (it doesn't exist), fall
+		 * straight through to preparing the block metadata.
+		 */
+
+		u->state = -1 == *fileinfd ?
+			UPLOAD_WRITE_LOCAL : UPLOAD_READ_LOCAL;
+		if (UPLOAD_READ_LOCAL == u->state)
+			return 1;
+	}
+
+	/*
+	 * If an input file is open, stat it and see if it's already up
+	 * to date, in which case close it and go to the next one.
+	 * Either way, we don't have a write channel open.
+	 */
+
+	if (UPLOAD_READ_LOCAL == u->state) {
+		assert (-1 != *fileinfd);
+		assert(-1 == *fileoutfd);
+
+		if (-1 == fstat(*fileinfd, &st)) {
+			WARN(sess, "%s: fstat", u->fl[u->idx].path);
+			close(*fileinfd);
+			*fileinfd = -1;
+			return -1;
+		} else if ( ! S_ISREG(st.st_mode)) {
+			WARNX(sess, "%s: not regular", u->fl[u->idx].path);
+			close(*fileinfd);
+			*fileinfd = -1;
+			return -1;
+		}
+
+		/* Same size and modification time: up to date. */
+
+		if (st.st_size == u->fl[u->idx].st.size &&
+		    st.st_mtime == u->fl[u->idx].st.mtime) {
+			LOG3(sess, "%s: skipping: "
+				"up to date", u->fl[u->idx].path);
+			close(*fileinfd);
+			*fileinfd = -1;
+			*fileoutfd = u->fdout;
+			u->state = UPLOAD_FIND_NEXT;
+			u->idx++;
+			return 1;
+		}
+
+		/* Fallthrough... */
+
+		u->state = UPLOAD_WRITE_LOCAL;
+	}
+
+	/*
+	 * Initialise our blocks.
+	 * "st" is only valid if an input file is open, hence the order
+	 * of the checks below.
+	 */
+
+	assert(UPLOAD_WRITE_LOCAL == u->state);
+	memset(&blk, 0, sizeof(struct blkset));
+	blk.csum = u->csumlen;
+
+	if (-1 != *fileinfd && st.st_size > 0) {
+		mapsz = st.st_size;
+		map = mmap(NULL, mapsz,
+			PROT_READ, MAP_SHARED, *fileinfd, 0);
+		if (MAP_FAILED == map) {
+			WARN(sess, "%s: mmap", u->fl[u->idx].path);
+			close(*fileinfd);
+			*fileinfd = -1;
+			return -1;
+		}
+
+		init_blkset(&blk, st.st_size);
+		assert(blk.blksz);
+
+		blk.blks = calloc(blk.blksz, sizeof(struct blk));
+		if (NULL == blk.blks) {
+			ERR(sess, "calloc");
+			munmap(map, mapsz);
+			close(*fileinfd);
+			*fileinfd = -1;
+			return -1;
+		}
+
+		offs = 0;
+		for (i = 0; i < blk.blksz; i++) {
+			init_blk(&blk.blks[i],
+				&blk, offs, i, map, sess);
+			offs += blk.len;
+		}
+
+		munmap(map, mapsz);
+		close(*fileinfd);
+		*fileinfd = -1;
+		LOG3(sess, "%s: mapped %jd B with %zu blocks",
+			u->fl[u->idx].path, (intmax_t)blk.size,
+			blk.blksz);
+	} else {
+		if (-1 != *fileinfd) {
+			close(*fileinfd);
+			*fileinfd = -1;
+		}
+		blk.len = MAX_CHUNK; /* Doesn't matter. */
+		LOG3(sess, "%s: not mapped", u->fl[u->idx].path);
+	}
+
+	assert(-1 == *fileinfd);
+
+	/* Make sure the block metadata buffer is big enough. */
+
+	u->bufsz =
+		sizeof(int32_t) + /* identifier */
+		sizeof(int32_t) + /* block count */
+		sizeof(int32_t) + /* block length */
+		sizeof(int32_t) + /* checksum length */
+		sizeof(int32_t) + /* block remainder */
+		blk.blksz *
+		(sizeof(int32_t) + /* short checksum */
+		 blk.csum); /* long checksum */
+
+	if (u->bufsz > u->bufmax) {
+		if (NULL == (bufp = realloc(u->buf, u->bufsz))) {
+			ERR(sess, "realloc");
+			/* Don't leak the block array (may be NULL). */
+			free(blk.blks);
+			return -1;
+		}
+		u->buf = bufp;
+		u->bufmax = u->bufsz;
+	}
+
+	/* Serialise the header, then each block's checksums. */
+
+	u->bufpos = pos = 0;
+	io_buffer_int(sess, u->buf, &pos, u->bufsz, u->idx);
+	io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.blksz);
+	io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.len);
+	io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.csum);
+	io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.rem);
+	for (i = 0; i < blk.blksz; i++) {
+		io_buffer_int(sess, u->buf, &pos, u->bufsz,
+			blk.blks[i].chksum_short);
+		io_buffer_buf(sess, u->buf, &pos, u->bufsz,
+			blk.blks[i].chksum_long, blk.csum);
+	}
+	assert(pos == u->bufsz);
+
+	/* Reenable the output poller and clean up. */
+
+	*fileoutfd = u->fdout;
+	free(blk.blks);
+	return 1;
+}
+
+/*
+ * Fix up the directory permissions and times post-order.
+ * This can't be done in place because the server may want us to have
+ * overly-tight permissions---say, those that don't allow writing into
+ * the directory---and because making files within a directory changes
+ * its modification time.
+ * Nothing is done unless times or permissions are being preserved.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+rsync_uploader_tail(struct upload *u, struct sess *sess)
+{
+	size_t	 idx;
+
+	if (!(sess->opts->preserve_times || sess->opts->preserve_perms))
+		return 1;
+
+	LOG2(sess, "fixing up directory times and permissions");
+
+	for (idx = 0; idx < u->flsz; idx++) {
+		if (!S_ISDIR(u->fl[idx].st.mode))
+			continue;
+		if (!post_dir(sess, u, idx))
+			return 0;
+	}
+
+	return 1;
+}