diff options
author | 2019-02-10 23:18:28 +0000 | |
---|---|---|
committer | 2019-02-10 23:18:28 +0000 | |
commit | 60a32ee9289593d1c350423a61f21dab436b3e7f (patch) | |
tree | e5b7c139d6c2987f1cdc502bc8edde00dea9b429 | |
parent | Revert previous. requested by deraadt@ (diff) | |
download | wireguard-openbsd-60a32ee9289593d1c350423a61f21dab436b3e7f.tar.xz wireguard-openbsd-60a32ee9289593d1c350423a61f21dab436b3e7f.zip |
Import Kristaps' openrsync into the tree.
OK deraadt@
-rw-r--r-- | usr.bin/rsync/Makefile | 57 | ||||
-rw-r--r-- | usr.bin/rsync/TODO.md | 42 | ||||
-rw-r--r-- | usr.bin/rsync/blocks.c | 678 | ||||
-rw-r--r-- | usr.bin/rsync/child.c | 72 | ||||
-rw-r--r-- | usr.bin/rsync/client.c | 106 | ||||
-rw-r--r-- | usr.bin/rsync/downloader.c | 600 | ||||
-rw-r--r-- | usr.bin/rsync/extern.h | 295 | ||||
-rw-r--r-- | usr.bin/rsync/fargs.c | 104 | ||||
-rw-r--r-- | usr.bin/rsync/flist.c | 1160 | ||||
-rw-r--r-- | usr.bin/rsync/hash.c | 94 | ||||
-rw-r--r-- | usr.bin/rsync/io.c | 585 | ||||
-rw-r--r-- | usr.bin/rsync/log.c | 194 | ||||
-rw-r--r-- | usr.bin/rsync/main.c | 453 | ||||
-rw-r--r-- | usr.bin/rsync/md4.c | 265 | ||||
-rw-r--r-- | usr.bin/rsync/md4.h | 49 | ||||
-rw-r--r-- | usr.bin/rsync/mkpath.c | 77 | ||||
-rw-r--r-- | usr.bin/rsync/receiver.c | 341 | ||||
-rw-r--r-- | usr.bin/rsync/rsync.1 | 213 | ||||
-rw-r--r-- | usr.bin/rsync/rsync.5 | 469 | ||||
-rw-r--r-- | usr.bin/rsync/rsyncd.5 | 135 | ||||
-rw-r--r-- | usr.bin/rsync/sender.c | 227 | ||||
-rw-r--r-- | usr.bin/rsync/server.c | 162 | ||||
-rw-r--r-- | usr.bin/rsync/session.c | 161 | ||||
-rw-r--r-- | usr.bin/rsync/socket.c | 435 | ||||
-rw-r--r-- | usr.bin/rsync/symlinks.c | 102 | ||||
-rw-r--r-- | usr.bin/rsync/uploader.c | 772 |
26 files changed, 7848 insertions, 0 deletions
diff --git a/usr.bin/rsync/Makefile b/usr.bin/rsync/Makefile new file mode 100644 index 00000000000..686f76a5983 --- /dev/null +++ b/usr.bin/rsync/Makefile @@ -0,0 +1,57 @@ +PREFIX = /usr/local +OBJS = blocks.o \ + child.o \ + client.o \ + downloader.o \ + fargs.o \ + flist.o \ + hash.o \ + io.o \ + log.o \ + md4.o \ + mkpath.o \ + receiver.o \ + sender.o \ + server.o \ + session.o \ + socket.o \ + symlinks.o \ + uploader.o +ALLOBJS = $(OBJS) \ + main.o +AFLS = afl/test-blk_recv \ + afl/test-flist_recv +CFLAGS += -O0 -g -W -Wall -Wextra -Wno-unused-parameter +MANDIR = $(PREFIX)/man +BINDIR = $(PREFIX)/bin + +all: openrsync + +openrsync: $(ALLOBJS) + $(CC) -o $@ $(ALLOBJS) -lm + +afl: $(AFLS) + +$(AFLS): $(OBJS) + $(CC) -o $@ $*.c $(OBJS) + +install: openrsync + mkdir -p $(DESTDIR)$(BINDIR) + mkdir -p $(DESTDIR)$(MANDIR)/man1 + mkdir -p $(DESTDIR)$(MANDIR)/man5 + install -m 0444 openrsync.1 $(DESTDIR)$(MANDIR)/man1 + install -m 0444 rsync.5 rsyncd.5 $(DESTDIR)$(MANDIR)/man5 + install -m 0555 openrsync $(DESTDIR)$(BINDIR) + +uninstall: + rm -f $(DESTDIR)$(BINDIR)/openrsync + rm -f $(DESTDIR)$(MANDIR)/man1/openrsync.1 + rm -f $(DESTDIR)$(MANDIR)/man5/rsync.5 + rm -f $(DESTDIR)$(MANDIR)/man5/rsyncd.5 + +clean: + rm -f $(ALLOBJS) openrsync $(AFLS) + +$(ALLOBJS) $(AFLS): extern.h + +blocks.o downloader.o hash.o md4.o: md4.h diff --git a/usr.bin/rsync/TODO.md b/usr.bin/rsync/TODO.md new file mode 100644 index 00000000000..c66371a5181 --- /dev/null +++ b/usr.bin/rsync/TODO.md @@ -0,0 +1,42 @@ +This is a list of possible work projects within openrsync, rated by difficulty. + +First, porting: see +[Porting](https://github.com/kristapsdz/openrsync/blob/master/README.md#Portability) +for information on this topic. +I've included the specific security porting topics below. + +This list also does not include adding support for features (e.g., **-u** and +so on). 
+ +- Easy: add a hashtable to `blk_find()` in + [blocks.c](https://github.com/kristapsdz/openrsync/blob/master/blocks.c) + for quickly looking up fast-hash matches. + +- Easy: print more statistics, such as transfer times and rates. + +- Easy: tighten the [pledge(2)](https://man.openbsd.org/pledge.2) and + [unveil(2)](https://man.openbsd.org/unveil.2) to work with **-n**, as + it does not touch files. + +- Easy: find the shared path for all input files and + [unveil(2)](https://man.openbsd.org/unveil.2) only the shared path + instead of each one. + +- Medium: have the log messages when multiplex writing (server mode) is + enabled be flushed out through the multiplex channel. + Right now, they're emitted on `stderr` just like with the client. + +- Medium: porting the security precautions + ([unveil(2)](https://man.openbsd.org/unveil.2), + [pledge(2)](https://man.openbsd.org/pledge.2)) to + [FreeBSD](https://www.freebsd.org)'s + [Capsicum](https://wiki.freebsd.org/Capsicum). + Without this in place, you're exposing your file-system to whatever is + coming down over the wire. + This is certainly possible, as openrsync makes exclusive use of the "at" + functions (e.g., [openat(2)](https://man.openbsd.org/openat.2)) for working + with files. + +- Hard: the same, but for Linux. + +Above all, `grep FIXME *.c *.h` and start from there. diff --git a/usr.bin/rsync/blocks.c b/usr.bin/rsync/blocks.c new file mode 100644 index 00000000000..d6c26eec988 --- /dev/null +++ b/usr.bin/rsync/blocks.c @@ -0,0 +1,678 @@ +/* $Id: blocks.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/mman.h> +#include <sys/stat.h> + +#include <assert.h> +#include <endian.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "md4.h" +#include "extern.h" + +/* + * Flush out "size" bytes of the buffer, doing all of the appropriate + * chunking of the data, then the subsequent token (or zero). + * This is symmetrised in blk_merge(). + * Return zero on failure, non-zero on success. + */ +static int +blk_flush(struct sess *sess, int fd, + const void *b, off_t size, int32_t token) +{ + off_t i = 0, sz; + + while (i < size) { + sz = MAX_CHUNK < (size - i) ? + MAX_CHUNK : (size - i); + if ( ! io_write_int(sess, fd, sz)) { + ERRX1(sess, "io_write_int"); + return 0; + } else if ( ! io_write_buf(sess, fd, b + i, sz)) { + ERRX1(sess, "io_write_buf"); + return 0; + } + i += sz; + } + + if ( ! io_write_int(sess, fd, token)) { + ERRX1(sess, "io_write_int"); + return 0; + } + + return 1; +} + +/* + * From our current position of "offs" in buffer "buf" of total size + * "size", see if we can find a matching block in our list of blocks. + * The "hint" refers to the block that *might* work. + * Returns the blk or NULL if no matching block was found. 
+ */ +static struct blk * +blk_find(struct sess *sess, const void *buf, off_t size, off_t offs, + const struct blkset *blks, const char *path, size_t hint) +{ + unsigned char md[MD4_DIGEST_LENGTH]; + uint32_t fhash; + off_t remain, osz; + size_t i; + int have_md = 0; + + /* + * First, compute our fast hash. + * FIXME: yes, this can be a rolling computation, but I'm + * deliberately making it simple first. + */ + + remain = size - offs; + assert(remain); + osz = remain < (off_t)blks->len ? remain : (off_t)blks->len; + fhash = hash_fast(buf + offs, (size_t)osz); + have_md = 0; + + /* + * Start with our match hint. + * This just runs the fast and slow check with the hint. + */ + + if (hint < blks->blksz && + fhash == blks->blks[hint].chksum_short && + (size_t)osz == blks->blks[hint].len) { + hash_slow(buf + offs, (size_t)osz, md, sess); + have_md = 1; + if (0 == memcmp(md, + blks->blks[hint].chksum_long, blks->csum)) { + LOG4(sess, "%s: found matching hinted match: " + "position %jd, block %zu " + "(position %jd, size %zu)", path, + (intmax_t)offs, blks->blks[hint].idx, + (intmax_t)blks->blks[hint].offs, + blks->blks[hint].len); + return &blks->blks[hint]; + } + } + + /* + * Now loop and look for the fast hash. + * If it's found, move on to the slow hash. + */ + + for (i = 0; i < blks->blksz; i++) { + if (fhash != blks->blks[i].chksum_short) + continue; + if ((size_t)osz != blks->blks[i].len) + continue; + + LOG4(sess, "%s: found matching fast match: " + "position %jd, block %zu " + "(position %jd, size %zu)", path, + (intmax_t)offs, blks->blks[i].idx, + (intmax_t)blks->blks[i].offs, + blks->blks[i].len); + + /* Compute slow hash on demand. */ + + if (0 == have_md) { + hash_slow(buf + offs, (size_t)osz, md, sess); + have_md = 1; + } + + if (memcmp(md, blks->blks[i].chksum_long, blks->csum)) + continue; + + LOG4(sess, "%s: sender verifies slow match", path); + return &blks->blks[i]; + } + + return NULL; +} + +/* + * The main reconstruction algorithm on the sender side. 
+ * Scans byte-wise over the input file, looking for matching blocks in + * what the server sent us. + * If a block is found, emit all data up until the block, then the token + * for the block. + * The receiving end can then reconstruct the file trivially. + * Return zero on failure, non-zero on success. + */ +static int +blk_match_send(struct sess *sess, const char *path, int fd, + const void *buf, off_t size, const struct blkset *blks) +{ + off_t offs, last, end, fromcopy = 0, fromdown = 0, + total = 0, sz; + int32_t tok; + struct blk *blk; + size_t hint = 0; + + /* + * Stop searching at the length of the file minus the size of + * the last block. + * The reason for this being that we don't need to do an + * incremental hash within the last block---if it doesn't match, + * it doesn't match. + */ + + end = size + 1 - blks->blks[blks->blksz - 1].len; + + for (last = offs = 0; offs < end; offs++) { + blk = blk_find(sess, buf, size, + offs, blks, path, hint); + if (NULL == blk) + continue; + + sz = offs - last; + fromdown += sz; + total += sz; + LOG4(sess, "%s: flushing %jd B before %zu B " + "block %zu", path, (intmax_t)sz, blk->len, + blk->idx); + tok = -(blk->idx + 1); + + /* + * Write the data we have, then follow it with the tag + * of the block that matches. + * The receiver will then write our data, then the data + * it already has in the matching block. + */ + + if ( ! blk_flush(sess, fd, buf + last, sz, tok)) { + ERRX1(sess, "blk_flush"); + return 0; + } + + fromcopy += blk->len; + total += blk->len; + offs += blk->len - 1; + last = offs + 1; + hint = blk->idx + 1; + } + + /* Emit remaining data and send terminator token. */ + + sz = size - last; + total += sz; + fromdown += sz; + + LOG4(sess, "%s: flushing remaining %jd B", path, (intmax_t)sz); + + if ( ! 
blk_flush(sess, fd, buf + last, sz, 0)) { + ERRX1(sess, "blk_flush"); + return 0; + } + + LOG3(sess, "%s: flushed (chunked) %jd B total, " + "%.2f%% upload ratio", path, (intmax_t)total, + 100.0 * fromdown / total); + return 1; +} + +/* + * Given a local file "path" and the blocks created by a remote machine, + * find out which blocks of our file they don't have and send them. + * Return zero on failure, non-zero on success. + */ +int +blk_match(struct sess *sess, int fd, + const struct blkset *blks, const char *path) +{ + int nfd, rc = 0, c; + struct stat st; + void *map = MAP_FAILED; + size_t mapsz; + unsigned char filemd[MD4_DIGEST_LENGTH]; + + /* Start by mapping our file into memory. */ + + if (-1 == (nfd = open(path, O_RDONLY, 0))) { + ERR(sess, "%s: open", path); + return 0; + } else if (-1 == fstat(nfd, &st)) { + ERR(sess, "%s: fstat", path); + close(nfd); + return 0; + } + + /* + * We might possibly have a zero-length file, in which case the + * mmap() will fail, so only do this with non-zero files. + */ + + if ((mapsz = st.st_size) > 0) { + map = mmap(NULL, mapsz, PROT_READ, MAP_SHARED, nfd, 0); + if (MAP_FAILED == map) { + ERR(sess, "%s: mmap", path); + close(nfd); + return 0; + } + } + + /* + * If the file's empty or we don't have any blocks from the + * sender, then simply send the whole file. + * Otherwise, run the hash matching routine and send raw chunks + * and subsequent matching tokens. + * This part broadly symmetrises blk_merge(). + */ + + if (st.st_size && blks->blksz) { + c = blk_match_send(sess, path, + fd, map, st.st_size, blks); + if ( ! c) { + ERRX1(sess, "blk_match_send"); + goto out; + } + } else { + if ( ! blk_flush(sess, fd, map, st.st_size, 0)) { + ERRX1(sess, "blk_flush"); + return 0; + } + LOG3(sess, "%s: flushed (un-chunked) %jd B, 100%% " + "upload ratio", path, (intmax_t)st.st_size); + } + + /* + * Now write the full file hash. 
+ * Since we're seeding the hash, this always gives us some sort + * of data even if the file's zero-length. + */ + + hash_file(map, st.st_size, filemd, sess); + + if ( ! io_write_buf(sess, fd, filemd, MD4_DIGEST_LENGTH)) { + ERRX1(sess, "io_write_buf"); + goto out; + } + + rc = 1; +out: + if (MAP_FAILED != map) + munmap(map, mapsz); + close(nfd); + return rc; +} + +/* FIXME: remove. */ +void +blkset_free(struct blkset *p) +{ + + if (NULL == p) + return; + free(p->blks); + free(p); +} + +/* + * Sent from the sender to the receiver to indicate that the block set + * has been received. + * Symmetrises blk_send_ack(). + * Returns zero on failure, non-zero on success. + */ +int +blk_recv_ack(struct sess *sess, + int fd, const struct blkset *blocks, int32_t idx) +{ + + /* FIXME: put into static block. */ + + if ( ! io_write_int(sess, fd, idx)) + ERRX1(sess, "io_write_int"); + else if ( ! io_write_int(sess, fd, blocks->blksz)) + ERRX1(sess, "io_write_int"); + else if ( ! io_write_int(sess, fd, blocks->len)) + ERRX1(sess, "io_write_int"); + else if ( ! io_write_int(sess, fd, blocks->csum)) + ERRX1(sess, "io_write_int"); + else if ( ! io_write_int(sess, fd, blocks->rem)) + ERRX1(sess, "io_write_int"); + else + return 1; + + return 0; +} + +/* + * Read all of the checksums for a file's blocks. + * Returns the set of blocks or NULL on failure. + */ +struct blkset * +blk_recv(struct sess *sess, int fd, const char *path) +{ + struct blkset *s; + int32_t i; + size_t j; + struct blk *b; + off_t offs = 0; + + if (NULL == (s = calloc(1, sizeof(struct blkset)))) { + ERR(sess, "calloc"); + return NULL; + } + + /* + * The block prologue consists of a few values that we'll need + * in reading the individual blocks for this file. + * FIXME: read into buffer and unbuffer. + */ + + if ( ! io_read_size(sess, fd, &s->blksz)) { + ERRX1(sess, "io_read_size"); + goto out; + } else if ( ! io_read_size(sess, fd, &s->len)) { + ERRX1(sess, "io_read_size"); + goto out; + } else if ( ! 
io_read_size(sess, fd, &s->csum)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if ( ! io_read_size(sess, fd, &s->rem)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if (s->rem && s->rem >= s->len) { + ERRX(sess, "block remainder is " + "greater than block size"); + goto out; + } + + LOG3(sess, "%s: read block prologue: %zu blocks of " + "%zu B, %zu B remainder, %zu B checksum", path, + s->blksz, s->len, s->rem, s->csum); + + if (s->blksz) { + s->blks = calloc(s->blksz, sizeof(struct blk)); + if (NULL == s->blks) { + ERR(sess, "calloc"); + goto out; + } + } + + /* + * Read each block individually. + * FIXME: read buffer and unbuffer. + */ + + for (j = 0; j < s->blksz; j++) { + b = &s->blks[j]; + if ( ! io_read_int(sess, fd, &i)) { + ERRX1(sess, "io_read_int"); + goto out; + } + b->chksum_short = i; + + assert(s->csum <= sizeof(b->chksum_long)); + if ( ! io_read_buf(sess, + fd, b->chksum_long, s->csum)) { + ERRX1(sess, "io_read_buf"); + goto out; + } + + /* + * If we're the last block, then we're assigned the + * remainder of the data. + */ + + b->offs = offs; + b->idx = j; + b->len = (j == (s->blksz - 1) && s->rem) ? + s->rem : s->len; + offs += b->len; + + LOG4(sess, "%s: read block %zu, " + "length %zu B", path, b->idx, b->len); + } + + s->size = offs; + LOG3(sess, "%s: read blocks: %zu blocks, %jd B total " + "blocked data", path, s->blksz, (intmax_t)s->size); + return s; +out: + blkset_free(s); + return NULL; +} + +/* + * Symmetrise blk_recv_ack(), except w/o the leading identifier. + * Return zero on failure, non-zero on success. + */ +int +blk_send_ack(struct sess *sess, int fd, struct blkset *p) +{ + char buf[16]; + size_t pos = 0, sz; + + /* Put the entire send routine into a buffer. */ + + sz = sizeof(int32_t) + /* block count */ + sizeof(int32_t) + /* block length */ + sizeof(int32_t) + /* checksum length */ + sizeof(int32_t); /* block remainder */ + assert(sz <= sizeof(buf)); + + if ( ! 
io_read_buf(sess, fd, buf, sz)) { + ERRX1(sess, "io_read_buf"); + return 0; + } + + if ( ! io_unbuffer_size(sess, buf, &pos, sz, &p->blksz)) + ERRX1(sess, "io_unbuffer_size"); + else if ( ! io_unbuffer_size(sess, buf, &pos, sz, &p->len)) + ERRX1(sess, "io_unbuffer_size"); + else if ( ! io_unbuffer_size(sess, buf, &pos, sz, &p->csum)) + ERRX1(sess, "io_unbuffer_size"); + else if ( ! io_unbuffer_size(sess, buf, &pos, sz, &p->rem)) + ERRX1(sess, "io_unbuffer_size"); + else if (p->len && p->rem >= p->len) + ERRX1(sess, "non-zero length is less than remainder"); + else if (0 == p->csum || p->csum > 16) + ERRX1(sess, "inappropriate checksum length"); + else + return 1; + + return 0; +} + +/* + * The receiver now reads raw data and block indices from the sender, + * and merges them into the temporary file. + * Returns zero on failure, non-zero on success. + */ +int +blk_merge(struct sess *sess, int fd, int ffd, + const struct blkset *block, int outfd, const char *path, + const void *map, size_t mapsz, float *stats) +{ + size_t sz, tok; + int32_t rawtok; + char *buf = NULL; + void *pp; + ssize_t ssz; + int rc = 0; + unsigned char md[MD4_DIGEST_LENGTH], + ourmd[MD4_DIGEST_LENGTH]; + off_t total = 0, fromcopy = 0, fromdown = 0; + MD4_CTX ctx; + + MD4_Init(&ctx); + + rawtok = htole32(sess->seed); + MD4_Update(&ctx, (unsigned char *)&rawtok, sizeof(int32_t)); + + for (;;) { + /* + * This matches the sequence in blk_flush(). + * We read the size/token, then optionally the data. + * The size >0 for reading data, 0 for no more data, and + * <0 for a token indicator. + */ + + if ( ! io_read_int(sess, fd, &rawtok)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if (0 == rawtok) + break; + + if (rawtok > 0) { + sz = rawtok; + if (NULL == (pp = realloc(buf, sz))) { + ERR(sess, "realloc"); + goto out; + } + buf = pp; + if ( ! 
io_read_buf(sess, fd, buf, sz)) { + ERRX1(sess, "io_read_int"); + goto out; + } + + if ((ssz = write(outfd, buf, sz)) < 0) { + ERR(sess, "write: temporary file"); + goto out; + } else if ((size_t)ssz != sz) { + ERRX(sess, "write: short write"); + goto out; + } + + fromdown += sz; + total += sz; + LOG4(sess, "%s: received %zd B block, now %jd " + "B total", path, ssz, (intmax_t)total); + + MD4_Update(&ctx, buf, sz); + } else { + tok = -rawtok - 1; + if (tok >= block->blksz) { + ERRX(sess, "token not in block set"); + goto out; + } + + /* + * Now we read from our block. + * We should only be at this point if we have a + * block to read from, i.e., if we were able to + * map our origin file and create a block + * profile from it. + */ + + assert(MAP_FAILED != map); + + ssz = write(outfd, + map + block->blks[tok].offs, + block->blks[tok].len); + + if (ssz < 0) { + ERR(sess, "write: temporary file"); + goto out; + } else if ((size_t)ssz != block->blks[tok].len) { + ERRX(sess, "write: short write"); + goto out; + } + + fromcopy += block->blks[tok].len; + total += block->blks[tok].len; + LOG4(sess, "%s: copied %zu B, now %jd " + "B total", path, block->blks[tok].len, + (intmax_t)total); + + MD4_Update(&ctx, + map + block->blks[tok].offs, + block->blks[tok].len); + } + } + + + /* Make sure our resulting MD4_ hashes match. */ + + MD4_Final(ourmd, &ctx); + + if ( ! io_read_buf(sess, fd, md, MD4_DIGEST_LENGTH)) { + ERRX1(sess, "io_read_buf"); + goto out; + } else if (memcmp(md, ourmd, MD4_DIGEST_LENGTH)) { + ERRX(sess, "%s: file hash does not match", path); + goto out; + } + + *stats = 100.0 * fromdown / total; + rc = 1; +out: + free(buf); + return rc; +} + +/* + * Transmit the metadata for set and blocks. + * Return zero on failure, non-zero on success. + */ +int +blk_send(struct sess *sess, int fd, size_t idx, + const struct blkset *p, const char *path) +{ + char *buf; + size_t i, pos = 0, sz; + int rc = 0; + + /* Put the entire send routine into a buffer. 
*/ + + sz = sizeof(int32_t) + /* identifier */ + sizeof(int32_t) + /* block count */ + sizeof(int32_t) + /* block length */ + sizeof(int32_t) + /* checksum length */ + sizeof(int32_t) + /* block remainder */ + p->blksz * + (sizeof(int32_t) + /* short checksum */ + p->csum); /* long checksum */ + + if (NULL == (buf = malloc(sz))) { + ERR(sess, "malloc"); + return 0; + } + + io_buffer_int(sess, buf, &pos, sz, idx); + io_buffer_int(sess, buf, &pos, sz, p->blksz); + io_buffer_int(sess, buf, &pos, sz, p->len); + io_buffer_int(sess, buf, &pos, sz, p->csum); + io_buffer_int(sess, buf, &pos, sz, p->rem); + + for (i = 0; i < p->blksz; i++) { + io_buffer_int(sess, buf, &pos, + sz, p->blks[i].chksum_short); + io_buffer_buf(sess, buf, &pos, sz, + p->blks[i].chksum_long, p->csum); + } + + assert(pos == sz); + + if ( ! io_write_buf(sess, fd, buf, sz)) { + ERRX1(sess, "io_write_buf"); + goto out; + } + + LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, " + "%zu B remainder, %zu B checksum", path, + p->blksz, p->len, p->rem, p->csum); + rc = 1; +out: + free(buf); + return rc; +} diff --git a/usr.bin/rsync/child.c b/usr.bin/rsync/child.c new file mode 100644 index 00000000000..686c58afc91 --- /dev/null +++ b/usr.bin/rsync/child.c @@ -0,0 +1,72 @@ +/* $Id: child.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/stat.h> + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "extern.h" + +/* + * This is run on the client machine to initiate a connection with the + * remote machine in --server mode. + * It does not return, as it executes into the remote shell. + * + * Pledges: exec, stdio. + */ +void +rsync_child(const struct opts *opts, int fd, const struct fargs *f) +{ + struct sess sess; + char **args; + size_t i; + + memset(&sess, 0, sizeof(struct sess)); + sess.opts = opts; + + /* Construct the remote shell command. */ + + if (NULL == (args = fargs_cmdline(&sess, f))) { + ERRX1(&sess, "fargs_cmdline"); + exit(EXIT_FAILURE); + } + + for (i = 0; NULL != args[i]; i++) + LOG2(&sess, "exec[%zu] = %s", i, args[i]); + + /* Make sure the child's stdin is from the sender. */ + + if (-1 == dup2(fd, STDIN_FILENO)) { + ERR(&sess, "dup2"); + exit(EXIT_FAILURE); + } if (-1 == dup2(fd, STDOUT_FILENO)) { + ERR(&sess, "dup2"); + exit(EXIT_FAILURE); + } + + /* Here we go... 
*/ + + execvp(args[0], args); + + ERR(&sess, "%s: execvp", args[0]); + exit(EXIT_FAILURE); + /* NOTREACHED */ +} diff --git a/usr.bin/rsync/client.c b/usr.bin/rsync/client.c new file mode 100644 index 00000000000..9a115136f8d --- /dev/null +++ b/usr.bin/rsync/client.c @@ -0,0 +1,106 @@ +/* $Id: client.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/stat.h> + +#include <assert.h> +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "extern.h" + +/* + * The rsync client runs on the operator's local machine. + * It can either be in sender or receiver mode. + * In the former, it synchronises local files from a remote sink. + * In the latter, the remote sink synchronses to the local files. + * + * Pledges: stdio, rpath, wpath, cpath, unveil, fattr. + * + * Pledges (dry-run): -cpath, -wpath, -fattr. + * Pledges (!preserve_times): -fattr. + */ +int +rsync_client(const struct opts *opts, int fd, const struct fargs *f) +{ + struct sess sess; + int rc = 0; + + /* Standard rsync preamble, sender side. */ + + memset(&sess, 0, sizeof(struct sess)); + sess.opts = opts; + sess.lver = RSYNC_PROTOCOL; + + if ( ! 
io_write_int(&sess, fd, sess.lver)) { + ERRX1(&sess, "io_write_int"); + goto out; + } else if ( ! io_read_int(&sess, fd, &sess.rver)) { + ERRX1(&sess, "io_read_int"); + goto out; + } else if ( ! io_read_int(&sess, fd, &sess.seed)) { + ERRX1(&sess, "io_read_int"); + goto out; + } + + if (sess.rver < sess.lver) { + ERRX(&sess, "remote protocol is older " + "than our own (%" PRId32 " < %" PRId32 "): " + "this is not supported", + sess.rver, sess.lver); + goto out; + } + + LOG2(&sess, "client detected client version %" PRId32 + ", server version %" PRId32 ", seed %" PRId32, + sess.lver, sess.rver, sess.seed); + + sess.mplex_reads = 1; + + /* + * Now we need to get our list of files. + * Senders (and locals) send; receivers receive. + */ + + if (FARGS_RECEIVER != f->mode) { + LOG2(&sess, "client starting sender: %s", + NULL == f->host ? "(local)" : f->host); + if ( ! rsync_sender(&sess, fd, fd, + f->sourcesz, f->sources)) { + ERRX1(&sess, "rsync_sender"); + goto out; + } + } else { + LOG2(&sess, "client starting receiver: %s", + NULL == f->host ? "(local)" : f->host); + if ( ! rsync_receiver(&sess, fd, fd, f->sink)) { + ERRX1(&sess, "rsync_receiver"); + goto out; + } + } + +#if 0 + /* Probably the EOF. */ + if (io_read_check(&sess, fd)) + WARNX(&sess, "data remains in read pipe"); +#endif + + rc = 1; +out: + return rc; +} diff --git a/usr.bin/rsync/downloader.c b/usr.bin/rsync/downloader.c new file mode 100644 index 00000000000..284e62b7f3c --- /dev/null +++ b/usr.bin/rsync/downloader.c @@ -0,0 +1,600 @@ +/* $Id: downloader.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/mman.h> +#include <sys/stat.h> + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <math.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "extern.h" +#include "md4.h" + +/* + * A small optimisation: have a 1 MB pre-write buffer. + * Disable the pre-write buffer by having this be zero. + * (It doesn't affect performance much.) + */ +#define OBUF_SIZE (1024 * 1024) + +enum downloadst { + DOWNLOAD_READ_NEXT = 0, + DOWNLOAD_READ_LOCAL, + DOWNLOAD_READ_REMOTE +}; + +/* + * Like struct upload, but used to keep track of what we're downloading. + * This also is managed by the receiver process. 
+ */ +struct download { + enum downloadst state; /* state of affairs */ + size_t idx; /* index of current file */ + struct blkset blk; /* its blocks */ + void *map; /* mmap of current file */ + size_t mapsz; /* length of mapsz */ + int ofd; /* open origin file */ + int fd; /* open output file */ + char *fname; /* output filename */ + MD4_CTX ctx; /* current hashing context */ + off_t downloaded; /* total downloaded */ + off_t total; /* total in file */ + const struct flist *fl; /* file list */ + size_t flsz; /* size of file list */ + int rootfd; /* destination directory */ + int fdin; /* read descriptor from sender */ + char *obuf; /* pre-write buffer */ + size_t obufsz; /* current size of obuf */ + size_t obufmax; /* max size we'll wbuffer */ +}; + + +/* + * Simply log the filename. + */ +static void +log_file(struct sess *sess, + const struct download *dl, const struct flist *f) +{ + float frac, tot = dl->total; + int prec = 0; + const char *unit = "B"; + + if (sess->opts->server) + return; + + frac = 0 == dl->total ? 100.0 : + 100.0 * dl->downloaded / dl->total; + + if (dl->total > 1024 * 1024 * 1024) { + tot = dl->total / (1024. * 1024. * 1024.); + prec = 3; + unit = "GB"; + } else if (dl->total > 1024 * 1024) { + tot = dl->total / (1024. * 1024.); + prec = 2; + unit = "MB"; + } else if (dl->total > 1024) { + tot = dl->total / 1024.; + prec = 1; + unit = "KB"; + } + + LOG1(sess, "%s (%.*f %s, %.1f%% downloaded)", + f->path, prec, tot, unit, frac); +} + +/* + * Reinitialise a download context w/o overwriting the persistent parts + * of the structure (like p->fl or p->flsz) for index "idx". + * The MD4 context is pre-seeded. 
+ */ +static void +download_reinit(struct sess *sess, struct download *p, size_t idx) +{ + int32_t seed = htole32(sess->seed); + + assert(DOWNLOAD_READ_NEXT == p->state); + + p->idx = idx; + memset(&p->blk, 0, sizeof(struct blkset)); + p->map = MAP_FAILED; + p->mapsz = 0; + p->ofd = -1; + p->fd = -1; + p->fname = NULL; + MD4_Init(&p->ctx); + p->downloaded = p->total = 0; + /* Don't touch p->fl. */ + /* Don't touch p->flsz. */ + /* Don't touch p->rootfd. */ + /* Don't touch p->fdin. */ + MD4_Update(&p->ctx, &seed, sizeof(int32_t)); +} + +/* + * Free a download context. + * If "cleanup" is non-zero, we also try to clean up the temporary file, + * assuming that it has been opened in p->fd. + */ +static void +download_cleanup(struct download *p, int cleanup) +{ + + if (MAP_FAILED != p->map) { + assert(p->mapsz); + munmap(p->map, p->mapsz); + p->map = MAP_FAILED; + p->mapsz = 0; + } + if (-1 != p->ofd) { + close(p->ofd); + p->ofd = -1; + } + if (-1 != p->fd) { + close(p->fd); + if (cleanup && NULL != p->fname) + unlinkat(p->rootfd, p->fname, 0); + p->fd = -1; + } + free(p->fname); + p->fname = NULL; + p->state = DOWNLOAD_READ_NEXT; +} + +/* + * Initial allocation of the download object using the file list "fl" of + * size "flsz", the destination "rootfd", and the sender read "fdin". + * Returns NULL on allocation failure. + * On success, download_free() must be called with the pointer. 
+ */ +struct download * +download_alloc(struct sess *sess, int fdin, + const struct flist *fl, size_t flsz, int rootfd) +{ + struct download *p; + + if (NULL == (p = malloc(sizeof(struct download)))) { + ERR(sess, "malloc"); + return NULL; + } + + p->state = DOWNLOAD_READ_NEXT; + p->fl = fl; + p->flsz = flsz; + p->rootfd = rootfd; + p->fdin = fdin; + download_reinit(sess, p, 0); + p->obufsz = 0; + p->obuf = NULL; + p->obufmax = OBUF_SIZE; + if (p->obufmax && + NULL == (p->obuf = malloc(p->obufmax))) { + ERR(sess, "malloc"); + free(p); + return NULL; + } + return p; +} + +/* + * Perform all cleanups (including removing stray files) and free. + * Passing a NULL to this function is ok. + */ +void +download_free(struct download *p) +{ + + if (NULL == p) + return; + download_cleanup(p, 1); + free(p->obuf); + free(p); +} + +/* + * Optimisation: instead of dumping directly into the output file, keep + * a buffer and write as much as we can into the buffer. + * That way, we can avoid calling write() too much, and instead call it + * with big buffers. + * To flush the buffer w/o changing it, pass 0 as "sz". + * Returns zero on failure, non-zero on success. + */ +static int +buf_copy(struct sess *sess, + const char *buf, size_t sz, struct download *p) +{ + size_t rem, tocopy; + ssize_t ssz; + + assert(p->obufsz <= p->obufmax); + + /* + * Copy as much as we can. + * If we've copied everything, exit. + * If we have no pre-write buffer (obufmax of zero), this never + * gets called, so we never buffer anything. + */ + + if (sz && p->obufsz < p->obufmax) { + assert(NULL != p->obuf); + rem = p->obufmax - p->obufsz; + assert(rem > 0); + tocopy = rem < sz ? rem : sz; + memcpy(p->obuf + p->obufsz, buf, tocopy); + sz -= tocopy; + buf += tocopy; + p->obufsz += tocopy; + assert(p->obufsz <= p->obufmax); + if (0 == sz) + return 1; + } + + /* Drain the main buffer. 
*/ + + if (p->obufsz) { + assert(p->obufmax); + assert(p->obufsz <= p->obufmax); + assert(NULL != p->obuf); + if ((ssz = write(p->fd, p->obuf, p->obufsz)) < 0) { + ERR(sess, "%s: write", p->fname); + return 0; + } else if ((size_t)ssz != p->obufsz) { + ERRX(sess, "%s: short write", p->fname); + return 0; + } + p->obufsz = 0; + } + + /* + * Now drain anything left. + * If we have no pre-write buffer, this is it. + */ + + if (sz) { + if ((ssz = write(p->fd, buf, sz)) < 0) { + ERR(sess, "%s: write", p->fname); + return 0; + } else if ((size_t)ssz != sz) { + ERRX(sess, "%s: short write", p->fname); + return 0; + } + } + return 1; +} + +/* + * The downloader waits on a file the sender is going to give us, opens + * and mmaps the existing file, opens a temporary file, dumps the file + * (or metadata) into the temporary file, then renames. + * This happens in several possible phases to avoid blocking. + * Returns <0 on failure, 0 on no more data (end of phase), >0 on + * success (more data to be read from the sender). + */ +int +rsync_downloader(struct download *p, struct sess *sess, int *ofd) +{ + int32_t idx, rawtok; + uint32_t hash; + const struct flist *f; + size_t sz, dirlen, tok; + const char *cp; + mode_t perm; + struct stat st; + char *buf = NULL; + unsigned char ourmd[MD4_DIGEST_LENGTH], + md[MD4_DIGEST_LENGTH]; + struct timespec tv[2]; + + /* + * If we don't have a download already in session, then the next + * one is coming in. + * Read either the stop (phase) signal from the sender or block + * metadata, in which case we open our file and wait for data. + */ + + if (DOWNLOAD_READ_NEXT == p->state) { + if ( ! io_read_int(sess, p->fdin, &idx)) { + ERRX1(sess, "io_read_int"); + return -1; + } else if (idx >= 0 && (size_t)idx >= p->flsz) { + ERRX(sess, "index out of bounds"); + return -1; + } else if (idx < 0) { + LOG3(sess, "downloader: phase complete"); + return 0; + } + + /* Short-circuit: dry_run mode does nothing. 
*/ + + if (sess->opts->dry_run) + return 1; + + /* + * Now get our block information. + * This is all we'll need to reconstruct the file from + * the map, as block sizes are regular. + */ + + download_reinit(sess, p, idx); + if ( ! blk_send_ack(sess, p->fdin, &p->blk)) { + ERRX1(sess, "blk_send_ack"); + goto out; + } + + /* + * Next, we want to open the existing file for using as + * block input. + * We do this in a non-blocking way, so if the open + * succeeds, then we'll go reentrant til the file is + * readable and we can mmap() it. + * Set the file descriptor that we want to wait for. + */ + + p->state = DOWNLOAD_READ_LOCAL; + f = &p->fl[idx]; + p->ofd = openat(p->rootfd, f->path, + O_RDONLY | O_NONBLOCK, 0); + + if (-1 == p->ofd && ENOENT != errno) { + ERR(sess, "%s: openat", f->path); + goto out; + } else if (-1 != p->ofd) { + *ofd = p->ofd; + return 1; + } + + /* Fall-through: there's no file. */ + } + + /* + * At this point, the server is sending us data and we want to + * hoover it up as quickly as possible or we'll deadlock. + * We want to be pulling off of f->fdin as quickly as possible, + * so perform as much buffering as we can. + */ + + f = &p->fl[p->idx]; + + /* + * Next in sequence: we have an open download session but + * haven't created our temporary file. + * This means that we've already opened (or tried to open) the + * original file in a nonblocking way, and we can map it. + */ + + if (DOWNLOAD_READ_LOCAL == p->state) { + assert(NULL == p->fname); + + /* + * Try to fstat() the file descriptor if valid and make + * sure that we're still a regular file. + * Then, if it has non-zero size, mmap() it for hashing. + */ + + if (-1 != p->ofd && + -1 == fstat(p->ofd, &st)) { + ERR(sess, "%s: fstat", f->path); + goto out; + } else if (-1 != p->ofd && ! 
S_ISREG(st.st_mode)) { + WARNX(sess, "%s: not regular", f->path); + goto out; + } + + if (-1 != p->ofd && st.st_size > 0) { + p->mapsz = st.st_size; + p->map = mmap(NULL, p->mapsz, + PROT_READ, MAP_SHARED, p->ofd, 0); + if (MAP_FAILED == p->map) { + ERR(sess, "%s: mmap", f->path); + goto out; + } + } + + /* Success either way: we don't need this. */ + + *ofd = -1; + + /* + * Create the temporary file. + * Use a simple scheme of path/.FILE.RANDOM, where we + * fill in RANDOM with an arc4random number. + * The tricky part is getting into the directory if + * we're in recursive mode. + */ + + hash = arc4random(); + if (sess->opts->recursive && + NULL != (cp = strrchr(f->path, '/'))) { + dirlen = cp - f->path; + if (asprintf(&p->fname, "%.*s/.%s.%" PRIu32, + (int)dirlen, f->path, + f->path + dirlen + 1, hash) < 0) + p->fname = NULL; + } else { + if (asprintf(&p->fname, ".%s.%" PRIu32, + f->path, hash) < 0) + p->fname = NULL; + } + if (NULL == p->fname) { + ERR(sess, "asprintf"); + goto out; + } + + /* + * Inherit permissions from the source file if we're new + * or specifically told with -p. + */ + + if ( ! sess->opts->preserve_perms) + perm = -1 == p->ofd ? f->st.mode : st.st_mode; + else + perm = f->st.mode; + + p->fd = openat(p->rootfd, p->fname, + O_APPEND|O_WRONLY|O_CREAT|O_EXCL, perm); + + if (-1 == p->fd) { + ERR(sess, "%s: openat", p->fname); + goto out; + } + + /* + * FIXME: we can technically wait until the temporary + * file is writable, but since it's guaranteed to be + * empty, I don't think this is a terribly expensive + * operation as it doesn't involve reading the file into + * memory beforehand. + */ + + LOG3(sess, "%s: temporary: %s", f->path, p->fname); + p->state = DOWNLOAD_READ_REMOTE; + return 1; + } + + /* + * This matches the sequence in blk_flush(). + * If we've gotten here, then we have a possibly-open map file + * (not for new files) and our temporary file is writable. + * We read the size/token, then optionally the data. 
+ * The size >0 for reading data, 0 for no more data, and <0 for + * a token indicator. + */ + + assert(DOWNLOAD_READ_REMOTE == p->state); + assert(NULL != p->fname); + assert(-1 != p->fd); + assert(-1 != p->fdin); + + if ( ! io_read_int(sess, p->fdin, &rawtok)) { + ERRX1(sess, "io_read_int"); + goto out; + } + + if (rawtok > 0) { + sz = rawtok; + if (NULL == (buf = malloc(sz))) { + ERR(sess, "realloc"); + goto out; + } + if ( ! io_read_buf(sess, p->fdin, buf, sz)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if ( ! buf_copy(sess, buf, sz, p)) { + ERRX1(sess, "buf_copy"); + goto out; + } + p->total += sz; + p->downloaded += sz; + LOG4(sess, "%s: received %zu B block", p->fname, sz); + MD4_Update(&p->ctx, buf, sz); + free(buf); + return 1; + } else if (rawtok < 0) { + tok = -rawtok - 1; + if (tok >= p->blk.blksz) { + ERRX(sess, "%s: token not in block " + "set: %zu (have %zu blocks)", + p->fname, tok, p->blk.blksz); + goto out; + } + sz = tok == p->blk.blksz - 1 ? p->blk.rem : p->blk.len; + assert(sz); + assert(MAP_FAILED != p->map); + buf = p->map + (tok * p->blk.len); + + /* + * Now we read from our block. + * We should only be at this point if we have a + * block to read from, i.e., if we were able to + * map our origin file and create a block + * profile from it. + */ + + assert(MAP_FAILED != p->map); + if ( ! buf_copy(sess, buf, sz, p)) { + ERRX1(sess, "buf_copy"); + goto out; + } + p->total += sz; + LOG4(sess, "%s: copied %zu B", p->fname, sz); + MD4_Update(&p->ctx, buf, sz); + return 1; + } + + if ( ! buf_copy(sess, NULL, 0, p)) { + ERRX1(sess, "buf_copy"); + goto out; + } + + assert(0 == rawtok); + assert(0 == p->obufsz); + + /* + * Make sure our resulting MD4 hashes match. + * FIXME: if the MD4 hashes don't match, then our file has + * changed out from under us. + * This should require us to re-run the sequence in another + * phase. + */ + + MD4_Final(ourmd, &p->ctx); + + if ( ! 
io_read_buf(sess, p->fdin, md, MD4_DIGEST_LENGTH)) { + ERRX1(sess, "io_read_buf"); + goto out; + } else if (memcmp(md, ourmd, MD4_DIGEST_LENGTH)) { + ERRX(sess, "%s: hash does not match", p->fname); + goto out; + } + + /* Conditionally adjust file modification time. */ + + if (sess->opts->preserve_times) { + tv[0].tv_sec = time(NULL); + tv[0].tv_nsec = 0; + tv[1].tv_sec = f->st.mtime; + tv[1].tv_nsec = 0; + if (-1 == futimens(p->fd, tv)) { + ERR(sess, "%s: futimens", p->fname); + goto out; + } + LOG4(sess, "%s: updated date", f->path); + } + + /* Finally, rename the temporary to the real file. */ + + if (-1 == renameat(p->rootfd, p->fname, p->rootfd, f->path)) { + ERR(sess, "%s: renameat: %s", p->fname, f->path); + goto out; + } + + log_file(sess, p, f); + download_cleanup(p, 0); + return 1; +out: + download_cleanup(p, 1); + return -1; +} diff --git a/usr.bin/rsync/extern.h b/usr.bin/rsync/extern.h new file mode 100644 index 00000000000..8d3d591eace --- /dev/null +++ b/usr.bin/rsync/extern.h @@ -0,0 +1,295 @@ +/* $Id: extern.h,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef EXTERN_H +#define EXTERN_H + +/* + * This is the rsync protocol version that we support. 
+ */ +#define RSYNC_PROTOCOL (27) + +/* + * Maximum amount of file data sent over the wire at once. + */ +#define MAX_CHUNK (32 * 1024) + +/* + * This is the minimum size for a block of data not including those in + * the remainder block. + */ +#define BLOCK_SIZE_MIN (700) + +/* + * The sender and receiver use a two-phase synchronisation process. + * The first uses two-byte hashes; the second, 16-byte. + * (The second must hold a full MD4 digest.) + */ +#define CSUM_LENGTH_PHASE1 (2) +#define CSUM_LENGTH_PHASE2 (16) + +/* + * Operating mode for a client or a server. + * Sender means we synchronise local files with those from remote. + * Receiver is the opposite. + * This is relative to which host we're running on. + */ +enum fmode { + FARGS_SENDER, + FARGS_RECEIVER +}; + +/* + * File arguments given on the command line. + * See struct opts. + */ +struct fargs { + char *host; /* hostname or NULL if local */ + char **sources; /* transfer source */ + size_t sourcesz; /* number of sources */ + char *sink; /* transfer endpoint */ + enum fmode mode; /* mode of operation */ + int remote; /* uses rsync:// or :: for remote */ + char *module; /* if rsync://, the module */ +}; + +/* + * The subset of stat(2) information that we need. + * (There are some parts we don't use yet.) + */ +struct flstat { + mode_t mode; /* mode */ + uid_t uid; /* user */ + gid_t gid; /* group */ + off_t size; /* size */ + time_t mtime; /* modification */ + unsigned int flags; +#define FLSTAT_TOP_DIR 0x01 /* a top-level directory */ + +}; + +/* + * A list of files with their statistics. + */ +struct flist { + char *path; /* path relative to root */ + const char *wpath; /* "working" path for receiver */ + struct flstat st; /* file information */ + char *link; /* symlink target or NULL */ +}; + +/* + * Options passed into the command line. + * See struct fargs. 
+ */ +struct opts { + int sender; /* --sender */ + int server; /* --server */ + int recursive; /* -r */ + int verbose; /* -v */ + int dry_run; /* -n */ + int preserve_times; /* -t */ + int preserve_perms; /* -p */ + int preserve_links; /* -l */ + int del; /* --delete */ + const char *rsync_path; /* --rsync-path */ +}; + +/* + * An individual block description for a file. + * See struct blkset. + */ +struct blk { + off_t offs; /* offset in file */ + size_t idx; /* block index */ + size_t len; /* bytes in block */ + uint32_t chksum_short; /* fast checksum */ + unsigned char chksum_long[CSUM_LENGTH_PHASE2]; /* slow checksum */ +}; + +/* + * When transferring file contents, we break the file down into blocks + * and work with those. + */ +struct blkset { + off_t size; /* file size */ + size_t rem; /* terminal block length if non-zero */ + size_t len; /* block length */ + size_t csum; /* checksum length */ + struct blk *blks; /* all blocks */ + size_t blksz; /* number of blks */ +}; + +/* + * Values required during a communication session. + */ +struct sess { + const struct opts *opts; /* system options */ + int32_t seed; /* checksum seed */ + int32_t lver; /* local version */ + int32_t rver; /* remote version */ + uint64_t total_read; /* non-logging wire/reads */ + uint64_t total_size; /* total file size */ + uint64_t total_write; /* non-logging wire/writes */ + int mplex_reads; /* multiplexing reads? */ + size_t mplex_read_remain; /* remaining bytes */ + int mplex_writes; /* multiplexing writes? */ +}; + +struct download; +struct upload; + +#define LOG0(_sess, _fmt, ...) \ + rsync_log((_sess), __FILE__, __LINE__, -1, (_fmt), ##__VA_ARGS__) +#define LOG1(_sess, _fmt, ...) \ + rsync_log((_sess), __FILE__, __LINE__, 0, (_fmt), ##__VA_ARGS__) +#define LOG2(_sess, _fmt, ...) \ + rsync_log((_sess), __FILE__, __LINE__, 1, (_fmt), ##__VA_ARGS__) +#define LOG3(_sess, _fmt, ...) \ + rsync_log((_sess), __FILE__, __LINE__, 2, (_fmt), ##__VA_ARGS__) +#define LOG4(_sess, _fmt, ...) 
\ + rsync_log((_sess), __FILE__, __LINE__, 3, (_fmt), ##__VA_ARGS__) +#define ERRX1(_sess, _fmt, ...) \ + rsync_errx1((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) +#define WARNX(_sess, _fmt, ...) \ + rsync_warnx((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) +#define WARN(_sess, _fmt, ...) \ + rsync_warn((_sess), 0, __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) +#define WARN1(_sess, _fmt, ...) \ + rsync_warn((_sess), 1, __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) +#define WARN2(_sess, _fmt, ...) \ + rsync_warn((_sess), 2, __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) +#define ERR(_sess, _fmt, ...) \ + rsync_err((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) +#define ERRX(_sess, _fmt, ...) \ + rsync_errx((_sess), __FILE__, __LINE__, (_fmt), ##__VA_ARGS__) + +__BEGIN_DECLS + +void rsync_log(struct sess *, + const char *, size_t, int, const char *, ...) + __attribute__((format(printf, 5, 6))); +void rsync_warnx1(struct sess *, + const char *, size_t, const char *, ...) + __attribute__((format(printf, 4, 5))); +void rsync_warn(struct sess *, int, + const char *, size_t, const char *, ...) + __attribute__((format(printf, 5, 6))); +void rsync_warnx(struct sess *, const char *, + size_t, const char *, ...) + __attribute__((format(printf, 4, 5))); +void rsync_err(struct sess *, const char *, + size_t, const char *, ...) + __attribute__((format(printf, 4, 5))); +void rsync_errx(struct sess *, const char *, + size_t, const char *, ...) + __attribute__((format(printf, 4, 5))); +void rsync_errx1(struct sess *, const char *, + size_t, const char *, ...) 
+ __attribute__((format(printf, 4, 5))); + +int flist_del(struct sess *, int, + const struct flist *, size_t); +int flist_gen(struct sess *, size_t, char **, + struct flist **, size_t *); +int flist_gen_local(struct sess *, const char *, + struct flist **, size_t *); +void flist_free(struct flist *, size_t); +int flist_recv(struct sess *, int, + struct flist **, size_t *); +int flist_send(struct sess *, int, int, + const struct flist *, size_t); +int flist_gen_dels(struct sess *, const char *, + struct flist **, size_t *, + const struct flist *, size_t); + +char **fargs_cmdline(struct sess *, const struct fargs *); + +int io_read_buf(struct sess *, int, void *, size_t); +int io_read_byte(struct sess *, int, uint8_t *); +int io_read_check(struct sess *, int); +int io_read_flush(struct sess *, int); +int io_read_int(struct sess *, int, int32_t *); +int io_read_long(struct sess *, int, int64_t *); +int io_read_size(struct sess *, int, size_t *); +int io_read_ulong(struct sess *, int, uint64_t *); +int io_write_buf(struct sess *, int, const void *, size_t); +int io_write_byte(struct sess *, int, uint8_t); +int io_write_int(struct sess *, int, int32_t); +int io_write_line(struct sess *, int, const char *); +int io_write_long(struct sess *, int, int64_t); + +void io_buffer_int(struct sess *, void *, + size_t *, size_t, int32_t); +void io_buffer_buf(struct sess *, void *, + size_t *, size_t, const void *, size_t); + +void io_unbuffer_int(struct sess *, const void *, + size_t *, size_t, int32_t *); +int io_unbuffer_size(struct sess *, const void *, + size_t *, size_t, size_t *); +void io_unbuffer_buf(struct sess *, const void *, + size_t *, size_t, void *, size_t); + +void rsync_child(const struct opts *, int, const struct fargs *) + __attribute__((noreturn)); +int rsync_receiver(struct sess *, int, int, const char *); +int rsync_sender(struct sess *, int, int, size_t, char **); +int rsync_client(const struct opts *, int, const struct fargs *); +int rsync_socket(const 
struct opts *, const struct fargs *); +int rsync_server(const struct opts *, size_t, char *[]); +int rsync_downloader(struct download *, struct sess *, int *); +int rsync_uploader(struct upload *, + int *, struct sess *, int *); +int rsync_uploader_tail(struct upload *, struct sess *); + +struct download *download_alloc(struct sess *, int, + const struct flist *, size_t, int); +void download_free(struct download *); +struct upload *upload_alloc(struct sess *, int, int, size_t, + const struct flist *, size_t, mode_t); +void upload_free(struct upload *); + +struct blkset *blk_recv(struct sess *, int, const char *); +int blk_recv_ack(struct sess *, + int, const struct blkset *, int32_t); +int blk_match(struct sess *, int, + const struct blkset *, const char *); +int blk_send(struct sess *, int, size_t, + const struct blkset *, const char *); +int blk_send_ack(struct sess *, int, struct blkset *); +int blk_merge(struct sess *, int, int, + const struct blkset *, int, const char *, + const void *, size_t, float *); +void blkset_free(struct blkset *); + +uint32_t hash_fast(const void *, size_t); +void hash_slow(const void *, size_t, + unsigned char *, const struct sess *); +void hash_file(const void *, size_t, + unsigned char *, const struct sess *); + +int mkpath(struct sess *, char *); + +char *symlink_read(struct sess *, const char *); +char *symlinkat_read(struct sess *, int, const char *); + +int sess_stats_send(struct sess *, int); +int sess_stats_recv(struct sess *, int); + +__END_DECLS + +#endif /*!EXTERN_H*/ diff --git a/usr.bin/rsync/fargs.c b/usr.bin/rsync/fargs.c new file mode 100644 index 00000000000..0120479bee1 --- /dev/null +++ b/usr.bin/rsync/fargs.c @@ -0,0 +1,104 @@ +/* $Id: fargs.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright 
notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/stat.h> + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> + +#include "extern.h" + +#define RSYNC_PATH "rsync" + +char ** +fargs_cmdline(struct sess *sess, const struct fargs *f) +{ + char **args; + size_t i = 0, j, argsz = 0; + const char *rsync_path; + + assert(NULL != f); + assert(f->sourcesz > 0); + + if (NULL == (rsync_path = sess->opts->rsync_path)) + rsync_path = RSYNC_PATH; + + /* Be explicit with array size. */ + + argsz += 1; /* dot separator */ + argsz += 1; /* sink file */ + argsz += 5; /* per-mode maximum */ + argsz += 10; /* shared args */ + argsz += 1; /* NULL pointer */ + argsz += f->sourcesz; + + args = calloc(argsz, sizeof(char *)); + if (NULL == args) { + ERR(sess, "calloc"); + return NULL; + } + + if (NULL != f->host) { + assert(NULL != f->host); + args[i++] = "ssh"; + args[i++] = f->host; + args[i++] = (char *)rsync_path; + args[i++] = "--server"; + if (FARGS_RECEIVER == f->mode) + args[i++] = "--sender"; + } else { + args[i++] = (char *)rsync_path; + args[i++] = "--server"; + } + + /* Shared arguments. 
*/ + + if (sess->opts->verbose > 3) + args[i++] = "-v"; + if (sess->opts->verbose > 2) + args[i++] = "-v"; + if (sess->opts->verbose > 1) + args[i++] = "-v"; + if (sess->opts->verbose > 0) + args[i++] = "-v"; + if (sess->opts->dry_run) + args[i++] = "-n"; + if (sess->opts->preserve_times) + args[i++] = "-t"; + if (sess->opts->preserve_perms) + args[i++] = "-p"; + if (sess->opts->recursive) + args[i++] = "-r"; + if (sess->opts->preserve_links) + args[i++] = "-l"; + if (sess->opts->del) + args[i++] = "--delete"; + + /* Terminate with a full-stop for reasons unknown. */ + + args[i++] = "."; + + if (FARGS_RECEIVER == f->mode) { + for (j = 0; j < f->sourcesz; j++) + args[i++] = f->sources[j]; + } else + args[i++] = f->sink; + + args[i] = NULL; + return args; +} + diff --git a/usr.bin/rsync/flist.c b/usr.bin/rsync/flist.c new file mode 100644 index 00000000000..be091267f0d --- /dev/null +++ b/usr.bin/rsync/flist.c @@ -0,0 +1,1160 @@ +/* $Id: flist.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/param.h> +#include <sys/stat.h> + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <fts.h> +#include <search.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "extern.h" + +/* + * We allocate our file list in chunk sizes so as not to do it one by + * one. + * Preferrably we get one or two allocation. + */ +#define FLIST_CHUNK_SIZE (1024) + +/* + * These flags are part of the rsync protocol. + * They are sent as the first byte for a file transmission and encode + * information that affects subsequent transmissions. + */ +#define FLIST_MODE_SAME 0x0002 /* mode is repeat */ +#define FLIST_NAME_SAME 0x0020 /* name is repeat */ +#define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ +#define FLIST_TIME_SAME 0x0080 /* time is repeat */ + +/* + * Requied way to sort a filename list. + */ +static int +flist_cmp(const void *p1, const void *p2) +{ + const struct flist *f1 = p1, *f2 = p2; + + return strcmp(f1->wpath, f2->wpath); +} + +/* + * Deduplicate our file list (which may be zero-length). + * Returns zero on failure, non-zero on success. + */ +static int +flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) +{ + size_t i, j; + struct flist *new; + struct flist *f, *fnext; + + if (0 == *sz) + return 1; + + /* Create a new buffer, "new", and copy. */ + + new = calloc(*sz, sizeof(struct flist)); + if (NULL == new) { + ERR(sess, "calloc"); + return 0; + } + + for (i = j = 0; i < *sz - 1; i++) { + f = &(*fl)[i]; + fnext = &(*fl)[i + 1]; + + if (strcmp(f->wpath, fnext->wpath)) { + new[j++] = *f; + continue; + } + + /* + * Our working (destination) paths are the same. + * If the actual file is the same (as given on the + * command-line), then we can just discard the first. + * Otherwise, we need to bail out: it means we have two + * different files with the relative path on the + * destination side. 
+ */ + + if (0 == strcmp(f->path, fnext->path)) { + new[j++] = *f; + i++; + WARNX(sess, "%s: duplicate path: %s", + f->wpath, f->path); + free(fnext->path); + free(fnext->link); + fnext->path = fnext->link = NULL; + continue; + } + + ERRX(sess, "%s: duplicate working path for " + "possibly different file: %s, %s", + f->wpath, f->path, fnext->path); + free(new); + return 0; + } + + /* Don't forget the last entry. */ + + if (i == *sz - 1) + new[j++] = (*fl)[i]; + + /* + * Reassign to the deduplicated array. + * If we started out with *sz > 0, which we check for at the + * beginning, then we'll always continue having *sz > 0. + */ + + free(*fl); + *fl = new; + *sz = j; + assert(*sz); + return 1; +} + +/* + * We're now going to find our top-level directories. + * This only applies to recursive mode. + * If we have the first element as the ".", then that's the "top + * directory" of our transfer. + * Otherwise, mark up all top-level directories in the set. + */ +static void +flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) +{ + size_t i; + const char *cp; + + if ( ! sess->opts->recursive) + return; + + if (flsz && strcmp(fl[0].wpath, ".")) { + for (i = 0; i < flsz; i++) { + if ( ! S_ISDIR(fl[i].st.mode)) + continue; + cp = strchr(fl[i].wpath, '/'); + if (NULL != cp && '\0' != cp[1]) + continue; + fl[i].st.flags |= FLSTAT_TOP_DIR; + LOG4(sess, "%s: top-level", fl[i].wpath); + } + } else if (flsz) { + fl[0].st.flags |= FLSTAT_TOP_DIR; + LOG4(sess, "%s: top-level", fl[0].wpath); + } +} + +/* + * Filter through the fts() file information. + * We want directories (pre-order), regular files, and symlinks. + * Everything else is skipped and possibly warned about. + * Return zero to skip, non-zero to examine. 
+ */ +static int +flist_fts_check(struct sess *sess, FTSENT *ent) +{ + + if (FTS_F == ent->fts_info || + FTS_D == ent->fts_info || + FTS_SL == ent->fts_info || + FTS_SLNONE == ent->fts_info) + return 1; + + if (FTS_DC == ent->fts_info) { + WARNX(sess, "%s: directory cycle", ent->fts_path); + } else if (FTS_DNR == ent->fts_info) { + errno = ent->fts_errno; + WARN(sess, "%s: unreadable directory", ent->fts_path); + } else if (FTS_DOT == ent->fts_info) { + WARNX(sess, "%s: skipping dot-file", ent->fts_path); + } else if (FTS_ERR == ent->fts_info) { + errno = ent->fts_errno; + WARN(sess, "%s", ent->fts_path); + } else if (FTS_DEFAULT == ent->fts_info) { + WARNX(sess, "%s: skipping special", ent->fts_path); + } else if (FTS_NS == ent->fts_info) { + errno = ent->fts_errno; + WARN(sess, "%s: could not stat", ent->fts_path); + } + + return 0; +} + +/* + * Copy necessary elements in "st" into the fields of "f". + */ +static void +flist_copy_stat(struct flist *f, const struct stat *st) +{ + + f->st.mode = st->st_mode; + f->st.uid = st->st_uid; + f->st.gid = st->st_gid; + f->st.size = st->st_size; + f->st.mtime = st->st_mtime; +} + +void +flist_free(struct flist *f, size_t sz) +{ + size_t i; + + if (NULL == f) + return; + + for (i = 0; i < sz; i++) { + free(f[i].path); + free(f[i].link); + } + free(f); +} + +/* + * Serialise our file list (which may be zero-length) to the wire. + * Makes sure that the receiver isn't going to block on sending us + * return messages on the log channel. + * Return zero on failure, non-zero on success. + */ +int +flist_send(struct sess *sess, int fdin, + int fdout, const struct flist *fl, size_t flsz) +{ + size_t i, fnlen; + uint8_t flag; + const struct flist *f; + const char *fn; + + /* Double-check that we've no pending multiplexed data. 
*/ + + LOG2(sess, "sending file metadata list: %zu", flsz); + + for (i = 0; i < flsz; i++) { + f = &fl[i]; + fn = f->wpath; + fnlen = strlen(f->wpath); + assert(fnlen > 0); + + /* + * If applicable, unclog the read buffer. + * This happens when the receiver has a lot of log + * messages and all we're doing is sending our file list + * without checking for messages. + */ + + if (sess->mplex_reads && + io_read_check(sess, fdin) && + ! io_read_flush(sess, fdin)) { + ERRX1(sess, "io_read_flush"); + return 0; + } + + /* + * For ease, make all of our filenames be "long" + * regardless their actual length. + * This also makes sure that we don't transmit a zero + * byte unintentionally. + */ + + flag = FLIST_NAME_LONG; + + LOG3(sess, "%s: sending file metadata: " + "size %jd, mtime %jd, mode %o", + fn, (intmax_t)f->st.size, + (intmax_t)f->st.mtime, f->st.mode); + + /* Now write to the wire. */ + /* FIXME: buffer this. */ + + if ( ! io_write_byte(sess, fdout, flag)) { + ERRX1(sess, "io_write_byte"); + return 0; + } else if ( ! io_write_int(sess, fdout, fnlen)) { + ERRX1(sess, "io_write_int"); + return 0; + } else if ( ! io_write_buf(sess, fdout, fn, fnlen)) { + ERRX1(sess, "io_write_buf"); + return 0; + } else if ( ! io_write_long(sess, fdout, f->st.size)) { + ERRX1(sess, "io_write_long"); + return 0; + } else if ( ! io_write_int(sess, fdout, f->st.mtime)) { + ERRX1(sess, "io_write_int"); + return 0; + } else if ( ! io_write_int(sess, fdout, f->st.mode)) { + ERRX1(sess, "io_write_int"); + return 0; + } + + /* Optional link information. */ + + if (S_ISLNK(f->st.mode) && + sess->opts->preserve_links) { + fn = f->link; + fnlen = strlen(f->link); + if ( ! io_write_int(sess, fdout, fnlen)) { + ERRX1(sess, "io_write_int"); + return 0; + } + if ( ! io_write_buf(sess, fdout, fn, fnlen)) { + ERRX1(sess, "io_write_int"); + return 0; + } + } + + if (S_ISREG(f->st.mode)) + sess->total_size += f->st.size; + } + + if ( ! 
io_write_byte(sess, fdout, 0)) { + ERRX1(sess, "io_write_byte"); + return 0; + } + + return 1; +} + +/* + * Read the filename of a file list. + * This is the most expensive part of the file list transfer, so a lot + * of attention has gone into transmitting as little as possible. + * Micro-optimisation, but whatever. + * Fills in "f" with the full path on success. + * Returns zero on failure, non-zero on success. + */ +static int +flist_recv_name(struct sess *sess, int fd, + struct flist *f, uint8_t flags, char last[MAXPATHLEN]) +{ + uint8_t bval; + size_t partial = 0; + size_t pathlen = 0, len; + + /* + * Read our filename. + * If we have FLIST_NAME_SAME, we inherit some of the last + * transmitted name. + * If we have FLIST_NAME_LONG, then the string length is greater + * than byte-size. + */ + + if (FLIST_NAME_SAME & flags) { + if ( ! io_read_byte(sess, fd, &bval)) { + ERRX1(sess, "io_read_byte"); + return 0; + } + partial = bval; + } + + /* Get the (possibly-remaining) filename length. */ + + if (FLIST_NAME_LONG & flags) { + if ( ! io_read_size(sess, fd, &pathlen)) { + ERRX1(sess, "io_read_size"); + return 0; + } + } else { + if ( ! io_read_byte(sess, fd, &bval)) { + ERRX1(sess, "io_read_byte"); + return 0; + } + pathlen = bval; + } + + /* Allocate our full filename length. */ + /* FIXME: maximum pathname length. */ + + if (0 == (len = pathlen + partial)) { + ERRX(sess, "security violation: " + "zero-length pathname"); + return 0; + } + + if (NULL == (f->path = malloc(len + 1))) { + ERR(sess, "malloc"); + return 0; + } + f->path[len] = '\0'; + + if (FLIST_NAME_SAME & flags) + memcpy(f->path, last, partial); + + if ( ! 
io_read_buf(sess, fd, f->path + partial, pathlen)) { + ERRX1(sess, "io_read_buf"); + return 0; + } + + if ('/' == f->path[0]) { + ERRX(sess, "security violation: " + "absolute pathname: %s", f->path); + return 0; + } + + if (NULL != strstr(f->path, "/../") || + (len > 2 && 0 == strcmp(f->path + len - 3, "/..")) || + (len > 2 && 0 == strncmp(f->path, "../", 3)) || + 0 == strcmp(f->path, "..")) { + ERRX(sess, "%s: security violation: " + "backtracking pathname", f->path); + return 0; + } + + /* Record our last path and construct our filename. */ + + strlcpy(last, f->path, MAXPATHLEN); + f->wpath = f->path; + return 1; +} + +/* + * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; + * Returns zero on failure, non-zero on success. + */ +static int +flist_realloc(struct sess *sess, + struct flist **fl, size_t *sz, size_t *max) +{ + void *pp; + + if (*sz + 1 <= *max) { + (*sz)++; + return 1; + } + + pp = recallocarray(*fl, *max, + *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); + if (NULL == pp) { + ERR(sess, "recallocarray"); + return 0; + } + *fl = pp; + *max += FLIST_CHUNK_SIZE; + (*sz)++; + return 1; +} + +/* + * Copy a regular or symbolic link file "path" into "f". + * This handles the correct path creation and symbolic linking. + * Returns zero on failure, non-zero on success. + */ +static int +flist_append(struct sess *sess, struct flist *f, + struct stat *st, const char *path) +{ + + /* + * Copy the full path for local addressing and transmit + * only the filename part for the receiver. + */ + + if (NULL == (f->path = strdup(path))) { + ERR(sess, "strdup"); + return 0; + } + + if (NULL == (f->wpath = strrchr(f->path, '/'))) + f->wpath = f->path; + else + f->wpath++; + + /* + * On the receiving end, we'll strip out all bits on the + * mode except for the file permissions. + * No need to warn about it here. + */ + + flist_copy_stat(f, st); + + /* Optionally copy link information. 
*/ + + if (S_ISLNK(st->st_mode)) { + f->link = symlink_read(sess, f->path); + if (NULL == f->link) { + ERRX1(sess, "symlink_read"); + return 0; + } + } + + return 1; +} + +/* + * Receive a file list from the wire, filling in length "sz" (which may + * possibly be zero) and list "flp" on success. + * Return zero on failure, non-zero on success. + */ +int +flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) +{ + struct flist *fl = NULL; + struct flist *ff; + const struct flist *fflast = NULL; + size_t flsz = 0, flmax = 0, lsz; + uint8_t flag; + char last[MAXPATHLEN]; + uint64_t lval; /* temporary values... */ + int32_t ival; + + last[0] = '\0'; + + for (;;) { + if ( ! io_read_byte(sess, fd, &flag)) { + ERRX1(sess, "io_read_byte"); + goto out; + } else if (0 == flag) + break; + + if ( ! flist_realloc(sess, &fl, &flsz, &flmax)) { + ERRX1(sess, "flist_realloc"); + goto out; + } + + ff = &fl[flsz - 1]; + fflast = flsz > 1 ? &fl[flsz - 2] : NULL; + + /* Filename first. */ + + if ( ! flist_recv_name(sess, fd, ff, flag, last)) { + ERRX1(sess, "flist_recv_name"); + goto out; + } + + /* Read the file size. */ + + if ( ! io_read_ulong(sess, fd, &lval)) { + ERRX1(sess, "io_read_ulong"); + goto out; + } + ff->st.size = lval; + + /* Read the modification time. */ + + if ( ! (FLIST_TIME_SAME & flag)) { + if ( ! io_read_int(sess, fd, &ival)) { + ERRX1(sess, "io_read_int"); + goto out; + } + ff->st.mtime = ival; + } else if (NULL == fflast) { + ERRX(sess, "same time without last entry"); + goto out; + } else + ff->st.mtime = fflast->st.mtime; + + /* Read the file mode. */ + + if ( ! (FLIST_MODE_SAME & flag)) { + if ( ! io_read_int(sess, fd, &ival)) { + ERRX1(sess, "io_read_int"); + goto out; + } + ff->st.mode = ival; + } else if (NULL == fflast) { + ERRX(sess, "same mode without last entry"); + goto out; + } else + ff->st.mode = fflast->st.mode; + + /* Optionally read the link information. */ + + if (S_ISLNK(ff->st.mode) && + sess->opts->preserve_links) { + if ( ! 
io_read_size(sess, fd, &lsz)) { + ERRX1(sess, "io_read_size"); + goto out; + } else if (0 == lsz) { + ERRX(sess, "empty link name"); + goto out; + } + ff->link = calloc(lsz + 1, 1); + if (NULL == ff->link) { + ERR(sess, "calloc"); + goto out; + } + if ( ! io_read_buf(sess, fd, ff->link, lsz)) { + ERRX1(sess, "io_read_buf"); + goto out; + } + } + + LOG3(sess, "%s: received file metadata: " + "size %jd, mtime %jd, mode %o", + ff->path, (intmax_t)ff->st.size, + (intmax_t)ff->st.mtime, ff->st.mode); + + if (S_ISREG(ff->st.mode)) + sess->total_size += ff->st.size; + } + + /* Remember to order the received list. */ + + LOG2(sess, "received file metadata list: %zu", flsz); + qsort(fl, flsz, sizeof(struct flist), flist_cmp); + flist_topdirs(sess, fl, flsz); + *sz = flsz; + *flp = fl; + return 1; +out: + flist_free(fl, flsz); + *sz = 0; + *flp = NULL; + return 0; +} + +/* + * Generate a flist possibly-recursively given a file root, which may + * also be a regular file or symlink. + * On success, augments the generated list in "flp" of length "sz". + * Returns zero on failure, non-zero on success. + */ +static int +flist_gen_dirent(struct sess *sess, char *root, + struct flist **fl, size_t *sz, size_t *max) +{ + char *cargv[2], *cp; + int rc = 0; + FTS *fts; + FTSENT *ent; + struct flist *f; + size_t flsz = 0, stripdir; + struct stat st; + + cargv[0] = root; + cargv[1] = NULL; + + /* + * If we're a file, then revert to the same actions we use for + * the non-recursive scan. + */ + + if (-1 == lstat(root, &st)) { + ERR(sess, "%s: lstat", root); + return 0; + } else if (S_ISREG(st.st_mode)) { + if ( ! flist_realloc(sess, fl, sz, max)) { + ERRX1(sess, "flist_realloc"); + return 0; + } + f = &(*fl)[(*sz) - 1]; + assert(NULL != f); + + if ( ! flist_append(sess, f, &st, root)) { + ERRX1(sess, "flist_append"); + return 0; + } else if (-1 == unveil(root, "r")) { + ERR(sess, "%s: unveil", root); + return 0; + } + return 1; + } else if (S_ISLNK(st.st_mode)) { + if ( ! 
sess->opts->preserve_links) { + WARNX(sess, "%s: skipping symlink", root); + return 1; + } else if ( ! flist_realloc(sess, fl, sz, max)) { + ERRX1(sess, "flist_realloc"); + return 0; + } + f = &(*fl)[(*sz) - 1]; + assert(NULL != f); + + if ( ! flist_append(sess, f, &st, root)) { + ERRX1(sess, "flist_append"); + return 0; + } else if (-1 == unveil(root, "r")) { + ERR(sess, "%s: unveil", root); + return 0; + } + return 1; + } else if ( ! S_ISDIR(st.st_mode)) { + WARNX(sess, "%s: skipping special", root); + return 1; + } + + /* + * If we end with a slash, it means that we're not supposed to + * copy the directory part itself---only the contents. + * So set "stripdir" to be what we take out. + */ + + stripdir = strlen(root); + assert(stripdir > 0); + if ('/' != root[stripdir - 1]) + stripdir = 0; + + /* + * If we're not stripping anything, then see if we need to strip + * out the leading material in the path up to and including the + * last directory component. + */ + + if (0 == stripdir) + if (NULL != (cp = strrchr(root, '/'))) + stripdir = cp - root + 1; + + /* + * If we're recursive, then we need to take down all of the + * files and directory components, so use fts(3). + * Copying the information file-by-file into the flstat. + * We'll make sense of it in flist_send. + */ + + if (NULL == (fts = fts_open(cargv, FTS_PHYSICAL, NULL))) { + ERR(sess, "fts_open"); + return 0; + } + + errno = 0; + while (NULL != (ent = fts_read(fts))) { + if ( ! flist_fts_check(sess, ent)) { + errno = 0; + continue; + } + + /* We don't allow symlinks without -l. */ + + assert(NULL != ent->fts_statp); + if (S_ISLNK(ent->fts_statp->st_mode) && + ! sess->opts->preserve_links) { + WARNX(sess, "%s: skipping " + "symlink", ent->fts_path); + continue; + } + + /* Allocate a new file entry. */ + + if ( ! flist_realloc(sess, fl, sz, max)) { + ERRX1(sess, "flist_realloc"); + goto out; + } + flsz++; + f = &(*fl)[*sz - 1]; + + /* Our path defaults to "." for the root. 
/*
 * Build a file list by handing every element of "argv" ("argc" of
 * them, argc > 0) to flist_gen_dirent(), which appends to the list in
 * "flp" and keeps its length in "sz".
 * The arguments may name directories, files or symlinks.
 * If any element fails, the whole list is released and "flp" and "sz"
 * are reset to NULL and zero.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc,
	char **argv, struct flist **flp, size_t *sz)
{
	size_t		 cap = 0, n;

	for (n = 0; n < argc; n++) {
		if (flist_gen_dirent(sess, argv[n], flp, sz, &cap))
			continue;

		/* Free by capacity, not by length. */

		ERRX1(sess, "flist_gen_dirent");
		flist_free(*flp, cap);
		*flp = NULL;
		*sz = 0;
		return 0;
	}

	LOG2(sess, "recursively generated %zu filenames", *sz);
	return 1;
}
+ */ +static int +flist_gen_files(struct sess *sess, size_t argc, + char **argv, struct flist **flp, size_t *sz) +{ + struct flist *fl = NULL, *f; + size_t i, flsz = 0; + struct stat st; + + assert(argc); + + if (NULL == (fl = calloc(argc, sizeof(struct flist)))) { + ERR(sess, "calloc"); + return 0; + } + + for (i = 0; i < argc; i++) { + if ('\0' == argv[i][0]) + continue; + if (-1 == lstat(argv[i], &st)) { + ERR(sess, "%s: lstat", argv[i]); + goto out; + } + + /* + * File type checks. + * In non-recursive mode, we don't accept directories. + * We also skip symbolic links without -l. + * Beyond that, we only accept regular files. + */ + + if (S_ISDIR(st.st_mode)) { + WARNX(sess, "%s: skipping directory", argv[i]); + continue; + } else if (S_ISLNK(st.st_mode)) { + if ( ! sess->opts->preserve_links) { + WARNX(sess, "%s: skipping " + "symlink", argv[i]); + continue; + } + } else if ( ! S_ISREG(st.st_mode)) { + WARNX(sess, "%s: skipping special", argv[i]); + continue; + } + + f = &fl[flsz++]; + assert(NULL != f); + + /* Add this file to our file-system worldview. */ + + if (-1 == unveil(argv[i], "r")) { + ERR(sess, "%s: unveil", argv[i]); + goto out; + } else if ( ! flist_append(sess, f, &st, argv[i])) { + ERRX1(sess, "flist_append"); + goto out; + } + } + + LOG2(sess, "non-recursively generated %zu filenames", flsz); + *sz = flsz; + *flp = fl; + return 1; +out: + flist_free(fl, argc); + *sz = 0; + *flp = NULL; + return 0; +} + +/* + * Generate a sorted, de-duplicated list of file metadata. + * In non-recursive mode (the default), we use only the files we're + * given. + * Otherwise, directories are recursively examined. + * Returns zero on failure, non-zero on success. + * On success, "fl" will need to be freed with flist_free(). + */ +int +flist_gen(struct sess *sess, size_t argc, + char **argv, struct flist **flp, size_t *sz) +{ + int rc; + + assert(argc > 0); + rc = sess->opts->recursive ? 
+ flist_gen_dirs(sess, argc, argv, flp, sz) : + flist_gen_files(sess, argc, argv, flp, sz); + + /* After scanning, lock our file-system view. */ + + if (-1 == unveil(NULL, NULL)) { + ERR(sess, "unveil"); + return 0; + } else if ( ! rc) + return 0; + + qsort(*flp, *sz, sizeof(struct flist), flist_cmp); + + if (flist_dedupe(sess, flp, sz)) { + flist_topdirs(sess, *flp, *sz); + return 1; + } + + ERRX1(sess, "flist_dedupe"); + flist_free(*flp, *sz); + *flp = NULL; + *sz = 0; + return 0; +} + +/* + * Generate a list of files in root to delete that are within the + * top-level directories stipulated by "wfl". + * Only handles symbolic links, directories, and regular files. + * Returns zero on failure (fl and flsz will be NULL and zero), non-zero + * on success. + * On success, "fl" will need to be freed with flist_free(). + */ +int +flist_gen_dels(struct sess *sess, const char *root, + struct flist **fl, size_t *sz, + const struct flist *wfl, size_t wflsz) +{ + char **cargv = NULL; + int rc = 0, c; + FTS *fts = NULL; + FTSENT *ent; + struct flist *f; + size_t cargvs = 0, i, j, max = 0, stripdir; + ENTRY hent; + ENTRY *hentp; + + *fl = NULL; + *sz = 0; + + /* Only run this code when we're recursive. */ + + if ( ! sess->opts->recursive) + return 1; + + /* + * Gather up all top-level directories for scanning. + * This is stipulated by rsync's --delete behaviour, where we + * only delete things in the top-level directories given on the + * command line. + */ + + assert(wflsz > 0); + for (i = 0; i < wflsz; i++) + if (FLSTAT_TOP_DIR & wfl[i].st.flags) + cargvs++; + if (0 == cargvs) + return 1; + + if (NULL == (cargv = calloc(cargvs + 1, sizeof(char *)))) { + ERR(sess, "calloc"); + return 0; + } + + /* + * If we're given just a "." as the first entry, that means + * we're doing a relative copy with a trailing slash. + * Special-case this just for the sake of simplicity. + * Otherwise, look through all top-levels. 
+ */ + + if (wflsz && 0 == strcmp(wfl[0].wpath, ".")) { + assert(1 == cargvs); + assert(S_ISDIR(wfl[0].st.mode)); + if (asprintf(&cargv[0], "%s/", root) < 0) { + ERR(sess, "asprintf"); + cargv[0] = NULL; + goto out; + } + cargv[1] = NULL; + } else { + for (i = j = 0; i < wflsz; i++) { + if ( ! (FLSTAT_TOP_DIR & wfl[i].st.flags)) + continue; + assert(S_ISDIR(wfl[i].st.mode)); + assert(strcmp(wfl[i].wpath, ".")); + c = asprintf(&cargv[j], + "%s/%s", root, wfl[i].wpath); + if (c < 0) { + ERR(sess, "asprintf"); + cargv[j] = NULL; + goto out; + } + LOG4(sess, "%s: will scan " + "for deletions", cargv[j]); + j++; + } + assert(j == cargvs); + cargv[j] = NULL; + } + + LOG2(sess, "delete from %zu directories", cargvs); + + /* + * Next, use the standard hcreate(3) hashtable interface to hash + * all of the files that we want to synchronise. + * This way, we'll be able to determine which files we want to + * delete in O(n) time instead of O(n * search) time. + * Plus, we can do the scan in-band and only allocate the files + * we want to delete. + */ + + if ( ! hcreate(wflsz)) { + ERR(sess, "hcreate"); + goto out; + } + + for (i = 0; i < wflsz; i++) { + memset(&hent, 0, sizeof(ENTRY)); + if (NULL == (hent.key = strdup(wfl[i].wpath))) { + ERR(sess, "strdup"); + goto out; + } + if (NULL == (hentp = hsearch(hent, ENTER))) { + ERR(sess, "hsearch"); + goto out; + } else if (hentp->key != hent.key) { + ERRX(sess, "%s: duplicate", wfl[i].wpath); + free(hent.key); + goto out; + } + } + + /* + * Now we're going to try to descend into all of the top-level + * directories stipulated by the file list. + * If the directories don't exist, it's ok. + */ + + if (NULL == (fts = fts_open(cargv, FTS_PHYSICAL, NULL))) { + ERR(sess, "fts_open"); + goto out; + } + + stripdir = strlen(root) + 1; + errno = 0; + while (NULL != (ent = fts_read(fts))) { + if (FTS_NS == ent->fts_info) + continue; + if ( ! 
flist_fts_check(sess, ent)) { + errno = 0; + continue; + } else if (stripdir >= ent->fts_pathlen) + continue; + + /* Look up in hashtable. */ + + memset(&hent, 0, sizeof(ENTRY)); + hent.key = ent->fts_path + stripdir; + if (NULL != hsearch(hent, FIND)) + continue; + + /* Not found: we'll delete it. */ + + if ( ! flist_realloc(sess, fl, sz, &max)) { + ERRX1(sess, "flist_realloc"); + goto out; + } + f = &(*fl)[*sz - 1]; + + if (NULL == (f->path = strdup(ent->fts_path))) { + ERR(sess, "strdup"); + goto out; + } + f->wpath = f->path + stripdir; + assert(NULL != ent->fts_statp); + flist_copy_stat(f, ent->fts_statp); + errno = 0; + } + + if (errno) { + ERR(sess, "fts_read"); + goto out; + } + + qsort(*fl, *sz, sizeof(struct flist), flist_cmp); + rc = 1; +out: + if (NULL != fts) + fts_close(fts); + for (i = 0; i < cargvs; i++) + free(cargv[i]); + free(cargv); + hdestroy(); + return rc; +} + +/* + * Delete all files and directories in "fl". + * If called with a zero-length "fl", does nothing. + * If dry_run is specified, simply write what would be done. + * Return zero on failure, non-zero on success. + */ +int +flist_del(struct sess *sess, int root, + const struct flist *fl, size_t flsz) +{ + ssize_t i; + int flag; + + if (0 == flsz) + return 1; + + assert(sess->opts->del); + assert(sess->opts->recursive); + + for (i = flsz - 1; i >= 0; i--) { + LOG1(sess, "%s: deleting", fl[i].wpath); + if (sess->opts->dry_run) + continue; + assert(-1 != root); + flag = S_ISDIR(fl[i].st.mode) ? 
AT_REMOVEDIR : 0; + if (-1 == unlinkat(root, fl[i].wpath, flag) && + ENOENT != errno) { + ERR(sess, "%s: unlinkat", fl[i].wpath); + return 0; + } + } + + return 1; +} diff --git a/usr.bin/rsync/hash.c b/usr.bin/rsync/hash.c new file mode 100644 index 00000000000..97c12db25b1 --- /dev/null +++ b/usr.bin/rsync/hash.c @@ -0,0 +1,94 @@ +/* $Id: hash.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <endian.h> +#include <stdint.h> +#include <stdlib.h> + +#include "extern.h" +#include "md4.h" + +/* + * A fast 32-bit hash. + * Described in Tridgell's "Efficient Algorithms for Sorting and + * Synchronization" thesis and the "Rolling checksum" document. 
/*
 * Fast 32-bit rolling-style checksum over "len" bytes of "buf".
 * Two running sums are kept: "lo" is the sum of the bytes and "hi" is
 * the sum of the successive values of "lo".
 * Bytes are read as signed char, so bytes above 0x7f count as
 * negative values.
 * The result carries the low 16 bits of "lo" in its lower half and
 * "hi" shifted into its upper half.
 * Described in Tridgell's "Efficient Algorithms for Sorting and
 * Synchronization" thesis and the "Rolling checksum" document.
 */
uint32_t
hash_fast(const void *buf, size_t len)
{
	const signed char	*p = buf;
	uint32_t		 lo = 0, hi = 0;
	size_t			 pos = 0;

	/*
	 * Four bytes per step for as long as more than four remain.
	 * This is the byte-at-a-time loop below, unrolled.
	 */

	while (len > 4 && pos < len - 4) {
		hi += 4 * (lo + p[pos]) +
		    3 * p[pos + 1] +
		    2 * p[pos + 2] +
		    p[pos + 3];
		lo += p[pos] + p[pos + 1] + p[pos + 2] + p[pos + 3];
		pos += 4;
	}

	/* Whatever is left, one byte at a time. */

	while (pos < len) {
		lo += p[pos++];
		hi += lo;
	}

	/* s(k, l) = (eps % M) + 2^16 b(k, l) % M */

	return (lo & 0xffff) + (hi << 16);
}
/*
 * Ask, without waiting, whether "fd" has data to read.
 * Uses poll(2) with a zero timeout.
 * Returns -1 if poll(2) itself fails, otherwise the POLLIN bit of the
 * reported events: non-zero when data can be read, zero when not.
 * Note that the failure value is also non-zero, so a caller that only
 * tests for truth treats a poll failure like readable data.
 */
int
io_read_check(struct sess *sess, int fd)
{
	struct pollfd	 probe = { .fd = fd, .events = POLLIN };

	if (poll(&probe, 1, 0) >= 0)
		return probe.revents & POLLIN;

	ERR(sess, "poll");
	return -1;
}
c) { + ERRX1(sess, "io_write_nonblocking"); + return 0; + } else if (0 == wsz) { + ERRX(sess, "io_write_nonblocking: short write"); + return 0; + } + buf += wsz; + sz -= wsz; + } + + return 1; +} + +/* + * Write "buf" of size "sz" to non-blocking descriptor. + * Returns zero on failure, non-zero on success (all bytes written to + * the descriptor). + */ +int +io_write_buf(struct sess *sess, int fd, const void *buf, size_t sz) +{ + int32_t tag, tagbuf; + size_t wsz; + int c; + + if ( ! sess->mplex_writes) { + c = io_write_blocking(sess, fd, buf, sz); + sess->total_write += sz; + return c; + } + + while (sz > 0) { + wsz = sz & 0xFFFFFF; + tag = (7 << 24) + wsz; + tagbuf = htole32(tag); + if ( ! io_write_blocking(sess, fd, &tagbuf, sizeof(tagbuf))) { + ERRX1(sess, "io_write_blocking"); + return 0; + } + if ( ! io_write_blocking(sess, fd, buf, wsz)) { + ERRX1(sess, "io_write_blocking"); + return 0; + } + sess->total_write += wsz; + sz -= wsz; + buf += wsz; + } + + return 1; +} + +/* + * Write "line" (NUL-terminated) followed by a newline. + * Returns zero on failure, non-zero on succcess. + */ +int +io_write_line(struct sess *sess, int fd, const char *line) +{ + + if ( ! io_write_buf(sess, fd, line, strlen(line))) + ERRX1(sess, "io_write_buf"); + else if ( ! io_write_byte(sess, fd, '\n')) + ERRX1(sess, "io_write_byte"); + else + return 1; + + return 0; +} + +/* + * Read buffer from non-blocking descriptor. + * Returns zero on failure, non-zero on success (zero or more bytes). + */ +static int +io_read_nonblocking(struct sess *sess, + int fd, void *buf, size_t bsz, size_t *sz) +{ + struct pollfd pfd; + ssize_t rsz; + + *sz = 0; + + if (0 == bsz) + return 1; + + pfd.fd = fd; + pfd.events = POLLIN; + + if (poll(&pfd, 1, INFTIM) < 0) { + ERR(sess, "poll"); + return 0; + } + if ((pfd.revents & (POLLERR|POLLNVAL))) { + ERRX(sess, "poll: bad fd"); + return 0; + } else if ( ! 
(pfd.revents & (POLLIN|POLLHUP))) { + ERRX(sess, "poll: unknown event"); + return 0; + } + + if ((rsz = read(fd, buf, bsz)) < 0) { + ERR(sess, "read"); + return 0; + } else if (0 == rsz) { + ERRX(sess, "unexpected end of file"); + return 0; + } + + *sz = rsz; + return 1; +} + +/* + * Blocking read of the full size of the buffer. + * This can be called from either the error type message or a regular + * message---or for that matter, multiplexed or not. + * Returns 0 on failure, non-zero on success (all bytes read). + */ +static int +io_read_blocking(struct sess *sess, + int fd, void *buf, size_t sz) +{ + size_t rsz; + int c; + + while (sz > 0) { + c = io_read_nonblocking(sess, fd, buf, sz, &rsz); + if ( ! c) { + ERRX1(sess, "io_read_nonblocking"); + return 0; + } else if (0 == rsz) { + ERRX(sess, "io_read_nonblocking: short read"); + return 0; + } + buf += rsz; + sz -= rsz; + } + + return 1; +} + +/* + * When we do a lot of writes in a row (such as when the sender emits + * the file list), the server might be sending us multiplexed log + * messages. + * If it sends too many, it clogs the socket. + * This function looks into the read buffer and clears out any log + * messages pending. + * If called when there are valid data reads available, this function + * does nothing. + * Returns zero on failure, non-zero on success. + */ +int +io_read_flush(struct sess *sess, int fd) +{ + int32_t tagbuf, tag; + char mpbuf[1024]; + + if (sess->mplex_read_remain) + return 1; + + /* + * First, read the 4-byte multiplex tag. + * The first byte is the tag identifier (7 for normal + * data, !7 for out-of-band data), the last three are + * for the remaining data size. + */ + + if ( ! 
io_read_blocking(sess, fd, &tagbuf, sizeof(tagbuf))) { + ERRX1(sess, "io_read_blocking"); + return 0; + } + tag = le32toh(tagbuf); + sess->mplex_read_remain = tag & 0xFFFFFF; + tag >>= 24; + if (7 == tag) + return 1; + + tag -= 7; + + if (sess->mplex_read_remain > sizeof(mpbuf)) { + ERRX(sess, "multiplex buffer overflow"); + return 0; + } else if (0 == sess->mplex_read_remain) + return 1; + + if ( ! io_read_blocking(sess, fd, + mpbuf, sess->mplex_read_remain)) { + ERRX1(sess, "io_read_blocking"); + return 0; + } + if ('\n' == mpbuf[sess->mplex_read_remain - 1]) + mpbuf[--sess->mplex_read_remain] = '\0'; + + /* + * Always print the server's messages, as the server + * will control its own log levelling. + */ + + LOG0(sess, "%.*s", (int)sess->mplex_read_remain, mpbuf); + sess->mplex_read_remain = 0; + + /* + * I only know that a tag of one means an error. + * This means that we should exit. + */ + + if (1 == tag) { + ERRX1(sess, "error from remote host"); + return 0; + } + return 1; +} + +/* + * Read buffer from non-blocking descriptor, possibly in multiplex read + * mode. + * Returns zero on failure, non-zero on success (all bytes read from + * the descriptor). + */ +int +io_read_buf(struct sess *sess, int fd, void *buf, size_t sz) +{ + size_t rsz; + int c; + + /* If we're not multiplexing, read directly. */ + + if ( ! sess->mplex_reads) { + assert(0 == sess->mplex_read_remain); + c = io_read_blocking(sess, fd, buf, sz); + sess->total_read += sz; + return c; + } + + while (sz > 0) { + /* + * First, check to see if we have any regular data + * hanging around waiting to be read. + * If so, read the lesser of that data and whatever + * amount we currently want. + */ + + if (sess->mplex_read_remain) { + rsz = sess->mplex_read_remain < sz ? + sess->mplex_read_remain : sz; + if ( ! 
/*
 * Like io_read_long(), but for values that must not be negative.
 * A negative value on the wire is an error: "val" is left untouched
 * and zero is returned, so callers never see an unset result reported
 * as success.
 * Returns zero on failure, non-zero on success.
 */
int
io_read_ulong(struct sess *sess, int fd, uint64_t *val)
{
	int64_t	 oval;

	if ( ! io_read_long(sess, fd, &oval)) {
		ERRX(sess, "io_read_long");
		return 0;
	} else if (oval < 0) {
		ERRX(sess, "io_read_ulong: negative value");
		return 0;
	}

	*val = oval;
	return 1;
}
/*
 * Take a 32-bit integer out of "buf" with io_unbuffer_int() (which
 * advances "bufpos") and hand it back as a size_t in "val".
 * A negative integer is rejected and "val" is left untouched.
 * Returns zero on failure, non-zero on success.
 */
int
io_unbuffer_size(struct sess *sess, const void *buf,
	size_t *bufpos, size_t bufsz, size_t *val)
{
	int32_t	 raw;

	io_unbuffer_int(sess, buf, bufpos, bufsz, &raw);

	if (raw >= 0) {
		*val = raw;
		return 1;
	}

	ERRX(sess, "io_unbuffer_size: negative value");
	return 0;
}
+ */ +#include <errno.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "extern.h" + +/* + * Log a message at level "level", starting at zero, which corresponds + * to the current verbosity level opts->verbose (whose verbosity starts + * at one). + */ +void +rsync_log(struct sess *sess, const char *fname, + size_t line, int level, const char *fmt, ...) +{ + char *buf = NULL; + va_list ap; + + if (sess->opts->verbose < level + 1) + return; + + if (NULL != fmt) { + va_start(ap, fmt); + if (vasprintf(&buf, fmt, ap) < 0) { + va_end(ap); + return; + } + va_end(ap); + } + + if (level <= 0 && NULL != buf) + fprintf(stderr, "%s\n", buf); + else if (level > 0) + fprintf(stderr, "%s:%zu%s%s\n", fname, line, + NULL != buf ? ": " : "", + NULL != buf ? buf : ""); + free(buf); +} + +/* + * This reports an error---not a warning. + * However, it is not like errx(3) in that it does not exit. + */ +void +rsync_errx(struct sess *sess, const char *fname, + size_t line, const char *fmt, ...) +{ + char *buf = NULL; + va_list ap; + + if (NULL != fmt) { + va_start(ap, fmt); + if (vasprintf(&buf, fmt, ap) < 0) { + va_end(ap); + return; + } + va_end(ap); + } + + fprintf(stderr, "%s:%zu: error%s%s\n", fname, line, + NULL != buf ? ": " : "", + NULL != buf ? buf : ""); + free(buf); +} + +/* + * This reports an error---not a warning. + * However, it is not like err(3) in that it does not exit. + */ +void +rsync_err(struct sess *sess, const char *fname, + size_t line, const char *fmt, ...) +{ + char *buf = NULL; + va_list ap; + int er = errno; + + if (NULL != fmt) { + va_start(ap, fmt); + if (vasprintf(&buf, fmt, ap) < 0) { + va_end(ap); + return; + } + va_end(ap); + } + + fprintf(stderr, "%s:%zu: error%s%s: %s\n", fname, line, + NULL != buf ? ": " : "", + NULL != buf ? 
buf : "", strerror(er)); + free(buf); +} + +/* + * Prints a non-terminal error message, that is, when reporting on the + * chain of functions from which the actual warning occurred. + */ +void +rsync_errx1(struct sess *sess, const char *fname, + size_t line, const char *fmt, ...) +{ + char *buf = NULL; + va_list ap; + + if (sess->opts->verbose < 1) + return; + + if (NULL != fmt) { + va_start(ap, fmt); + if (vasprintf(&buf, fmt, ap) < 0) { + va_end(ap); + return; + } + va_end(ap); + } + + fprintf(stderr, "%s:%zu: error%s%s\n", fname, line, + NULL != buf ? ": " : "", + NULL != buf ? buf : ""); + free(buf); +} + +/* + * Prints a warning message. + */ +void +rsync_warnx(struct sess *sess, const char *fname, + size_t line, const char *fmt, ...) +{ + char *buf = NULL; + va_list ap; + + if (NULL != fmt) { + va_start(ap, fmt); + if (vasprintf(&buf, fmt, ap) < 0) { + va_end(ap); + return; + } + va_end(ap); + } + + fprintf(stderr, "%s:%zu: warning%s%s\n", fname, line, + NULL != buf ? ": " : "", + NULL != buf ? buf : ""); + free(buf); +} + +/* + * Prints a warning with an errno. + * It uses a level detector for when to inhibit printing. + */ +void +rsync_warn(struct sess *sess, int level, + const char *fname, size_t line, const char *fmt, ...) +{ + char *buf = NULL; + va_list ap; + int er = errno; + + if (sess->opts->verbose < level) + return; + + if (NULL != fmt) { + va_start(ap, fmt); + if (vasprintf(&buf, fmt, ap) < 0) { + va_end(ap); + return; + } + va_end(ap); + } + + fprintf(stderr, "%s:%zu: warning%s%s: %s\n", fname, line, + NULL != buf ? ": " : "", + NULL != buf ? 
buf : "", strerror(er)); + free(buf); +} diff --git a/usr.bin/rsync/main.c b/usr.bin/rsync/main.c new file mode 100644 index 00000000000..871922f2c15 --- /dev/null +++ b/usr.bin/rsync/main.c @@ -0,0 +1,453 @@ +/* $Id: main.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#include <assert.h> +#include <err.h> +#include <getopt.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "extern.h" + +static void +fargs_free(struct fargs *p) +{ + size_t i; + + if (NULL == p) + return; + + if (NULL != p->sources) + for (i = 0; i < p->sourcesz; i++) + free(p->sources[i]); + + free(p->sources); + free(p->sink); + free(p->host); + free(p); +} + +/* + * A remote host is has a colon before the first path separator. + * This works for rsh remote hosts (host:/foo/bar), implicit rsync + * remote hosts (host::/foo/bar), and explicit (rsync://host/foo). + * Return zero if local, non-zero if remote. 
+ */ +static int +fargs_is_remote(const char *v) +{ + size_t pos; + + pos = strcspn(v, ":/"); + return ':' == v[pos]; +} + +/* + * Test whether a remote host is specifically an rsync daemon. + * Return zero if not, non-zero if so. + */ +static int +fargs_is_daemon(const char *v) +{ + size_t pos; + + if (0 == strncasecmp(v, "rsync://", 8)) + return 1; + + pos = strcspn(v, ":/"); + return ':' == v[pos] && ':' == v[pos + 1]; +} + +/* + * Take the command-line filenames (e.g., rsync foo/ bar/ baz/) and + * determine our operating mode. + * For example, if the first argument is a remote file, this means that + * we're going to transfer from the remote to the local. + * We also make sure that the arguments are consistent, that is, if + * we're going to transfer from the local to the remote, that no + * filenames for the local transfer indicate remote hosts. + * Always returns the parsed and sanitised options. + */ +static struct fargs * +fargs_parse(size_t argc, char *argv[]) +{ + struct fargs *f = NULL; + char *cp; + size_t i, j, len = 0; + + /* Allocations. */ + + if (NULL == (f = calloc(1, sizeof(struct fargs)))) + err(EXIT_FAILURE, "calloc"); + + f->sourcesz = argc - 1; + if (NULL == (f->sources = calloc(f->sourcesz, sizeof(char *)))) + err(EXIT_FAILURE, "calloc"); + + for (i = 0; i < argc - 1; i++) + if (NULL == (f->sources[i] = strdup(argv[i]))) + err(EXIT_FAILURE, "strdup"); + + if (NULL == (f->sink = strdup(argv[i]))) + err(EXIT_FAILURE, "strdup"); + + /* + * Test files for its locality. + * If the last is a remote host, then we're sending from the + * local to the remote host ("sender" mode). + * If the first, remote to local ("receiver" mode). + * If neither, a local transfer in sender style. 
+ */ + + f->mode = FARGS_SENDER; + + if (fargs_is_remote(f->sink)) { + f->mode = FARGS_SENDER; + if (NULL == (f->host = strdup(f->sink))) + err(EXIT_FAILURE, "strdup"); + } + + if (fargs_is_remote(f->sources[0])) { + if (NULL != f->host) + errx(EXIT_FAILURE, "both source and " + "destination cannot be remote files"); + f->mode = FARGS_RECEIVER; + if (NULL == (f->host = strdup(f->sources[0]))) + err(EXIT_FAILURE, "strdup"); + } + + if (NULL != f->host) { + if (0 == strncasecmp(f->host, "rsync://", 8)) { + /* rsync://host/module[/path] */ + f->remote = 1; + len = strlen(f->host) - 8 + 1; + memmove(f->host, f->host + 8, len); + if (NULL == (cp = strchr(f->host, '/'))) + errx(EXIT_FAILURE, "rsync protocol " + "requires a module name"); + *cp++ = '\0'; + f->module = cp; + if (NULL != (cp = strchr(f->module, '/'))) + *cp = '\0'; + } else { + /* host:[/path] */ + cp = strchr(f->host, ':'); + assert(NULL != cp); + *cp++ = '\0'; + if (':' == *cp) { + /* host::module[/path] */ + f->remote = 1; + f->module = ++cp; + cp = strchr(f->module, '/'); + if (NULL != cp) + *cp = '\0'; + } + } + if (0 == (len = strlen(f->host))) + errx(EXIT_FAILURE, "empty remote host"); + if (f->remote && 0 == strlen(f->module)) + errx(EXIT_FAILURE, "empty remote module"); + } + + /* Make sure we have the same "hostspec" for all files. */ + + if ( ! f->remote) { + if (FARGS_SENDER == f->mode) + for (i = 0; i < f->sourcesz; i++) { + if ( ! fargs_is_remote(f->sources[i])) + continue; + errx(EXIT_FAILURE, "remote file in " + "list of local sources: %s", + f->sources[i]); + } + if (FARGS_RECEIVER == f->mode) + for (i = 0; i < f->sourcesz; i++) { + if (fargs_is_remote(f->sources[i]) && + ! 
fargs_is_daemon(f->sources[i])) + continue; + if (fargs_is_daemon(f->sources[i])) + errx(EXIT_FAILURE, "remote " + "daemon in list of " + "remote sources: %s", + f->sources[i]); + errx(EXIT_FAILURE, "local file in " + "list of remote sources: %s", + f->sources[i]); + } + } else { + if (FARGS_RECEIVER != f->mode) + errx(EXIT_FAILURE, "sender mode for remote " + "daemon receivers not yet supported"); + for (i = 0; i < f->sourcesz; i++) { + if (fargs_is_daemon(f->sources[i])) + continue; + errx(EXIT_FAILURE, "non-remote daemon file " + "in list of remote daemon sources: " + "%s", f->sources[i]); + } + } + + /* + * If we're not remote and a sender, strip our hostname. + * Then exit if we're a sender or a local connection. + */ + + if ( ! f->remote) { + if (NULL == f->host) + return f; + if (FARGS_SENDER == f->mode) { + assert(NULL != f->host); + assert(len > 0); + j = strlen(f->sink); + memmove(f->sink, f->sink + len + 1, j - len); + return f; + } else if (FARGS_RECEIVER != f->mode) + return f; + } + + /* + * Now strip the hostnames from the remote host. + * rsync://host/module/path -> module/path + * host::module/path -> module/path + * host:path -> path + * Also make sure that the remote hosts are the same. 
+ */ + + assert(NULL != f->host); + assert(len > 0); + + for (i = 0; i < f->sourcesz; i++) { + cp = f->sources[i]; + j = strlen(cp); + if (f->remote && + 0 == strncasecmp(cp, "rsync://", 8)) { + /* rsync://path */ + cp += 8; + if (strncmp(cp, f->host, len) || + ('/' != cp[len] && '\0' != cp[len])) + errx(EXIT_FAILURE, "different remote " + "host: %s", f->sources[i]); + memmove(f->sources[i], + f->sources[i] + len + 8 + 1, + j - len - 8); + } else if (f->remote && 0 == strncmp(cp, "::", 2)) { + /* ::path */ + memmove(f->sources[i], + f->sources[i] + 2, j - 1); + } else if (f->remote) { + /* host::path */ + if (strncmp(cp, f->host, len) || + (':' != cp[len] && '\0' != cp[len])) + errx(EXIT_FAILURE, "different remote " + "host: %s", f->sources[i]); + memmove(f->sources[i], + f->sources[i] + len + 2, + j - len - 1); + } else if (':' == cp[0]) { + /* :path */ + memmove(f->sources[i], f->sources[i] + 1, j); + } else { + /* host:path */ + if (strncmp(cp, f->host, len) || + (':' != cp[len] && '\0' != cp[len])) + errx(EXIT_FAILURE, "different remote " + "host: %s", f->sources[i]); + memmove(f->sources[i], + f->sources[i] + len + 1, j - len); + } + } + + return f; +} + +int +main(int argc, char *argv[]) +{ + struct opts opts; + pid_t child; + int fds[2], flags, c, st; + struct fargs *fargs; + struct option lopts[] = { + { "delete", no_argument, &opts.del, 1 }, + { "rsync-path", required_argument, NULL, 1 }, + { "sender", no_argument, &opts.sender, 1 }, + { "server", no_argument, &opts.server, 1 }, + { NULL, 0, NULL, 0 }}; + + /* Global pledge. */ + + if (-1 == pledge("dns inet unveil exec stdio rpath wpath cpath proc fattr", NULL)) + err(EXIT_FAILURE, "pledge"); + + memset(&opts, 0, sizeof(struct opts)); + + for (;;) { + c = getopt_long(argc, argv, "e:lnprtv", lopts, NULL); + if (-1 == c) + break; + switch (c) { + case 'e': + /* Ignore. 
*/ + break; + case 'l': + opts.preserve_links = 1; + break; + case 'n': + opts.dry_run = 1; + break; + case 'p': + opts.preserve_perms = 1; + break; + case 'r': + opts.recursive = 1; + break; + case 't': + opts.preserve_times = 1; + break; + case 'v': + opts.verbose++; + break; + case 0: + /* Non-NULL flag values (e.g., --sender). */ + break; + case 1: + opts.rsync_path = optarg; + break; + default: + goto usage; + } + } + + argc -= optind; + argv += optind; + + /* FIXME: reference implementation rsync accepts this. */ + + if (argc < 2) + goto usage; + + /* + * This is what happens when we're started with the "hidden" + * --server option, which is invoked for the rsync on the remote + * host by the parent. + */ + + if (opts.server) { + if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL)) + err(EXIT_FAILURE, "pledge"); + c = rsync_server(&opts, (size_t)argc, argv); + return c ? EXIT_SUCCESS : EXIT_FAILURE; + } + + /* + * Now we know that we're the client on the local machine + * invoking rsync(1). + * At this point, we need to start the client and server + * initiation logic. + * The client is what we continue running on this host; the + * server is what we'll use to connect to the remote and + * invoke rsync with the --server option. + */ + + fargs = fargs_parse(argc, argv); + assert(NULL != fargs); + + /* + * If we're contacting an rsync:// daemon, then we don't need to + * fork, because we won't start a server ourselves. + * Route directly into the socket code, in that case. + */ + + if (fargs->remote) { + assert(FARGS_RECEIVER == fargs->mode); + if (-1 == pledge("dns inet unveil stdio rpath wpath cpath fattr", NULL)) + err(EXIT_FAILURE, "pledge"); + c = rsync_socket(&opts, fargs); + fargs_free(fargs); + return c ? EXIT_SUCCESS : EXIT_FAILURE; + } + + /* Drop the dns/inet possibility. */ + + if (-1 == pledge("unveil exec stdio rpath wpath cpath proc fattr", NULL)) + err(EXIT_FAILURE, "pledge"); + + /* Create a bidirectional socket and start our child. 
*/ + + flags = SOCK_STREAM | SOCK_NONBLOCK; + + if (-1 == socketpair(AF_UNIX, flags, 0, fds)) + err(EXIT_FAILURE, "socketpair"); + + if (-1 == (child = fork())) { + close(fds[0]); + close(fds[1]); + err(EXIT_FAILURE, "fork"); + } + + /* Drop the fork possibility. */ + + if (-1 == pledge("unveil exec stdio rpath wpath cpath fattr", NULL)) + err(EXIT_FAILURE, "pledge"); + + if (0 == child) { + close(fds[0]); + fds[0] = -1; + if (-1 == pledge("exec stdio", NULL)) + err(EXIT_FAILURE, "pledge"); + rsync_child(&opts, fds[1], fargs); + /* NOTREACHED */ + } + + close(fds[1]); + fds[1] = -1; + if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL)) + err(EXIT_FAILURE, "pledge"); + c = rsync_client(&opts, fds[0], fargs); + fargs_free(fargs); + + /* + * If the client has an error and exits, the server may be + * sitting around waiting to get data while we waitpid(). + * So close the connection here so that they don't hang. + */ + + if ( ! c) { + close(fds[0]); + fds[0] = -1; + } + + if (-1 == waitpid(child, &st, 0)) + err(EXIT_FAILURE, "waitpid"); + if ( ! (WIFEXITED(st) && EXIT_SUCCESS == WEXITSTATUS(st))) + c = 0; + + if (-1 != fds[0]) + close(fds[0]); + return c ? EXIT_SUCCESS : EXIT_FAILURE; +usage: + fprintf(stderr, "usage: %s [-lnprtv] " + "[--delete] [--rsync-path=prog] src ... dst\n", + getprogname()); + return EXIT_FAILURE; +} diff --git a/usr.bin/rsync/md4.c b/usr.bin/rsync/md4.c new file mode 100644 index 00000000000..3641f03ff64 --- /dev/null +++ b/usr.bin/rsync/md4.c @@ -0,0 +1,265 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD4 Message-Digest Algorithm (RFC 1320). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md4 + * + * Author: + * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. 
+ * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + * + * This differs from Colin Plumb's older public domain implementation in that + * no exactly 32-bit integer data type is required (any 32-bit or wider + * unsigned integer data type will do), there's no compile-time endianness + * configuration, and the function prototypes match OpenSSL's. No code from + * Colin Plumb's implementation has been reused; this comment merely compares + * the properties of the two independent implementations. + * + * The primary goals of this implementation are portability and ease of use. + * It is meant to be fast, but not as fast as possible. Some known + * optimizations are not included to reduce source code size and avoid + * compile-time configuration. + */ +#include <string.h> + +#include "md4.h" + +/* + * The basic MD4 functions. + * + * F and G are optimized compared to their RFC 1320 definitions, with the + * optimization for F borrowed from Colin Plumb's MD5 implementation. + */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) (((x) & ((y) | (z))) | ((y) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +/* + * The MD4 transformation for all three rounds. + */ +#define STEP(f, a, b, c, d, x, s) \ + (a) += f((b), (c), (d)) + (x); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); + +/* + * SET reads 4 input bytes in little-endian byte order and stores them in a + * properly aligned word in host byte order. 
+ * + * The check for little-endian architectures that tolerate unaligned memory + * accesses is just an optimization. Nothing will break if it fails to detect + * a suitable architecture. + * + * Unfortunately, this optimization may be a C strict aliasing rules violation + * if the caller's data buffer has effective type that cannot be aliased by + * MD4_u32plus. In practice, this problem may occur if these MD4 routines are + * inlined into a calling function, or with future and dangerously advanced + * link-time optimizations. For the time being, keeping these MD4 routines in + * their own translation unit avoids the problem. + */ +#if defined(__i386__) || defined(__x86_64__) || defined(__vax__) +#define SET(n) \ + (*(MD4_u32plus *)&ptr[(n) * 4]) +#define GET(n) \ + SET(n) +#else +#define SET(n) \ + (ctx->block[(n)] = \ + (MD4_u32plus)ptr[(n) * 4] | \ + ((MD4_u32plus)ptr[(n) * 4 + 1] << 8) | \ + ((MD4_u32plus)ptr[(n) * 4 + 2] << 16) | \ + ((MD4_u32plus)ptr[(n) * 4 + 3] << 24)) +#define GET(n) \ + (ctx->block[(n)]) +#endif + +/* + * This processes one or more 64-byte data blocks, but does NOT update the bit + * counters. There are no alignment requirements. 
+ */ +static const void *body(MD4_CTX *ctx, const void *data, unsigned long size) +{ + const unsigned char *ptr; + MD4_u32plus a, b, c, d; + MD4_u32plus saved_a, saved_b, saved_c, saved_d; + const MD4_u32plus ac1 = 0x5a827999, ac2 = 0x6ed9eba1; + + ptr = (const unsigned char *)data; + + a = ctx->a; + b = ctx->b; + c = ctx->c; + d = ctx->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + +/* Round 1 */ + STEP(F, a, b, c, d, SET(0), 3) + STEP(F, d, a, b, c, SET(1), 7) + STEP(F, c, d, a, b, SET(2), 11) + STEP(F, b, c, d, a, SET(3), 19) + STEP(F, a, b, c, d, SET(4), 3) + STEP(F, d, a, b, c, SET(5), 7) + STEP(F, c, d, a, b, SET(6), 11) + STEP(F, b, c, d, a, SET(7), 19) + STEP(F, a, b, c, d, SET(8), 3) + STEP(F, d, a, b, c, SET(9), 7) + STEP(F, c, d, a, b, SET(10), 11) + STEP(F, b, c, d, a, SET(11), 19) + STEP(F, a, b, c, d, SET(12), 3) + STEP(F, d, a, b, c, SET(13), 7) + STEP(F, c, d, a, b, SET(14), 11) + STEP(F, b, c, d, a, SET(15), 19) + +/* Round 2 */ + STEP(G, a, b, c, d, GET(0) + ac1, 3) + STEP(G, d, a, b, c, GET(4) + ac1, 5) + STEP(G, c, d, a, b, GET(8) + ac1, 9) + STEP(G, b, c, d, a, GET(12) + ac1, 13) + STEP(G, a, b, c, d, GET(1) + ac1, 3) + STEP(G, d, a, b, c, GET(5) + ac1, 5) + STEP(G, c, d, a, b, GET(9) + ac1, 9) + STEP(G, b, c, d, a, GET(13) + ac1, 13) + STEP(G, a, b, c, d, GET(2) + ac1, 3) + STEP(G, d, a, b, c, GET(6) + ac1, 5) + STEP(G, c, d, a, b, GET(10) + ac1, 9) + STEP(G, b, c, d, a, GET(14) + ac1, 13) + STEP(G, a, b, c, d, GET(3) + ac1, 3) + STEP(G, d, a, b, c, GET(7) + ac1, 5) + STEP(G, c, d, a, b, GET(11) + ac1, 9) + STEP(G, b, c, d, a, GET(15) + ac1, 13) + +/* Round 3 */ + STEP(H, a, b, c, d, GET(0) + ac2, 3) + STEP(H, d, a, b, c, GET(8) + ac2, 9) + STEP(H, c, d, a, b, GET(4) + ac2, 11) + STEP(H, b, c, d, a, GET(12) + ac2, 15) + STEP(H, a, b, c, d, GET(2) + ac2, 3) + STEP(H, d, a, b, c, GET(10) + ac2, 9) + STEP(H, c, d, a, b, GET(6) + ac2, 11) + STEP(H, b, c, d, a, GET(14) + ac2, 15) + STEP(H, a, b, c, d, GET(1) + ac2, 3) + 
STEP(H, d, a, b, c, GET(9) + ac2, 9) + STEP(H, c, d, a, b, GET(5) + ac2, 11) + STEP(H, b, c, d, a, GET(13) + ac2, 15) + STEP(H, a, b, c, d, GET(3) + ac2, 3) + STEP(H, d, a, b, c, GET(11) + ac2, 9) + STEP(H, c, d, a, b, GET(7) + ac2, 11) + STEP(H, b, c, d, a, GET(15) + ac2, 15) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (size -= 64); + + ctx->a = a; + ctx->b = b; + ctx->c = c; + ctx->d = d; + + return ptr; +} + +void MD4_Init(MD4_CTX *ctx) +{ + ctx->a = 0x67452301; + ctx->b = 0xefcdab89; + ctx->c = 0x98badcfe; + ctx->d = 0x10325476; + + ctx->lo = 0; + ctx->hi = 0; +} + +void MD4_Update(MD4_CTX *ctx, const void *data, unsigned long size) +{ + MD4_u32plus saved_lo; + unsigned long used, available; + + saved_lo = ctx->lo; + if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) + ctx->hi++; + ctx->hi += size >> 29; + + used = saved_lo & 0x3f; + + if (used) { + available = 64 - used; + + if (size < available) { + memcpy(&ctx->buffer[used], data, size); + return; + } + + memcpy(&ctx->buffer[used], data, available); + data = (const unsigned char *)data + available; + size -= available; + body(ctx, ctx->buffer, 64); + } + + if (size >= 64) { + data = body(ctx, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(ctx->buffer, data, size); +} + +#define OUT(dst, src) \ + (dst)[0] = (unsigned char)(src); \ + (dst)[1] = (unsigned char)((src) >> 8); \ + (dst)[2] = (unsigned char)((src) >> 16); \ + (dst)[3] = (unsigned char)((src) >> 24); + +void MD4_Final(unsigned char *result, MD4_CTX *ctx) +{ + unsigned long used, available; + + used = ctx->lo & 0x3f; + + ctx->buffer[used++] = 0x80; + + available = 64 - used; + + if (available < 8) { + memset(&ctx->buffer[used], 0, available); + body(ctx, ctx->buffer, 64); + used = 0; + available = 64; + } + + memset(&ctx->buffer[used], 0, available - 8); + + ctx->lo <<= 3; + OUT(&ctx->buffer[56], ctx->lo) + OUT(&ctx->buffer[60], ctx->hi) + + body(ctx, ctx->buffer, 64); + + 
OUT(&result[0], ctx->a) + OUT(&result[4], ctx->b) + OUT(&result[8], ctx->c) + OUT(&result[12], ctx->d) + + memset(ctx, 0, sizeof(*ctx)); +} diff --git a/usr.bin/rsync/md4.h b/usr.bin/rsync/md4.h new file mode 100644 index 00000000000..8a6a0021ca7 --- /dev/null +++ b/usr.bin/rsync/md4.h @@ -0,0 +1,49 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD4 Message-Digest Algorithm (RFC 1320). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md4 + * + * Author: + * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See md4.c for more information. 
+ */ + +#ifndef MD4_H +#define MD4_H + +#define MD4_DIGEST_LENGTH 16 + +/* Any 32-bit or wider unsigned integer data type will do */ +typedef unsigned int MD4_u32plus; + +typedef struct { + MD4_u32plus lo, hi; + MD4_u32plus a, b, c, d; + unsigned char buffer[64]; + MD4_u32plus block[16]; +} MD4_CTX; + +__BEGIN_DECLS + +extern void MD4_Init(MD4_CTX *ctx); +extern void MD4_Update(MD4_CTX *ctx, const void *data, unsigned long size); +extern void MD4_Final(unsigned char *result, MD4_CTX *ctx); + +__END_DECLS + +#endif diff --git a/usr.bin/rsync/mkpath.c b/usr.bin/rsync/mkpath.c new file mode 100644 index 00000000000..8dc44e544b5 --- /dev/null +++ b/usr.bin/rsync/mkpath.c @@ -0,0 +1,77 @@ +/* $OpenBSD: mkpath.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> + +#include "extern.h" + +/* Code taken directly from mkdir(1). + + * mkpath -- create directories. + * path - path + */ +int +mkpath(struct sess *sess, char *path) +{ + struct stat sb; + char *slash; + int done = 0; + + slash = path; + + while (!done) { + slash += strspn(slash, "/"); + slash += strcspn(slash, "/"); + + done = (*slash == '\0'); + *slash = '\0'; + + if (stat(path, &sb)) { + if (errno != ENOENT || (mkdir(path, 0777) && + errno != EEXIST)) { + ERR(sess, "%s: stat", path); + return (-1); + } + } else if (!S_ISDIR(sb.st_mode)) { + errno = ENOTDIR; + ERR(sess, "%s: stat", path); + return (-1); + } + + *slash = '/'; + } + + return (0); +} + diff --git a/usr.bin/rsync/receiver.c b/usr.bin/rsync/receiver.c new file mode 100644 index 00000000000..8bc4779274e --- /dev/null +++ b/usr.bin/rsync/receiver.c @@ -0,0 +1,341 @@ +/* $Id: receiver.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ + +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/mman.h> +#include <sys/stat.h> + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <math.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "extern.h" + +enum pfdt { + PFD_SENDER_IN = 0, /* input from the sender */ + PFD_UPLOADER_IN, /* uploader input from a local file */ + PFD_DOWNLOADER_IN, /* downloader input from a local file */ + PFD_SENDER_OUT, /* output to the sender */ + PFD__MAX +}; + +/* + * Pledges: unveil, rpath, cpath, wpath, stdio, fattr. + * Pledges (dry-run): -cpath, -wpath, -fattr. + */ +int +rsync_receiver(struct sess *sess, + int fdin, int fdout, const char *root) +{ + struct flist *fl = NULL, *dfl = NULL; + size_t i, flsz = 0, dflsz = 0, excl; + char *tofree; + int rc = 0, dfd = -1, phase = 0, c; + int32_t ioerror; + struct pollfd pfd[PFD__MAX]; + struct download *dl = NULL; + struct upload *ul = NULL; + mode_t oumask; + + if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL)) { + ERR(sess, "pledge"); + goto out; + } + + /* Client sends zero-length exclusions. */ + + if ( ! sess->opts->server && + ! io_write_int(sess, fdout, 0)) { + ERRX1(sess, "io_write_int"); + goto out; + } + + if (sess->opts->server && sess->opts->del) { + if ( ! 
io_read_size(sess, fdin, &excl)) { + ERRX1(sess, "io_read_size"); + goto out; + } else if (0 != excl) { + ERRX(sess, "exclusion list is non-empty"); + goto out; + } + } + + /* + * Start by receiving the file list and our mystery number. + * These we're going to be touching on our local system. + */ + + if ( ! flist_recv(sess, fdin, &fl, &flsz)) { + ERRX1(sess, "flist_recv"); + goto out; + } + + /* The IO error is sent after the file list. */ + + if ( ! io_read_int(sess, fdin, &ioerror)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if (0 != ioerror) { + ERRX1(sess, "io_error is non-zero"); + goto out; + } + + if (0 == flsz && ! sess->opts->server) { + WARNX(sess, "receiver has empty file list: exiting"); + rc = 1; + goto out; + } else if ( ! sess->opts->server) + LOG1(sess, "Transfer starting: %zu files", flsz); + + LOG2(sess, "%s: receiver destination", root); + + /* + * Create the path for our destination directory, if we're not + * in dry-run mode (which would otherwise crash w/the pledge). + * This uses our current umask: we might set the permissions on + * this directory in post_dir(). + */ + + if ( ! sess->opts->dry_run) { + if (NULL == (tofree = strdup(root))) { + ERR(sess, "strdup"); + goto out; + } else if (mkpath(sess, tofree) < 0) { + ERRX1(sess, "%s: mkpath", root); + free(tofree); + goto out; + } + free(tofree); + } + + /* + * Disable umask() so we can set permissions fully. + * Then open the directory iff we're not in dry_run. + */ + + oumask = umask(0); + + if ( ! sess->opts->dry_run) { + dfd = open(root, O_RDONLY | O_DIRECTORY, 0); + if (-1 == dfd) { + ERR(sess, "%s: open", root); + goto out; + } + } + + /* + * Begin by conditionally getting all files we have currently + * available in our destination. + * XXX: THIS IS A BUG IN OPENBSD 6.4. + * For newer version of OpenBSD, this is safe to put after the + * unveil. + */ + + if (sess->opts->del && + sess->opts->recursive && + ! 
flist_gen_dels(sess, root, &dfl, &dflsz, fl, flsz)) { + ERRX1(sess, "flist_gen_local"); + goto out; + } + + /* + * Make our entire view of the file-system be limited to what's + * in the root directory. + * This prevents us from accidentally (or "under the influence") + * writing into other parts of the file-system. + */ + + if (-1 == unveil(root, "rwc")) { + ERR(sess, "%s: unveil", root); + goto out; + } else if (-1 == unveil(NULL, NULL)) { + ERR(sess, "%s: unveil", root); + goto out; + } + + /* If we have a local set, go for the deletion. */ + + if ( ! flist_del(sess, dfd, dfl, dflsz)) { + ERRX1(sess, "flist_del"); + goto out; + } + + /* Initialise poll events to listen from the sender. */ + + pfd[PFD_SENDER_IN].fd = fdin; + pfd[PFD_UPLOADER_IN].fd = -1; + pfd[PFD_DOWNLOADER_IN].fd = -1; + pfd[PFD_SENDER_OUT].fd = fdout; + + pfd[PFD_SENDER_IN].events = POLLIN; + pfd[PFD_UPLOADER_IN].events = POLLIN; + pfd[PFD_DOWNLOADER_IN].events = POLLIN; + pfd[PFD_SENDER_OUT].events = POLLOUT; + + ul = upload_alloc(sess, dfd, fdout, + CSUM_LENGTH_PHASE1, fl, flsz, oumask); + if (NULL == ul) { + ERRX1(sess, "upload_alloc"); + goto out; + } + + dl = download_alloc(sess, fdin, fl, flsz, dfd); + if (NULL == dl) { + ERRX1(sess, "download_alloc"); + goto out; + } + + LOG2(sess, "%s: ready for phase 1 data", root); + + for (;;) { + if (-1 == (c = poll(pfd, PFD__MAX, INFTIM))) { + ERR(sess, "poll"); + goto out; + } + + for (i = 0; i < PFD__MAX; i++) + if (pfd[i].revents & (POLLERR|POLLNVAL)) { + ERRX(sess, "poll: bad fd"); + goto out; + } else if (pfd[i].revents & POLLHUP) { + ERRX(sess, "poll: hangup"); + goto out; + } + + /* + * If we have a read event and we're multiplexing, we + * might just have error messages in the pipe. + * It's important to flush these out so that we don't + * clog the pipe. + * Unset our polling status if there's nothing that + * remains in the pipe. + */ + + if (sess->mplex_reads && + (POLLIN & pfd[PFD_SENDER_IN].revents)) { + if ( ! 
io_read_flush(sess, fdin)) { + ERRX1(sess, "io_read_flush"); + goto out; + } else if (0 == sess->mplex_read_remain) + pfd[PFD_SENDER_IN].revents &= ~POLLIN; + } + + + /* + * We run the uploader if we have files left to examine + * (i < flsz) or if we have a file that we've opened and + * is read to mmap. + */ + + if ((POLLIN & pfd[PFD_UPLOADER_IN].revents) || + (POLLOUT & pfd[PFD_SENDER_OUT].revents)) { + c = rsync_uploader(ul, + &pfd[PFD_UPLOADER_IN].fd, + sess, &pfd[PFD_SENDER_OUT].fd); + if (c < 0) { + ERRX1(sess, "rsync_uploader"); + goto out; + } + } + + /* + * We need to run the downloader when we either have + * read events from the sender or an asynchronous local + * open is ready. + * XXX: we don't disable PFD_SENDER_IN like with the + * uploader because we might stop getting error + * messages, which will otherwise clog up the pipes. + */ + + if ((POLLIN & pfd[PFD_SENDER_IN].revents) || + (POLLIN & pfd[PFD_DOWNLOADER_IN].revents)) { + c = rsync_downloader(dl, sess, + &pfd[PFD_DOWNLOADER_IN].fd); + if (c < 0) { + ERRX1(sess, "rsync_downloader"); + goto out; + } else if (0 == c) { + assert(0 == phase); + phase++; + LOG2(sess, "%s: receiver ready " + "for phase 2 data", root); + break; + } + + /* + * FIXME: if we have any errors during the + * download, most notably files getting out of + * sync between the send and the receiver, then + * here we should bump our checksum length and + * go into the second phase. + */ + } + } + + /* Properly close us out by progressing through the phases. */ + + if (1 == phase) { + if ( ! io_write_int(sess, fdout, -1)) { + ERRX1(sess, "io_write_int"); + goto out; + } else if ( ! io_read_int(sess, fdin, &ioerror)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if (-1 != ioerror) { + ERRX(sess, "expected phase ack"); + goto out; + } + } + + /* + * Now all of our transfers are complete, so we can fix up our + * directory permissions. + */ + + if ( ! 
rsync_uploader_tail(ul, sess)) { + ERRX1(sess, "rsync_uploader_tail"); + goto out; + } + + /* Process server statistics and say good-bye. */ + + if ( ! sess_stats_recv(sess, fdin)) { + ERRX1(sess, "sess_stats_recv"); + goto out; + } else if ( ! io_write_int(sess, fdout, -1)) { + ERRX1(sess, "io_write_int"); + goto out; + } + + LOG2(sess, "receiver finished updating"); + rc = 1; +out: + if (-1 != dfd) + close(dfd); + upload_free(ul); + download_free(dl); + flist_free(fl, flsz); + flist_free(dfl, dflsz); + return rc; +} diff --git a/usr.bin/rsync/rsync.1 b/usr.bin/rsync/rsync.1 new file mode 100644 index 00000000000..1a727ec776e --- /dev/null +++ b/usr.bin/rsync/rsync.1 @@ -0,0 +1,213 @@ +.\" $OpenBSD: rsync.1,v 1.1 2019/02/10 23:18:28 benno Exp $ +.\" +.\" Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: February 10 2019 $ +.Dt RSYNC 1 +.Os +.Sh NAME +.Nm rsync +.Nd synchronise local and remote files +.Sh SYNOPSIS +.Nm rsync +.Op Fl lnprtv +.Op Fl -delete +.Op Fl -rsync-path Ar prog +.Ar source ... +.Ar directory +.Sh DESCRIPTION +The +.Nm +utility synchronises files in the destination +.Ar directory +with one or more +.Ar source +files. 
+Either the +.Ar source +or the destination +.Ar directory +may be remote, +but not both. +The arguments are as follows: +.Bl -tag -width Ds +.It Fl l +Transfer symbolic links. +The link is transferred as a standalone file: if the destination does +not exist, it will be broken. +.It Fl n +Dry-run mode. +Does not actually modify the destination. +.It Fl p +Set destination file or directory permissions to match the source when +it is updated. +.It Fl r +If +.Ar source +designates a directory, synchronise the directory and the entire subtree +connected at that point. +If +.Ar source +ends with a slash, only the subtree is synchronised, not the root +directory. +If +.Ar source +is a file, this has no effect. +.It Fl t +Set destination file and directory modification time to match the source +when it is updated or created. +.It Fl v +Increase verbosity. +Specify once for files being transferred, twice for specific status, +thrice for per-file transfer information, and four times for per-file +breakdowns. +.It Fl -delete +Delete files in +.Ar directory +not found in +.Ar source +directories. +Only applicable with +.Fl r . +.It Fl -rsync-path Ar prog +Run +.Ar prog +on the remote host instead of the default +.Ar rsync . +.El +.Pp +A remote +.Ar source +or +.Ar directory +has syntax +.Ar host:path +for connecting via +.Xr ssh 1 , +or +.Ar rsync://host/path +or +.Ar host::path +for connecting to a remote daemon. +Subsequent to the first remote +.Ar source , +the host may be dropped to become just +.Ar :path +or +.Ar ::path . +.Pp +For connecting to a remote daemon with +.Ar rsync://host +or +.Ar host::path , +the first path component is interpreted as a +.Qq module : +.Ar host::module/path . +This only applies to the first +.Ar source +invocation; subsequent to that, the module should not be specified. +.Pp +By default, new destination files and directories are given the current +time and the source file permissions. +Updated files retain their existing permissions. 
+It is an error if updated files have their file types change (e.g., +updating a directory with a file). +.Pp +At this time, +.Ar source +may only consist of regular files, directories +.Pq only with Fl r , +or symbolic links +.Pq only with Fl l . +The destination +.Ar directory +must be a directory and is created if not found. +.Pp +.Nm +is compatible with the GPL-licensed +.Xr rsync 1 +protocol version 27. +.\" The following requests should be uncommented and used where appropriate. +.\" .Sh CONTEXT +.\" For section 9 functions only. +.\" .Sh RETURN VALUES +.\" For sections 2, 3, and 9 function return values only. +.\" .Sh ENVIRONMENT +.\" For sections 1, 6, 7, and 8 only. +.\" .Sh FILES +.\" .Sh EXIT STATUS +.\" For sections 1, 6, and 8 only. +.Sh EXAMPLES +All examples use +.Fl t +so that destination files inherit the source time. +If not changed, subsequent invocations of +.Nm +will then consider the file up to date and not transfer block hashes. +.Pp +To update the out-of-date remote files +.Pa host:dest/bar +and +.Pa host:dest/baz +with the local +.Pa ../src/bar +and +.Pa ../src/baz : +.Pp +.Dl % rsync -t ../src/bar ../src/baz host:dest +.Pp +To update the out-of-date local files +.Pa bar +and +.Pa baz +with the remote files +.Pa host:src/bar +and +.Pa host:src/baz : +.Pp +.Dl % rsync -t host:src/bar :src/baz \&. +.Pp +To update the out-of-date local files +.Pa ../dest/bar +and +.Pa ../dest/baz +with +.Pa bar +and +.Pa baz : +.Pp +.Dl % rsync -t bar baz ../dest +.Pp +To update the out-of-date remote files in +.Pa host:dest +on a remote host running +.Nm +with the local host running +.Xr rsync 1 : +.Pp +.Dl % rsync --rsync-path rsync -t ../dest/* host:dest +.\" .Sh DIAGNOSTICS +.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only. +.\" .Sh ERRORS +.\" For sections 2, 3, 4, and 9 errno settings only. 
+.Sh SEE ALSO +.Xr ssh 1 , +.Xr rsync 5 , +.Xr rsyncd 5 +.\" .Sh STANDARDS +.\" .Sh HISTORY +.\" .Sh AUTHORS +.\" .Sh CAVEATS +.\" .Sh BUGS diff --git a/usr.bin/rsync/rsync.5 b/usr.bin/rsync/rsync.5 new file mode 100644 index 00000000000..5c56bc25528 --- /dev/null +++ b/usr.bin/rsync/rsync.5 @@ -0,0 +1,469 @@ +.\" $OpenBSD: rsync.5,v 1.1 2019/02/10 23:18:28 benno Exp $ +.\" +.\" Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: February 10 2019 $ +.Dt RSYNC 5 +.Os +.Sh NAME +.Nm rsync +.Nd rsync wire protocol +.Sh DESCRIPTION +The +.Nm +protocol described in this document relates to the BSD-licensed +.Xr openrsync 1 , +a re-implementation of the GPL-licensed reference utility +.Xr rsync 1 . +It is compatible with version 27 of the reference. +.Pp +In this document, the +.Qq client process +refers to the utility as run on the operator's local computer. +The +.Qq server process +is run either on the local or remote computer, depending upon the +command-line given file locations. +.Pp +There are a number of options in the protocol that are dictated by command-line +flags.
+These will be noted as +.Fl n +for dry-run, +.Fl l +for links, +.Fl r +for recursion, +.Fl v +for verbose, and +.Fl -delete +for deletion (before). +.Ss Data types +The binary protocol encodes all data in little-endian format. +Integers are signed 32-bit, shorts are signed 16-bit, bytes are unsigned +8-bit. +A long is variable-length. +For values less than the maximum integer, the value is transmitted and +read as a 32-bit integer. +For values greater, the value is transmitted first as a maximum integer, +then a 64-bit signed integer. +.Pp +There are three types of checksums: long (slow), short (fast), and +whole-file. +The fast checksum is a derivative of Adler-32. +The slow checksum is MD4, +made over the checksum seed first (serialised in little-endian format), +then the data. +The whole-file applies MD4 to the file first, then the checksum seed at +the end (also serialised in little-endian format). +.Ss Multiplexing +Most +.Nm +transmissions are wrapped in a multiplexing envelope protocol. +It is composed as follows: +.Pp +.Bl -enum -compact +.It +envelope header (4 bytes) +.It +envelope payload (arbitrary length) +.El +.Pp +The first byte of the envelope header consists of a tag. +If the tag is 7, the payload is normal data. +Otherwise, the payload is out-of-band server messages. +If the tag is 1, it is an error on the sender's part and must trigger an +exit. +This limits message payloads to 24-bit integer size, +.Li 0x00ffffff . +.Pp +The only data not using this envelope are the initial handshake between +client and server. +.Ss File list +A central part of the protocol is the file list, which is generated by +the sender. +It consists of all files that must be sent to the receiver, either +explicitly as given or recursively generated. +.Pp +The file list itself consists of filenames and attributes (mode, time, +size, etc.). +Filenames must be relative to the destination root and not be absolute +or contain backtracking.
+So if a file is given to the sender as +.Pa ../../foo/bar , +it must be sent as +.Pa foo/bar . +.Pp +The file list should be cleaned of inappropriate files prior to sending. +For example, if +.Fl l +is not specified, symbolic links may be omitted. +Directory entries without +.Fl r +may also be omitted. +Duplicates may be omitted. +.Pp +The receiver +.Em must not +assume that the file list is clean. +It should not omit inappropriate files from the file list (which would +affect the indexing), but may omit them during processing. +.Pp +Prior to being sent from sender to receiver, and upon being received, the +file list must be lexicographically sorted such as with +.Xr strcmp 3 . +Subsequent references to the file are by index in the sorted list. +.Ss Client process +The client can operate in sender or receiver mode depending upon the +command-line source and destination. +.Pp +If the destination directory (sink) is remote, the client is in sender +mode: the client will push its data to the server. +If the source file is remote, it is in receiver mode: the server pushes +to the client. +If neither is remote, the client operates in sender mode. +These are all mutually exclusive. +.Pp +When the client starts, regardless of its mode, it first handshakes the +server. +This exchange is +.Em not +multiplexed. +.Pp +.Bl -enum -compact +.It +send local version (integer) +.It +receive remote version (integer) +.It +receive random seed (integer) +.El +.Pp +Following this, the client multiplexes when reading from the server. +Transmissions sent from client to server are not multiplexed. +It then enters the +.Sx Update exchange +protocol. +.Ss Server process +The server can operate in sender or receiver mode depending upon how the +client starts the server. +This may be directly from the parent process (when invoked for local +files) or indirectly via a remote shell. +.Pp +When in sender mode, the server pushes data to the client.
+(This is equivalent to receiver mode for the client.) +In receiver mode, the opposite is true. +.Pp +When the server starts, regardless of the mode, it first handshakes the +client. +This exchange is +.Em not +multiplexed. +.Pp +.Bl -enum -compact +.It +send local version (integer) +.It +receive remote version (integer) +.It +send random seed (integer) +.El +.Pp +Following this, the server multiplexes when writing to the client. +(Transmissions received from the client are not multiplexed.) +It then enters the +.Sx Update exchange +protocol. +.Ss Update exchange +When the client or server is in sender mode, it begins by conditionally +sending the exclusion list. +At this time, this is always empty. +.Pp +.Bl -enum -compact +.It +if +.Fl -delete +and the client, exclusion list zero (integer) +.El +.Pp +It then sends the +.Sx File list . +Prior to being sent, the file list should be lexicographically sorted. +.Pp +.Bl -enum -compact +.It +status byte (integer) +.It +inherited filename length (optional, byte) +.It +filename length (integer or byte) +.It +file (byte array) +.It +file length (long) +.It +file modification time (optional, time_t, integer) +.It +file mode (optional, mode_t, integer) +.It +if a symbolic link and +.Fl l , +the link target's length (integer) +.It +if a symbolic link and +.Fl l , +the link target (byte array) +.El +.Pp +The status byte may consist of the following bits and determines which +of the optional fields are transmitted. +.Pp +.Bl -tag -compact -width Ds +.It 0x02 +Do not send the file mode: it is a repeat of the last file's mode. +.It 0x20 +Inherit some of the prior file name. +Enables the inherited filename length transmission. +.It 0x40 +Use full integer length for file name. +Otherwise, use only the byte length. +.It 0x80 +Do not send the file modification time: it is a repeat of the last +file's. +.El +.Pp +If the status byte is zero, the file-list has terminated.
+The sender then sends any IO error values, which for +.Xr openrsync 1 +is always zero. +.Pp +.Bl -enum -compact +.It +constant zero (integer) +.El +.Pp +The server sender then reads the exclusion list, which is always zero. +.Pp +.Bl -enum -compact +.It +if server, constant zero (integer) +.El +.Pp +Following that, the sender receives data regarding the receiver's copy +of the file list contents. +This data is not ordered in any way. +Each of these requests starts as follows: +.Pp +.Bl -enum -compact +.It +file index or -1 to signal a change of phase (integer) +.El +.Pp +The phase starts in phase 1, then proceeds to phase 2, and phase 3 +signals an end of transmission (no subsequent blocks). +If a phase change occurs, the sender must write back the -1 constant +integer value and increment its phase state. +.Pp +Blocks are read as follows: +.Pp +.Bl -enum -compact +.It +block index (integer) +.El +.Pp +In +.Pq Fl n +mode, the sender may immediately write back the index (integer) to skip +the following. +.Pp +.Bl -enum -compact +.It +number of blocks (integer) +.It +block length in the file (integer) +.It +long checksum length (integer) +.It +terminal (remainder) block length (integer) +.El +.Pp +And for each block: +.Pp +.Bl -enum -compact +.It +short checksum (integer) +.It +long checksum (bytes of checksum length) +.El +.Pp +The client then compares the two files, block by block, and updates the +server with mismatches as follows. 
+.Pp +.Bl -enum -compact +.It +file index (integer) +.It +number of blocks (integer) +.It +block length (integer) +.It +long checksum length (integer) +.It +remainder block length (integer) +.El +.Pp +Then for each block: +.Pp +.Bl -enum -compact +.It +data chunk size (integer) +.It +data chunk (bytes) +.It +block index subsequent to chunk or zero for finished (integer) +.El +.Pp +Following this sequence, the sender sends the following: +.Pp +.Bl -enum -compact +.It +whole-file long checksum (16 bytes) +.El +.Pp +The sender then either handles the next queued file or, if the receiver +has written a phase change, the phase change step. +.Pp +If the sender is the server and +.Fl v +has been specified, the sender must send statistics. +.Pp +.Bl -enum -compact +.It +total bytes read (long) +.It +total bytes written (long) +.It +total size of files (long) +.El +.Pp +Finally, the sender must read a final constant-value integer. +.Pp +.Bl -enum -compact +.It +end-of-sequence -1 value (integer) +.El +.Pp +If in receiver mode, the inverse of the above (write instead of read, read +instead of write) is performed. +.Pp +The receiver begins by conditionally writing, then reading, the +exclusion list count, which is always zero. +.Pp +.Bl -enum -compact +.It +if client, send zero (integer) +.It +if receiver and +.Fl -delete , +read zero (integer) +.El +.Pp +The receiver then proceeds with reading the +.Sx File list +as already +defined. +Following the list, the receiver reads the IO error, which must be zero. +.Pp +.Bl -enum -compact +.It +constant zero (integer) +.El +.Pp +The receiver must then sort the file names lexicographically. +.Pp +If there are no files in the file list at this time, the receiver must +exit prior to sending per-file data. +It then proceeds with the file blocks. +.Pp +For file blocks, the receiver must look at each file that is not up to +date (a file is up to date when it has the same file size and timestamp) and send it to +the server.
+Symbolic links and directory entries are never sent to the server. +.Pp +After the second phase has completed and prior to writing the +end-of-data signal, the client receiver reads statistics. +This is only performed with +.Pq Fl v . +.Pp +.Bl -enum -compact +.It +total bytes read (long) +.It +total bytes written (long) +.It +total size of files (long) +.El +.Pp +Finally, the receiver must send the constant end-of-sequence marker. +.Pp +.Bl -enum -compact +.It +end-of-sequence -1 value (integer) +.El +.Ss Sender and receiver asynchrony +The sender and receiver need not work in lockstep. +The receiver may send file update requests as quickly as it parses them, +and respond to the sender's update notices on demand. +Similarly, the sender may read as many update requests as it can, and +service them in any order it wishes. +.Pp +The sender and receiver synchronise state only at the end of each phase. +.Pp +The reference +.Xr rsync 1 +takes advantage of this with a two-process receiver, one for sending +update requests (the generator) and another for receiving. +.Xr openrsync 1 +uses an event-loop model instead. +.\" .Sh CONTEXT +.\" For section 9 functions only. +.\" .Sh RETURN VALUES +.\" For sections 2, 3, and 9 function return values only. +.\" .Sh ENVIRONMENT +.\" For sections 1, 6, 7, and 8 only. +.\" .Sh FILES +.\" .Sh EXIT STATUS +.\" For sections 1, 6, and 8 only. +.\" .Sh EXAMPLES +.\" .Sh DIAGNOSTICS +.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only. +.\" .Sh ERRORS +.\" For sections 2, 3, 4, and 9 errno settings only. +.Sh SEE ALSO +.Xr openrsync 1 , +.Xr rsync 1 , +.Xr rsyncd 5 +.\" .Sh STANDARDS +.\" .Sh HISTORY +.\" .Sh AUTHORS +.\" .Sh CAVEATS +.Sh BUGS +Time values are sent as 32-bit integers. +.Pp +When in server mode +.Em and +when communicating with a client with a newer protocol (>27), the phase +change integer (-1) acknowledgement must be sent twice by the sender. +This is probably a bug in the reference implementation.
diff --git a/usr.bin/rsync/rsyncd.5 b/usr.bin/rsync/rsyncd.5 new file mode 100644 index 00000000000..d2e18fbac2e --- /dev/null +++ b/usr.bin/rsync/rsyncd.5 @@ -0,0 +1,135 @@ +.\" $OpenBSD: rsyncd.5,v 1.1 2019/02/10 23:18:28 benno Exp $ +.\" +.\" Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: February 10 2019 $ +.Dt RSYNCD 5 +.Os +.Sh NAME +.Nm rsyncd +.Nd rsyncd wire protocol +.Sh DESCRIPTION +The +.Nm +protocol described in this document relates to the BSD-licensed +.Xr openrsync 1 , +a re-implementation of the GPL-licensed reference utility +.Xr rsync 1 . +It is compatible with version 27 of the reference. +.Pp +The +.Nm +protocol is an envelope protocol for +.Xr rsync 5 +between a client and an rsync://-capable server. +It provides a means to exchange capabilities information prior to file +transfer. +.Pp +In this document, +.Qq client +refers to the +.Xr openrsync 1 +utility making the request. +It follows that +.Qq server +refers to the daemon servicing the request. +.Pp +A connection between a client and server consists of a host, a module, and +zero or more paths. +.Pp +.Dl openrsync rsync://host/module/path1 rsync://host/path2...
dest +.Pp +At this time, operating in sender mode (with the rsync:// host receiving +information) is not described in this document. +.Ss Data types +These are the same as in +.Xr rsync 5 . +A newline is always a standalone \en. +.Ss Client process +After initialising a connection, the client and server exchange the +following information, in order. +This portion of the process is +.Em not +multiplexed. +.Pp +.Bl -enum -compact +.It +client sends requested module followed by newline +.It +server responds with preamble followed by standalone newline +.El +.Pp +The requested module must have non-zero length. +The preamble consists of a sequence of lines. +Each line either contains free-form text sent by the server as a +.Qq motd +.Pq message of the day +or a command: +.Pp +.Dl @RSYNCD: command\en +.Pp +The only supported command is the server protocol specification: +.Pp +.Dl @RSYNCD: xx[.yy]\en +.Pp +The optional component is the submodule, which may be discarded. +The version may only be specified once. +Both the motd and commands end in the special termination command: +.Pp +.Dl @RSYNCD: OK\en +.Pp +Following that, the client must send the command-line arguments that +would otherwise be used to start an +.Xr openrsync 1 +server. +Each argument must be specified on its own line, e.g., +.Pp +.Dl --server\en--sender\en-r\en-t\en.\enpath1\enpath2 +.Pp +This must be followed by a standalone newline. +.Pp +If the server does not understand or accept any of the command-line +arguments, it will exit at this point. +.Pp +Following this, the client must read the integer-length session checksum +seed. +Multiplexing is subsequently enabled. +.Pp +The sequence that follows is stipulated in +.Xr rsync 5 +following the handshake. +.\" The following requests should be uncommented and used where appropriate. +.\" .Sh CONTEXT +.\" For section 9 functions only. +.\" .Sh RETURN VALUES +.\" For sections 2, 3, and 9 function return values only.
+.\" .Sh ENVIRONMENT +.\" For sections 1, 6, 7, and 8 only. +.\" .Sh FILES +.\" .Sh EXIT STATUS +.\" For sections 1, 6, and 8 only. +.\" .Sh EXAMPLES +.\" .Sh DIAGNOSTICS +.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only. +.\" .Sh ERRORS +.\" For sections 2, 3, 4, and 9 errno settings only. +.Sh SEE ALSO +.Xr openrsync 1 , +.Xr rsync 5 +.\" .Sh STANDARDS +.\" .Sh HISTORY +.\" .Sh AUTHORS +.\" .Sh CAVEATS +.\" .Sh BUGS diff --git a/usr.bin/rsync/sender.c b/usr.bin/rsync/sender.c new file mode 100644 index 00000000000..362c6c93ecb --- /dev/null +++ b/usr.bin/rsync/sender.c @@ -0,0 +1,227 @@ +/* $Id: sender.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/stat.h> + +#include <assert.h> +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "extern.h" + +/* + * A client sender manages the read-only source files and sends data to + * the receiver as requested. + * First it sends its list of files, then it waits for the server to + * request updates to individual files. + * Returns zero on failure, non-zero on success. + * + * Pledges: stdio, rpath, unveil. 
+ */ +int +rsync_sender(struct sess *sess, int fdin, + int fdout, size_t argc, char **argv) +{ + struct flist *fl = NULL; + size_t flsz = 0, phase = 0, excl; + int rc = 0, c; + int32_t idx; + struct blkset *blks = NULL; + + if (-1 == pledge("unveil stdio rpath", NULL)) { + ERR(sess, "pledge"); + return 0; + } + + /* + * Generate the list of files we want to send from our + * command-line input. + * This will also remove all invalid files. + */ + + if ( ! flist_gen(sess, argc, argv, &fl, &flsz)) { + ERRX1(sess, "flist_gen"); + goto out; + } + + /* Client sends zero-length exclusions if deleting. */ + + if ( ! sess->opts->server && sess->opts->del && + ! io_write_int(sess, fdout, 0)) { + ERRX1(sess, "io_write_int"); + goto out; + } + + /* + * Then the file list in any mode. + * Finally, the IO error (always zero for us). + */ + + if ( ! flist_send(sess, fdin, fdout, fl, flsz)) { + ERRX1(sess, "flist_send"); + goto out; + } else if ( ! io_write_int(sess, fdout, 0)) { + ERRX1(sess, "io_write_int"); + goto out; + } + + /* Exit if we're the server with zero files. */ + + if (0 == flsz && sess->opts->server) { + WARNX(sess, "sender has empty file list: exiting"); + rc = 1; + goto out; + } else if ( ! sess->opts->server) + LOG1(sess, "Transfer starting: %zu files", flsz); + + /* + * If we're the server, read our exclusion list. + * This is always 0 for now. + */ + + if (sess->opts->server) { + if ( ! io_read_size(sess, fdin, &excl)) { + ERRX1(sess, "io_read_size"); + goto out; + } else if (0 != excl) { + ERRX1(sess, "exclusion list is non-empty"); + goto out; + } + } + + /* + * We have two phases: the first has a two-byte checksum, the + * second has a full 16-byte checksum. + */ + + LOG2(sess, "sender transmitting phase 1 data"); + + for (;;) { + if ( ! 
io_read_int(sess, fdin, &idx)) { + ERRX1(sess, "io_read_int"); + goto out; + } + + /* + * If we receive an invalid index (-1), then we're + * either promoted to the second phase or it's time to + * exit, depending upon which phase we're in. + */ + + if (-1 == idx) { + if ( ! io_write_int(sess, fdout, idx)) { + ERRX1(sess, "io_write_int"); + goto out; + } + + /* FIXME: I don't understand this ack. */ + + if (sess->opts->server && sess->rver > 27) + if ( ! io_write_int(sess, fdout, idx)) { + ERRX1(sess, "io_write_int"); + goto out; + } + + if (phase++) + break; + LOG2(sess, "sender transmitting phase 2 data"); + continue; + } + + /* Validate index and file type. */ + + if (idx < 0 || (uint32_t)idx >= flsz) { + ERRX(sess, "file index out of bounds: " + "invalid %" PRId32 " out of %zu", + idx, flsz); + goto out; + } else if (S_ISDIR(fl[idx].st.mode)) { + ERRX(sess, "blocks requested for " + "directory: %s", fl[idx].path); + goto out; + } else if (S_ISLNK(fl[idx].st.mode)) { + ERRX(sess, "blocks requested for " + "symlink: %s", fl[idx].path); + goto out; + } else if ( ! S_ISREG(fl[idx].st.mode)) { + ERRX(sess, "blocks requested for " + "special: %s", fl[idx].path); + goto out; + } + + if ( ! sess->opts->server) + LOG1(sess, "%s", fl[idx].wpath); + + /* Dry-run doesn't do anything. */ + + if (sess->opts->dry_run) { + if ( ! io_write_int(sess, fdout, idx)) { + ERRX1(sess, "io_write_int"); + goto out; + } + continue; + } + + /* + * The server will now send us its view of the file. + * It does so by cutting a file into a series of blocks + * and checksumming each block. + * We can then compare the blocks in our file and those + * in theirs, and send them blocks they're missing or + * don't have. + */ + + blks = blk_recv(sess, fdin, fl[idx].path); + if (NULL == blks) { + ERRX1(sess, "blk_recv"); + goto out; + } else if ( ! 
blk_recv_ack(sess, fdout, blks, idx)) { + ERRX1(sess, "blk_recv_ack"); + goto out; + } + + c = blk_match(sess, fdout, blks, fl[idx].path); + blkset_free(blks); + + if ( ! c) { + ERRX1(sess, "blk_match"); + goto out; + } + } + + if ( ! sess_stats_send(sess, fdout)) { + ERRX1(sess, "sess_stats_end"); + goto out; + } + + /* Final "goodbye" message. */ + + if ( ! io_read_int(sess, fdin, &idx)) { + ERRX1(sess, "io_read_int"); + goto out; + } else if (-1 != idx) { + ERRX(sess, "read incorrect update complete ack"); + goto out; + } + + LOG2(sess, "sender finished updating"); + rc = 1; +out: + flist_free(fl, flsz); + return rc; +} diff --git a/usr.bin/rsync/server.c b/usr.bin/rsync/server.c new file mode 100644 index 00000000000..8ce49f0867f --- /dev/null +++ b/usr.bin/rsync/server.c @@ -0,0 +1,162 @@ +/* $Id: server.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */ +/* + * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Add O_NONBLOCK to the file status flags of "fd".
+ * Reports the failing fcntl(2) request and returns zero on failure,
+ * non-zero on success.
+ */
+static int
+fcntl_nonblock(struct sess *sess, int fd)
+{
+	int	 fl;
+
+	if (-1 == (fl = fcntl(fd, F_GETFL, 0)))
+		ERR(sess, "fcntl: F_GETFL");
+	else if (-1 == fcntl(fd, F_SETFL, fl|O_NONBLOCK))
+		ERR(sess, "fcntl: F_SETFL");
+	else
+		return 1;
+
+	return 0;
+}
+
+/*
+ * The server (remote) side of the system.
+ * This parses the arguments given it by the remote shell then moves
+ * into receiver or sender mode depending upon those arguments.
+ * The "argv" vector holds "argc" entries: a standalone period followed
+ * by the source(s) in sender mode or the destination in receiver mode.
+ * Returns zero on failure, non-zero on success.
+ *
+ * Pledges: unveil rpath, cpath, wpath, stdio, fattr.
+ *
+ * Pledges (dry-run): -cpath, -wpath, -fattr.
+ * Pledges (!preserve_times): -fattr.
+ */
+int
+rsync_server(const struct opts *opts, size_t argc, char *argv[])
+{
+	struct sess	 sess;
+	int		 fdin = STDIN_FILENO,
+			 fdout = STDOUT_FILENO, c = 0;
+
+	memset(&sess, 0, sizeof(struct sess));
+	sess.opts = opts;
+
+	/* Begin by making descriptors non-blocking. */
+
+	if ( ! fcntl_nonblock(&sess, fdin) ||
+	     ! fcntl_nonblock(&sess, fdout)) {
+		ERRX1(&sess, "fcntl_nonblock");
+		goto out;
+	}
+
+	/* Standard rsync preamble, server side. */
+
+	sess.lver = RSYNC_PROTOCOL;
+	sess.seed = arc4random();
+
+	if ( ! io_read_int(&sess, fdin, &sess.rver)) {
+		ERRX1(&sess, "io_read_int");
+		goto out;
+	} else if ( ! io_write_int(&sess, fdout, sess.lver)) {
+		ERRX1(&sess, "io_write_int");
+		goto out;
+	} else if ( ! io_write_int(&sess, fdout, sess.seed)) {
+		ERRX1(&sess, "io_write_int");
+		goto out;
+	}
+
+	/* The version and seed went out raw; flag later writes as mplexed. */
+
+	sess.mplex_writes = 1;
+
+	if (sess.rver < sess.lver) {
+		ERRX(&sess, "remote protocol is older "
+			"than our own (%" PRId32 " < %" PRId32 "): "
+			"this is not supported",
+			sess.rver, sess.lver);
+		goto out;
+	}
+
+	LOG2(&sess, "server detected client version %" PRId32
+		", server version %" PRId32 ", seed %" PRId32,
+		sess.rver, sess.lver, sess.seed);
+
+	if (sess.opts->sender) {
+		LOG2(&sess, "server starting sender");
+
+		/*
+		 * At this time, I always get a period as the first
+		 * argument of the command line.
+		 * Let's make it a requirement until I figure out when
+		 * that differs.
+		 * rsync [flags] "." <source> <...>
+		 */
+
+		/* Never look at argv[0] unless there is one. */
+
+		if (0 == argc) {
+			ERRX(&sess, "must have arguments");
+			goto out;
+		} else if (strcmp(argv[0], ".")) {
+			ERRX(&sess, "first argument must "
+				"be a standalone period");
+			goto out;
+		}
+		argv++;
+		argc--;
+		if (0 == argc) {
+			ERRX(&sess, "must have arguments");
+			goto out;
+		}
+
+		if ( ! rsync_sender(&sess, fdin, fdout, argc, argv)) {
+			ERRX1(&sess, "rsync_sender");
+			goto out;
+		}
+	} else {
+		LOG2(&sess, "server starting receiver");
+
+		/*
+		 * I don't understand why this calling convention
+		 * exists, but we must adhere to it.
+		 * rsync [flags] "." <destination>
+		 */
+
+		if (2 != argc) {
+			ERRX(&sess, "server receiver mode "
+				"requires two argument");
+			goto out;
+		} else if (strcmp(argv[0], ".")) {
+			ERRX(&sess, "first argument must "
+				"be a standalone period");
+			goto out;
+		}
+
+		if ( ! rsync_receiver(&sess, fdin, fdout, argv[1])) {
+			ERRX1(&sess, "rsync_receiver");
+			goto out;
+		}
+	}
+
+#if 0
+	/* Probably the EOF. */
+	if (io_read_check(&sess, fdin))
+		WARNX(&sess, "data remains in read pipe");
+#endif
+
+	c = 1;
+out:
+	return c;
+}
diff --git a/usr.bin/rsync/session.c b/usr.bin/rsync/session.c
new file mode 100644
index 00000000000..8ba1ebb0d38
--- /dev/null
+++ b/usr.bin/rsync/session.c
@@ -0,0 +1,161 @@
+/* $Id: session.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/param.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Accept how much we've read, written, and file-size, and print them in
+ * a human-readable fashion (with GB, MB, etc. prefixes).
+ * This only prints as the client.
+ */
+static void
+stats_log(struct sess *sess,
+	uint64_t tread, uint64_t twrite, uint64_t tsize)
+{
+	const uint64_t	 raw[3] = { tread, twrite, tsize };
+	double		 val[3];
+	const char	*unit[3];
+	int		 prec[3];
+	size_t		 k;
+
+	assert(sess->opts->verbose);
+	if (sess->opts->server)
+		return;
+
+	/*
+	 * Scale each counter to the largest unit it reaches; the number
+	 * of printed decimals grows with the unit (B: 0 ... GB: 3).
+	 */
+
+	for (k = 0; k < 3; k++) {
+		if (raw[k] >= 1024 * 1024 * 1024) {
+			val[k] = raw[k] / (1024.0 * 1024.0 * 1024.0);
+			unit[k] = "GB";
+			prec[k] = 3;
+		} else if (raw[k] >= 1024 * 1024) {
+			val[k] = raw[k] / (1024.0 * 1024.0);
+			unit[k] = "MB";
+			prec[k] = 2;
+		} else if (raw[k] >= 1024) {
+			val[k] = raw[k] / 1024.0;
+			unit[k] = "KB";
+			prec[k] = 1;
+		} else {
+			val[k] = raw[k];
+			unit[k] = "B";
+			prec[k] = 0;
+		}
+	}
+
+	LOG1(sess, "Transfer complete: "
+		"%.*lf %s sent, "
+		"%.*lf %s read, "
+		"%.*lf %s file size",
+		prec[0], val[0], unit[0],
+		prec[1], val[1], unit[1],
+		prec[2], val[2], unit[2]);
+}
+
+/*
+ * End-of-transfer statistics, sending side.
+ * Does nothing (successfully) unless we're verbose.
+ * As the server, write the read, write and size totals (in that order)
+ * to "fd"; stats_log() then prints them when we are not the server.
+ * Returns zero on failure, non-zero on success.
+ */
+int
+sess_stats_send(struct sess *sess, int fd)
+{
+	uint64_t	 nread, nwritten, nsize;
+
+	if (0 == sess->opts->verbose)
+		return 1;
+
+	nwritten = sess->total_write;
+	nread = sess->total_read;
+	nsize = sess->total_size;
+
+	if (sess->opts->server &&
+	    ( ! io_write_long(sess, fd, nread) ||
+	      ! io_write_long(sess, fd, nwritten) ||
+	      ! io_write_long(sess, fd, nsize))) {
+		ERRX1(sess, "io_write_long");
+		return 0;
+	}
+
+	stats_log(sess, nread, nwritten, nsize);
+	return 1;
+}
+
+/*
+ * End-of-transfer statistics, receiving side.
+ * Does nothing (successfully) if we're the server or not verbose.
+ * Otherwise read three totals from "fd" and log them.
+ * The first value read is stored as our "written" count and the second
+ * as our "read" count.
+ * Return zero on failure, non-zero on success.
+ */
+int
+sess_stats_recv(struct sess *sess, int fd)
+{
+	uint64_t	 nread, nwritten, nsize;
+
+	if (sess->opts->server || 0 == sess->opts->verbose)
+		return 1;
+
+	if ( ! io_read_ulong(sess, fd, &nwritten) ||
+	     ! io_read_ulong(sess, fd, &nread) ||
+	     ! io_read_ulong(sess, fd, &nsize)) {
+		ERRX1(sess, "io_read_ulong");
+		return 0;
+	}
+
+	stats_log(sess, nread, nwritten, nsize);
+	return 1;
+}
+
diff --git a/usr.bin/rsync/socket.c b/usr.bin/rsync/socket.c
new file mode 100644
index 00000000000..ca28e172b17
--- /dev/null
+++ b/usr.bin/rsync/socket.c
@@ -0,0 +1,435 @@
+/* $Id: socket.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <netdb.h>
+#include <poll.h>
+#include <resolv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * One resolved address of the remote host.
+ * A host may resolve to several of these, IPV4 or IPV6.
+ */
+struct source {
+	int	 family; /* PF_INET or PF_INET6 */
+	char	 ip[INET6_ADDRSTRLEN]; /* formatted string */
+	struct sockaddr_storage sa; /* socket */
+	socklen_t salen; /* length of socket buffer */
+};
+
+/*
+ * Try to connect to one address of the host.
+ * Any descriptor already held in "sd" is closed first; on return "sd"
+ * holds the newly created socket (or -1 if socket(2) failed).
+ * Return <0 on failure, 0 on try another address, >0 on success.
+ */
+static int
+inet_connect(struct sess *sess, int *sd,
+	const struct source *src, const char *host)
+{
+	int	 fl;
+
+	if (-1 != *sd)
+		close(*sd);
+
+	LOG2(sess, "trying: %s, %s", src->ip, host);
+
+	*sd = socket(src->family, SOCK_STREAM, 0);
+	if (-1 == *sd) {
+		ERR(sess, "socket");
+		return -1;
+	}
+
+	/*
+	 * Connect in blocking mode: there is nothing else for us to do
+	 * while waiting, so NONBLOCK is only switched on afterward.
+	 * A refused or unreachable address is not fatal: the caller
+	 * may move on to the next one.
+	 */
+
+	if (-1 == connect(*sd,
+	    (const struct sockaddr *)&src->sa, src->salen)) {
+		if (ECONNREFUSED != errno && EHOSTUNREACH != errno) {
+			ERR(sess, "connect");
+			return -1;
+		}
+		WARNX(sess, "connect refused: "
+			"%s, %s", src->ip, host);
+		return 0;
+	}
+
+	/* Set up non-blocking mode. */
+
+	fl = fcntl(*sd, F_GETFL, 0);
+	if (-1 == fl) {
+		ERR(sess, "fcntl");
+		return -1;
+	}
+	if (-1 == fcntl(*sd, F_SETFL, fl|O_NONBLOCK)) {
+		ERR(sess, "fcntl");
+		return -1;
+	}
+
+	return 1;
+}
+
+/*
+ * Resolve the socket addresses for host, both in IPV4 and IPV6.
+ * Once completed, the "dns" pledge may be dropped.
+ * Returns the addresses on success, NULL on failure (sz is always zero,
+ * in this case).
+ * On success the caller owns the returned array and must free() it.
+ */
+static struct source *
+inet_resolve(struct sess *sess, const char *host, size_t *sz)
+{
+	struct addrinfo	 hints, *res0, *res;
+	struct sockaddr	*sa;
+	struct source	*src = NULL;
+	size_t		 i, srcsz = 0;
+	int		 error;
+
+	*sz = 0;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_DGRAM; /* DUMMY */
+
+	/* Announce the lookup before it runs, not after. */
+
+	LOG2(sess, "resolving: %s", host);
+
+	error = getaddrinfo(host, "873", &hints, &res0);
+
+	if (error == EAI_AGAIN || error == EAI_NONAME) {
+		ERRX(sess, "DNS resolve error: %s: %s",
+			host, gai_strerror(error));
+		return NULL;
+	} else if (error) {
+		ERRX(sess, "DNS parse error: %s: %s",
+			host, gai_strerror(error));
+		return NULL;
+	}
+
+	/* Allocate for all available addresses. */
+
+	for (res = res0; NULL != res; res = res->ai_next)
+		if (res->ai_family == AF_INET ||
+		    res->ai_family == AF_INET6)
+			srcsz++;
+
+	if (0 == srcsz) {
+		ERRX(sess, "no addresses resolved: %s", host);
+		freeaddrinfo(res0);
+		return NULL;
+	}
+
+	src = calloc(srcsz, sizeof(struct source));
+	if (NULL == src) {
+		/* ERR, as for the other allocation failures in the tree. */
+		ERR(sess, "calloc");
+		freeaddrinfo(res0);
+		return NULL;
+	}
+
+	for (i = 0, res = res0; NULL != res; res = res->ai_next) {
+		if (res->ai_family != AF_INET &&
+		    res->ai_family != AF_INET6)
+			continue;
+
+		assert(i < srcsz);
+
+		/* Copy the socket address. */
+
+		src[i].salen = res->ai_addrlen;
+		memcpy(&src[i].sa, res->ai_addr, src[i].salen);
+
+		/* Format as a string, too. */
+
+		sa = res->ai_addr;
+		if (AF_INET == res->ai_family) {
+			src[i].family = PF_INET;
+			inet_ntop(AF_INET,
+				&(((struct sockaddr_in *)sa)->sin_addr),
+				src[i].ip, INET6_ADDRSTRLEN);
+		} else {
+			src[i].family = PF_INET6;
+			inet_ntop(AF_INET6,
+				&(((struct sockaddr_in6 *)sa)->sin6_addr),
+				src[i].ip, INET6_ADDRSTRLEN);
+		}
+
+		LOG2(sess, "DNS resolved: %s: %s", host, src[i].ip);
+		i++;
+	}
+
+	freeaddrinfo(res0);
+	*sz = srcsz;
+	return src;
+}
+
+/*
+ * Process an rsyncd preamble line.
+ * This is either free-form text (which is logged) or an @RSYNCD
+ * command: "OK" ends the preamble, a version number (x or x.y) is
+ * stored as the remote protocol version.
+ * The "host" argument is currently unused.
+ * Return <0 on failure, 0 on try more lines, >0 on finished.
+ */
+static int
+protocol_line(struct sess *sess, const char *host, const char *cp)
+{
+	int	major, minor;
+
+	if (strncmp(cp, "@RSYNCD: ", 9)) {
+		LOG0(sess, "%s", cp);
+		return 0;
+	}
+
+	cp += 9;
+	while (isspace((unsigned char)*cp))
+		cp++;
+
+	/* @RSYNCD: OK indicates that we're finished. */
+
+	if (0 == strcmp(cp, "OK"))
+		return 1;
+
+	/*
+	 * Otherwise, all we have left is our version.
+	 * There are two formats: x.y (w/submodule) and x.
+	 */
+
+	if (2 == sscanf(cp, "%d.%d", &major, &minor)) {
+		sess->rver = major;
+		return 0;
+	} else if (1 == sscanf(cp, "%d", &major)) {
+		sess->rver = major;
+		return 0;
+	}
+
+	ERRX(sess, "rsyncd protocol error: unknown command");
+	return -1;
+}
+
+/*
+ * Talk to an rsync:// daemon: resolve and connect to the host, run
+ * the @RSYNCD text preamble, send our arguments, read the seed, then
+ * run the receiver over the socket.
+ * Returns zero on failure, non-zero on success.
+ *
+ * Pledges: dns, inet, unveil, rpath, cpath, wpath, stdio, fattr.
+ *
+ * Pledges (dry-run): -cpath, -wpath, -fattr.
+ * Pledges (!preserve_times): -fattr.
+ */
+int
+rsync_socket(const struct opts *opts, const struct fargs *f)
+{
+	struct sess	  sess;
+	struct source	 *src = NULL;
+	size_t		  i, srcsz = 0;
+	int		  sd = -1, rc = 0, c;
+	char		**args, buf[BUFSIZ];
+	uint8_t		  byte;
+
+	memset(&sess, 0, sizeof(struct sess));
+	sess.lver = RSYNC_PROTOCOL;
+	sess.opts = opts;
+
+	assert(NULL != f->host);
+	assert(NULL != f->module);
+
+	if (NULL == (args = fargs_cmdline(&sess, f))) {
+		ERRX1(&sess, "fargs_cmdline");
+		return 0;
+	}
+
+	/* Resolve all IP addresses from the host. */
+
+	if (NULL == (src = inet_resolve(&sess, f->host, &srcsz))) {
+		ERRX1(&sess, "inet_resolve");
+		free(args);
+		return 0;
+	}
+
+	/* Drop the DNS pledge. */
+
+	if (-1 == pledge("inet unveil rpath cpath wpath stdio fattr", NULL)) {
+		ERR(&sess, "pledge");
+		goto out;
+	}
+
+	/*
+	 * Iterate over all addresses, trying to connect.
+	 * When we succeed, then continue using the connected socket.
+	 */
+
+	assert(srcsz);
+	for (i = 0; i < srcsz; i++) {
+		c = inet_connect(&sess, &sd, &src[i], f->host);
+		if (c < 0) {
+			ERRX1(&sess, "inet_connect");
+			goto out;
+		} else if (c > 0)
+			break;
+	}
+
+	/* Drop the inet pledge. */
+
+	if (-1 == pledge("unveil rpath cpath wpath stdio fattr", NULL)) {
+		ERR(&sess, "pledge");
+		goto out;
+	}
+
+	if (i == srcsz) {
+		ERRX(&sess, "cannot connect to host: %s", f->host);
+		goto out;
+	}
+
+	/* Initiate with the rsyncd version and module request. */
+
+	LOG2(&sess, "connected: %s, %s", src[i].ip, f->host);
+
+	(void)snprintf(buf, sizeof(buf), "@RSYNCD: %d", sess.lver);
+	if ( ! io_write_line(&sess, sd, buf)) {
+		ERRX1(&sess, "io_write_line");
+		goto out;
+	}
+
+	LOG2(&sess, "requesting module: %s, %s", f->module, f->host);
+
+	if ( ! io_write_line(&sess, sd, f->module)) {
+		ERRX1(&sess, "io_write_line");
+		goto out;
+	}
+
+	/*
+	 * Now we read the server's response, byte-by-byte, one newline
+	 * terminated at a time, limited to BUFSIZ line length.
+	 * For this protocol version, this consists of either @RSYNCD
+	 * followed by some text (just "ok" and the remote version) or
+	 * the message of the day.
+	 */
+
+	for (;;) {
+		for (i = 0; i < sizeof(buf); i++) {
+			if ( ! io_read_byte(&sess, sd, &byte)) {
+				ERRX1(&sess, "io_read_byte");
+				goto out;
+			}
+			if ('\n' == (buf[i] = byte))
+				break;
+		}
+		if (i == sizeof(buf)) {
+			ERRX(&sess, "line buffer overrun");
+			goto out;
+		} else if (0 == i)
+			continue;
+
+		/*
+		 * The rsyncd protocol isn't very clear as to whether we
+		 * get a CRLF or not: I don't actually see this being
+		 * transmitted over the wire.
+		 */
+
+		assert(i > 0);
+		buf[i] = '\0';
+		if ('\r' == buf[i - 1])
+			buf[i - 1] = '\0';
+
+		if ((c = protocol_line(&sess, f->host, buf)) < 0) {
+			ERRX1(&sess, "protocol_line");
+			goto out;
+		} else if (c > 0)
+			break;
+	}
+
+	/*
+	 * Now we've exchanged all of our protocol information.
+	 * We want to send our command-line arguments over the wire,
+	 * each with a newline termination.
+	 * Use the same arguments when invoking the server, but leave
+	 * off the binary name(s).
+	 * Emit a standalone newline afterward.
+	 */
+
+	if (FARGS_RECEIVER == f->mode || FARGS_SENDER == f->mode)
+		i = 3; /* ssh host rsync... */
+	else
+		i = 1; /* rsync... */
+
+	for ( ; NULL != args[i]; i++)
+		if ( ! io_write_line(&sess, sd, args[i])) {
+			ERRX1(&sess, "io_write_line");
+			goto out;
+		}
+	if ( ! io_write_byte(&sess, sd, '\n')) {
+		/* Name the call that actually failed. */
+		ERRX1(&sess, "io_write_byte");
+		goto out;
+	}
+
+	/*
+	 * All data after this point is going to be multiplexed, so turn
+	 * on the multiplexer for our reads and writes.
+	 */
+
+	/* Protocol exchange: get the random seed. */
+
+	if ( ! io_read_int(&sess, sd, &sess.seed)) {
+		ERRX1(&sess, "io_read_int");
+		goto out;
+	}
+
+	/* Now we've completed the handshake. */
+
+	if (sess.rver < sess.lver) {
+		ERRX(&sess, "remote protocol is older "
+			"than our own (%" PRId32 " < %" PRId32 "): "
+			"this is not supported",
+			sess.rver, sess.lver);
+		goto out;
+	}
+
+	sess.mplex_reads = 1;
+	LOG2(&sess, "read multiplexing enabled");
+
+	LOG2(&sess, "socket detected client version %" PRId32
+		", server version %" PRId32 ", seed %" PRId32,
+		sess.lver, sess.rver, sess.seed);
+
+	assert(FARGS_RECEIVER == f->mode);
+
+	LOG2(&sess, "client starting receiver: %s", f->host);
+	if ( ! rsync_receiver(&sess, sd, sd, f->sink)) {
+		ERRX1(&sess, "rsync_receiver");
+		goto out;
+	}
+
+#if 0
+	/* Probably the EOF. */
+	if (io_read_check(&sess, sd))
+		WARNX(&sess, "data remains in read pipe");
+#endif
+
+	rc = 1;
+out:
+	free(src);
+	free(args);
+	if (-1 != sd)
+		close(sd);
+	return rc;
+}
diff --git a/usr.bin/rsync/symlinks.c b/usr.bin/rsync/symlinks.c
new file mode 100644
index 00000000000..b85d3866fc9
--- /dev/null
+++ b/usr.bin/rsync/symlinks.c
@@ -0,0 +1,102 @@
+/* $Id: symlinks.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/param.h>
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+/*
+ * Read the target of the symbolic link "path" with readlink(2) into a
+ * freshly allocated, NUL-terminated buffer.
+ * The buffer starts at MAXPATHLEN bytes and doubles until the target
+ * fits with room to spare.
+ * Returns NULL on failure (including an empty target) or the buffer,
+ * which the caller must pass to free().
+ */
+char *
+symlink_read(struct sess *sess, const char *path)
+{
+	char	*buf = NULL;
+	void	*grown;
+	size_t	 cap = MAXPATHLEN;
+	ssize_t	 len;
+
+	for (;;) {
+		/* One extra byte for the terminator. */
+		grown = realloc(buf, cap + 1);
+		if (NULL == grown) {
+			ERR(sess, "realloc");
+			break;
+		}
+		buf = grown;
+
+		len = readlink(path, buf, cap);
+		if (-1 == len) {
+			ERR(sess, "%s: readlink", path);
+			break;
+		}
+		if (0 == len) {
+			ERRX(sess, "%s: empty link", path);
+			break;
+		}
+
+		/* A completely filled buffer may mean truncation. */
+		if ((size_t)len < cap) {
+			assert(len > 0);
+			buf[len] = '\0';
+			return buf;
+		}
+		cap *= 2;
+	}
+
+	free(buf);
+	return NULL;
+}
+
+/*
+ * Allocate space for a readlinkat(2) invocation.
+ * Returns NULL on failure or a buffer otherwise.
+ * The buffer must be passed to free() by the caller.
+ */
+char *
+symlinkat_read(struct sess *sess, int fd, const char *path)
+{
+	char	*buf = NULL;
+	void	*grown;
+	size_t	 cap = MAXPATHLEN;
+	ssize_t	 len;
+
+	for (;;) {
+		/* One extra byte for the terminator. */
+		grown = realloc(buf, cap + 1);
+		if (NULL == grown) {
+			ERR(sess, "realloc");
+			break;
+		}
+		buf = grown;
+
+		len = readlinkat(fd, path, buf, cap);
+		if (-1 == len) {
+			ERR(sess, "%s: readlinkat", path);
+			break;
+		}
+		if (0 == len) {
+			ERRX(sess, "%s: empty link", path);
+			break;
+		}
+
+		/* A completely filled buffer may mean truncation. */
+		if ((size_t)len < cap) {
+			assert(len > 0);
+			buf[len] = '\0';
+			return buf;
+		}
+		cap *= 2;
+	}
+
+	free(buf);
+	return NULL;
+}
diff --git a/usr.bin/rsync/uploader.c b/usr.bin/rsync/uploader.c
new file mode 100644
index 00000000000..5017d6b2571
--- /dev/null
+++ b/usr.bin/rsync/uploader.c
@@ -0,0 +1,772 @@
+/* $Id: uploader.c,v 1.1 2019/02/10 23:18:28 benno Exp $ */
+/*
+ * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "extern.h"
+
+enum	uploadst {
+	UPLOAD_FIND_NEXT = 0, /* find next to upload to sender */
+	UPLOAD_WRITE_LOCAL, /* wait to write to sender */
+	UPLOAD_READ_LOCAL, /* wait to read from local file */
+	UPLOAD_FINISHED /* nothing more to do in phase */
+};
+
+/*
+ * Used to keep track of data flowing from the receiver to the sender.
+ * This is managed by the receiver process.
+ */
+struct	upload {
+	enum uploadst	    state;
+	char		   *buf; /* if not NULL, pending upload */
+	size_t		    bufsz; /* size of buf */
+	size_t		    bufmax; /* maximum size of buf */
+	size_t		    bufpos; /* position in buf */
+	size_t		    idx; /* current transfer index */
+	mode_t		    oumask; /* umask for creating files */
+	int		    rootfd; /* destination directory */
+	size_t		    csumlen; /* checksum length */
+	int		    fdout; /* write descriptor to sender */
+	const struct flist *fl; /* file list */
+	size_t		    flsz; /* size of file list */
+	int		   *newdir; /* non-zero if mkdir'd */
+};
+
+/*
+ * Log a directory by emitting the file and a trailing slash, just to
+ * show the operator that we're a directory.
+ * Nothing is logged when running as the server.
+ */
+static void
+log_dir(struct sess *sess, const struct flist *f)
+{
+	size_t	 sz;
+
+	if (sess->opts->server)
+		return;
+	sz = strlen(f->path);
+	assert(sz > 0);
+	LOG1(sess, "%s%s", f->path,
+		'/' == f->path[sz - 1] ? "" : "/");
+}
+
+/*
+ * Log a link by emitting the file and the target, just to show the
+ * operator that we're a link.
+ * Nothing is logged when running as the server.
+ */
+static void
+log_link(struct sess *sess, const struct flist *f)
+{
+
+	if ( ! sess->opts->server)
+		LOG1(sess, "%s -> %s", f->path, f->link);
+}
+
+/*
+ * Simply log the filename.
+ * Nothing is logged when running as the server.
+ */
+static void
+log_file(struct sess *sess, const struct flist *f)
+{
+
+	if ( ! sess->opts->server)
+		LOG1(sess, "%s", f->path);
+}
+
+/*
+ * Prepare the overall block set's metadata.
+ * We always have at least one block.
+ * The block size is an important part of the algorithm.
+ * I use the same heuristic as the reference rsync, but implemented in a
+ * bit more of a straightforward way.
+ * In general, the individual block length is the rounded square root of
+ * the total file size.
+ * The minimum block length is 700.
+ * On return "rem" is the length of a short final block, or zero when
+ * the file size is an exact multiple of the block length.
+ */
+static void
+init_blkset(struct blkset *p, off_t sz)
+{
+	double	 v;
+
+	if (sz >= (BLOCK_SIZE_MIN * BLOCK_SIZE_MIN)) {
+		/* Simple rounded-up integer square root. */
+
+		v = sqrt(sz);
+		p->len = ceil(v);
+
+		/*
+		 * Always be a multiple of eight.
+		 * There's no reason to do this, but rsync does.
+		 */
+
+		if ((p->len % 8) > 0)
+			p->len += 8 - (p->len % 8);
+	} else
+		p->len = BLOCK_SIZE_MIN;
+
+	p->size = sz;
+	if (0 == (p->blksz = sz / p->len))
+		p->rem = sz;
+	else
+		p->rem = sz % p->len;
+
+	/* If we have a remainder, then we need an extra block. */
+
+	if (p->rem)
+		p->blksz++;
+}
+
+/*
+ * For each block, prepare the block's metadata.
+ * We use the mapped "map" file to set our checksums: the block covers
+ * the bytes starting "offs" bytes into the mapping.
+ */
+static void
+init_blk(struct blk *p, const struct blkset *set, off_t offs,
+	size_t idx, const void *map, const struct sess *sess)
+{
+	const void	*data;
+
+	assert(MAP_FAILED != map);
+
+	/*
+	 * Block length inherits for all but the last.
+	 * The last block is short only if there is a remainder: with a
+	 * zero remainder the file is an exact multiple of the block
+	 * length and the last block is a full one, not an empty one.
+	 */
+
+	p->idx = idx;
+	if (idx < set->blksz - 1 || 0 == set->rem)
+		p->len = set->len;
+	else
+		p->len = set->rem;
+	p->offs = offs;
+
+	/* Step in bytes; arithmetic on "void *" is not standard C. */
+
+	data = (const char *)map + offs;
+
+	p->chksum_short = hash_fast(data, p->len);
+	hash_slow(data, p->len, p->chksum_long, sess);
+}
+
+/*
+ * Make the symbolic link at the current index exist and point at the
+ * requested target, then optionally set its time and mode.
+ * Symlinks are skipped with a warning unless preserve_links is set,
+ * and only logged in dry-run mode.
+ * Return <0 on failure 0 on success.
+ */
+static int
+pre_link(struct upload *p, struct sess *sess)
+{
+	int		 rc, newlink = 0;
+	char		*b;
+	struct stat	 st;
+	struct timespec	 tv[2];
+	const struct flist *f;
+
+	f = &p->fl[p->idx];
+	assert(S_ISLNK(f->st.mode));
+
+	if ( ! sess->opts->preserve_links) {
+		WARNX(sess, "%s: ignoring symlink", f->path);
+		return 0;
+	} else if (sess->opts->dry_run) {
+		log_link(sess, f);
+		return 0;
+	}
+
+	/* See if the symlink already exists. */
+
+	assert(-1 != p->rootfd);
+	rc = fstatat(p->rootfd, f->path, &st, AT_SYMLINK_NOFOLLOW);
+	if (-1 != rc && ! S_ISLNK(st.st_mode)) {
+		WARNX(sess, "%s: not a symlink", f->path);
+		return -1;
+	} else if (-1 == rc && ENOENT != errno) {
+		WARN(sess, "%s: fstatat", f->path);
+		return -1;
+	}
+
+	/*
+	 * If the symbolic link already exists, then make sure that it
+	 * points to the correct place.
+	 * FIXME: does symlinkat() set permissions on the link using the
+	 * destination file or the default umask?
+	 * Do we need a fchmod in here as well?
+	 */
+
+	if (-1 == rc) {
+		LOG3(sess, "%s: creating "
+			"symlink: %s", f->path, f->link);
+		if (-1 == symlinkat(f->link, p->rootfd, f->path)) {
+			WARN(sess, "%s: symlinkat", f->path);
+			return -1;
+		}
+		newlink = 1;
+	} else {
+		b = symlinkat_read(sess, p->rootfd, f->path);
+		if (NULL == b) {
+			ERRX1(sess, "%s: symlinkat_read", f->path);
+			return -1;
+		}
+		if (strcmp(f->link, b)) {
+			/* Released here so the error returns don't leak. */
+			free(b);
+			b = NULL;
+			LOG3(sess, "%s: updating "
+				"symlink: %s", f->path, f->link);
+			if (-1 == unlinkat(p->rootfd, f->path, 0)) {
+				WARN(sess, "%s: unlinkat", f->path);
+				return -1;
+			}
+			if (-1 == symlinkat(f->link, p->rootfd, f->path)) {
+				WARN(sess, "%s: symlinkat", f->path);
+				return -1;
+			}
+			newlink = 1;
+		}
+		free(b);
+	}
+
+	/* Optionally preserve times/perms on the symlink. */
+
+	if (sess->opts->preserve_times) {
+		tv[0].tv_sec = time(NULL);
+		tv[0].tv_nsec = 0;
+		tv[1].tv_sec = f->st.mtime;
+		tv[1].tv_nsec = 0;
+		rc = utimensat(p->rootfd,
+			f->path, tv, AT_SYMLINK_NOFOLLOW);
+		if (-1 == rc) {
+			ERR(sess, "%s: utimensat", f->path);
+			return -1;
+		}
+		LOG4(sess, "%s: updated symlink date", f->path);
+	}
+
+	/*
+	 * FIXME: if newlink is set because we updated the symlink, we
+	 * want to carry over the permissions from the last.
+	 */
+
+	if (newlink || sess->opts->preserve_perms) {
+		rc = fchmodat(p->rootfd, f->path,
+			f->st.mode, AT_SYMLINK_NOFOLLOW);
+		if (-1 == rc) {
+			ERR(sess, "%s: fchmodat", f->path);
+			return -1;
+		}
+		LOG4(sess, "%s: updated symlink mode", f->path);
+	}
+
+	log_link(sess, f);
+	return 0;
+}
+
+/*
+ * If not found, create the destination directory in prefix order.
+ * Create directories using the existing umask.
+ * A directory created here is flagged in the "newdir" array.
+ * Return <0 on failure 0 on success.
+ */
+static int
+pre_dir(const struct upload *p, struct sess *sess)
+{
+	struct stat	 st;
+	int		 rc;
+	const struct flist *f;
+
+	f = &p->fl[p->idx];
+	assert(S_ISDIR(f->st.mode));
+
+	if ( ! sess->opts->recursive) {
+		WARNX(sess, "%s: ignoring directory", f->path);
+		return 0;
+	} else if (sess->opts->dry_run) {
+		log_dir(sess, f);
+		return 0;
+	}
+
+	assert(-1 != p->rootfd);
+	rc = fstatat(p->rootfd, f->path, &st, AT_SYMLINK_NOFOLLOW);
+	if (-1 == rc && ENOENT != errno) {
+		WARN(sess, "%s: fstatat", f->path);
+		return -1;
+	} else if (-1 != rc && ! S_ISDIR(st.st_mode)) {
+		WARNX(sess, "%s: not a directory", f->path);
+		return -1;
+	} else if (-1 != rc) {
+		/*
+		 * FIXME: we should fchmod the permissions here as well,
+		 * as we may locally have shut down writing into the
+		 * directory and that doesn't work.
+		 */
+		LOG3(sess, "%s: updating directory", f->path);
+		return 0;
+	}
+
+	/*
+	 * We want to make the directory with default permissions (using
+	 * our old umask, which we've since unset), then adjust
+	 * permissions (assuming preserve_perms or new) afterward in
+	 * case it's u-w or something.
+	 */
+
+	LOG3(sess, "%s: creating directory", f->path);
+	if (-1 == mkdirat(p->rootfd, f->path, 0777 & ~p->oumask)) {
+		WARN(sess, "%s: mkdirat", f->path);
+		return -1;
+	}
+
+	p->newdir[p->idx] = 1;
+	log_dir(sess, f);
+	return 0;
+}
+
+/*
+ * Process the directory time and mode for "idx" in the file list.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+post_dir(struct sess *sess, const struct upload *u, size_t idx)
+{
+	const struct flist *f = &u->fl[idx];
+	struct timespec	 tv[2];
+	struct stat	 st;
+
+	assert(S_ISDIR(f->st.mode));
+
+	/*
+	 * Nothing to do when directories are skipped or in dry-run
+	 * mode; pre_dir() has already warned or logged for those.
+	 */
+
+	if ( ! sess->opts->recursive || sess->opts->dry_run)
+		return 1;
+
+	if (-1 == fstatat(u->rootfd, f->path, &st, AT_SYMLINK_NOFOLLOW)) {
+		ERR(sess, "%s: fstatat", f->path);
+		return 0;
+	}
+	if ( ! S_ISDIR(st.st_mode)) {
+		WARNX(sess, "%s: not a directory", f->path);
+		return 0;
+	}
+
+	/*
+	 * Set the modification time on a directory we created, or on
+	 * an existing one when preserving times and the time differs.
+	 */
+
+	if (u->newdir[idx] ||
+	    (sess->opts->preserve_times &&
+	     st.st_mtime != f->st.mtime)) {
+		tv[0].tv_sec = time(NULL);
+		tv[0].tv_nsec = 0;
+		tv[1].tv_sec = f->st.mtime;
+		tv[1].tv_nsec = 0;
+		if (-1 == utimensat(u->rootfd, f->path, tv, 0)) {
+			ERR(sess, "%s: utimensat", f->path);
+			return 0;
+		}
+		LOG4(sess, "%s: updated date", f->path);
+	}
+
+	/*
+	 * Likewise for the mode: always on a directory we created,
+	 * otherwise only when preserving modes and the mode differs.
+	 */
+
+	if (u->newdir[idx] ||
+	    (sess->opts->preserve_perms &&
+	     st.st_mode != f->st.mode)) {
+		if (-1 == fchmodat(u->rootfd, f->path, f->st.mode, 0)) {
+			ERR(sess, "%s: fchmodat", f->path);
+			return 0;
+		}
+		LOG4(sess, "%s: updated mode", f->path);
+	}
+
+	return 1;
+}
+
+/*
+ * Try to open the file at the current index.
+ * In dry-run mode the file is only logged and its index written to
+ * the sender.
+ * If the file does not exist, returns with success ("filefd" is -1).
+ * Return <0 on failure, 0 on success w/nothing to be done, >0 on
+ * success and the file needs attention.
+ */
+static int
+pre_file(const struct upload *p, int *filefd, struct sess *sess)
+{
+	const struct flist *f = &p->fl[p->idx];
+
+	assert(S_ISREG(f->st.mode));
+
+	if (sess->opts->dry_run) {
+		log_file(sess, f);
+		if (io_write_int(sess, p->fdout, p->idx))
+			return 0;
+		ERRX1(sess, "io_write_int");
+		return -1;
+	}
+
+	/*
+	 * For non dry-run cases, we'll write the acknowledgement later
+	 * in the rsync_uploader() function because we need to wait for
+	 * the open() call to complete.
+	 * If the call to openat() fails with ENOENT, there's a
+	 * fast-path between here and the write function, so we won't do
+	 * any blocking between now and then.
+	 */
+
+	*filefd = openat(p->rootfd, f->path,
+		O_RDONLY | O_NOFOLLOW | O_NONBLOCK, 0);
+	if (-1 == *filefd && ENOENT != errno) {
+		ERR(sess, "%s: openat", f->path);
+		return -1;
+	}
+	return 1;
+}
+
+/*
+ * Allocate an uploader object in the UPLOAD_FIND_NEXT state.
+ * The file list is referenced, not copied.
+ * Returns NULL on failure or the pointer otherwise.
+ * On success, upload_free() must be called with the allocated pointer.
+ */
+struct upload *
+upload_alloc(struct sess *sess, int rootfd, int fdout,
+	size_t clen, const struct flist *fl, size_t flsz, mode_t msk)
+{
+	struct upload	*up;
+
+	up = calloc(1, sizeof(struct upload));
+	if (NULL == up) {
+		ERR(sess, "calloc");
+		return NULL;
+	}
+
+	/* One "was created by us" flag per file list entry. */
+
+	up->newdir = calloc(flsz, sizeof(int));
+	if (NULL == up->newdir) {
+		ERR(sess, "calloc");
+		free(up);
+		return NULL;
+	}
+
+	up->state = UPLOAD_FIND_NEXT;
+	up->oumask = msk;
+	up->rootfd = rootfd;
+	up->csumlen = clen;
+	up->fdout = fdout;
+	up->fl = fl;
+	up->flsz = flsz;
+	return up;
+}
+
+/*
+ * Release an uploader and the buffers it owns.
+ * Passing a NULL to this function is ok.
+ */
+void
+upload_free(struct upload *p)
+{
+
+	if (NULL != p) {
+		free(p->newdir);
+		free(p->buf);
+		free(p);
+	}
+}
+
+/*
+ * Iterates through all available files and conditionally gets the file
+ * ready for processing to check whether it's up to date.
+ * If not up to date or empty, sends file information to the sender.
+ * If returns 0, we've processed all files there are to process.
+ * If returns >0, we're waiting for POLLIN or POLLOUT data.
+ * Otherwise returns <0, which is an error.
+ */ +int +rsync_uploader(struct upload *u, int *fileinfd, + struct sess *sess, int *fileoutfd) +{ + struct blkset blk; + struct stat st; + void *map, *bufp; + size_t i, mapsz, pos, sz; + off_t offs; + int c; + + /* This should never get called. */ + + assert(UPLOAD_FINISHED != u->state); + + /* + * If we have an upload in progress, then keep writing until the + * buffer has been fully written. + * We must only have the output file descriptor working and also + * have a valid buffer to write. + */ + + if (UPLOAD_WRITE_LOCAL == u->state) { + assert(NULL != u->buf); + assert(-1 != *fileoutfd); + assert(-1 == *fileinfd); + + /* + * Unfortunately, we need to chunk these: if we're + * the server side of things, then we're multiplexing + * output and need to wrap this in chunks. + * This is a major deficiency of rsync. + * FIXME: add a "fast-path" mode that simply dumps out + * the buffer non-blocking if we're not mplexing. + */ + + if (u->bufpos < u->bufsz) { + sz = MAX_CHUNK < (u->bufsz - u->bufpos) ? + MAX_CHUNK : (u->bufsz - u->bufpos); + c = io_write_buf(sess, u->fdout, + u->buf + u->bufpos, sz); + if (0 == c) { + ERRX1(sess, "io_write_nonblocking"); + return -1; + } + u->bufpos += sz; + if (u->bufpos < u->bufsz) + return 1; + } + + /* + * Let the UPLOAD_FIND_NEXT state handle things if we + * finish, as we'll need to write a POLLOUT message and + * not have a writable descriptor yet. + */ + + u->state = UPLOAD_FIND_NEXT; + u->idx++; + return 1; + } + + /* + * If we invoke the uploader without a file currently open, then + * we iterate through til the next available regular file and + * start the opening process. + * This means we must have the output file descriptor working. 
+ */ + + if (UPLOAD_FIND_NEXT == u->state) { + assert(-1 == *fileinfd); + assert(-1 != *fileoutfd); + + for ( ; u->idx < u->flsz; u->idx++) { + if (S_ISDIR(u->fl[u->idx].st.mode)) + c = pre_dir(u, sess); + else if (S_ISLNK(u->fl[u->idx].st.mode)) + c = pre_link(u, sess); + else if (S_ISREG(u->fl[u->idx].st.mode)) + c = pre_file(u, fileinfd, sess); + else + c = 0; + + if (c < 0) + return -1; + else if (c > 0) + break; + } + + /* + * Whether we've finished writing files or not, we + * disable polling on the output channel. + */ + + *fileoutfd = -1; + if (u->idx == u->flsz) { + assert(-1 == *fileinfd); + if ( ! io_write_int(sess, u->fdout, -1)) { + ERRX1(sess, "io_write_int"); + return -1; + } + u->state = UPLOAD_FINISHED; + LOG4(sess, "uploader: finished"); + return 0; + } + + /* Go back to the event loop, if necessary. */ + + u->state = -1 == *fileinfd ? + UPLOAD_WRITE_LOCAL : UPLOAD_READ_LOCAL; + if (UPLOAD_READ_LOCAL == u->state) + return 1; + } + + /* + * If an input file is open, stat it and see if it's already up + * to date, in which case close it and go to the next one. + * Either way, we don't have a write channel open. + */ + + if (UPLOAD_READ_LOCAL == u->state) { + assert (-1 != *fileinfd); + assert(-1 == *fileoutfd); + + if (-1 == fstat(*fileinfd, &st)) { + WARN(sess, "%s: fstat", u->fl[u->idx].path); + close(*fileinfd); + *fileinfd = -1; + return -1; + } else if ( ! S_ISREG(st.st_mode)) { + WARNX(sess, "%s: not regular", u->fl[u->idx].path); + close(*fileinfd); + *fileinfd = -1; + return -1; + } + + if (st.st_size == u->fl[u->idx].st.size && + st.st_mtime == u->fl[u->idx].st.mtime) { + LOG3(sess, "%s: skipping: " + "up to date", u->fl[u->idx].path); + close(*fileinfd); + *fileinfd = -1; + *fileoutfd = u->fdout; + u->state = UPLOAD_FIND_NEXT; + u->idx++; + return 1; + } + + /* Fallthrough... */ + + u->state = UPLOAD_WRITE_LOCAL; + } + + /* Initialies our blocks. 
*/ + + assert(UPLOAD_WRITE_LOCAL == u->state); + memset(&blk, 0, sizeof(struct blkset)); + blk.csum = u->csumlen; + + if (-1 != *fileinfd && st.st_size > 0) { + mapsz = st.st_size; + map = mmap(NULL, mapsz, + PROT_READ, MAP_SHARED, *fileinfd, 0); + if (MAP_FAILED == map) { + WARN(sess, "%s: mmap", u->fl[u->idx].path); + close(*fileinfd); + *fileinfd = -1; + return -1; + } + + init_blkset(&blk, st.st_size); + assert(blk.blksz); + + blk.blks = calloc(blk.blksz, sizeof(struct blk)); + if (NULL == blk.blks) { + ERR(sess, "calloc"); + munmap(map, mapsz); + close(*fileinfd); + *fileinfd = -1; + return -1; + } + + offs = 0; + for (i = 0; i < blk.blksz; i++) { + init_blk(&blk.blks[i], + &blk, offs, i, map, sess); + offs += blk.len; + } + + munmap(map, mapsz); + close(*fileinfd); + *fileinfd = -1; + LOG3(sess, "%s: mapped %jd B with %zu blocks", + u->fl[u->idx].path, (intmax_t)blk.size, + blk.blksz); + } else { + if (-1 != *fileinfd) { + close(*fileinfd); + *fileinfd = -1; + } + blk.len = MAX_CHUNK; /* Doesn't matter. */ + LOG3(sess, "%s: not mapped", u->fl[u->idx].path); + } + + assert(-1 == *fileinfd); + + /* Make sure the block metadata buffer is big enough. 
*/ + + u->bufsz = + sizeof(int32_t) + /* identifier */ + sizeof(int32_t) + /* block count */ + sizeof(int32_t) + /* block length */ + sizeof(int32_t) + /* checksum length */ + sizeof(int32_t) + /* block remainder */ + blk.blksz * + (sizeof(int32_t) + /* short checksum */ + blk.csum); /* long checksum */ + + if (u->bufsz > u->bufmax) { + if (NULL == (bufp = realloc(u->buf, u->bufsz))) { + ERR(sess, "realloc"); + return -1; + } + u->buf = bufp; + u->bufmax = u->bufsz; + } + + u->bufpos = pos = 0; + io_buffer_int(sess, u->buf, &pos, u->bufsz, u->idx); + io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.blksz); + io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.len); + io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.csum); + io_buffer_int(sess, u->buf, &pos, u->bufsz, blk.rem); + for (i = 0; i < blk.blksz; i++) { + io_buffer_int(sess, u->buf, &pos, u->bufsz, + blk.blks[i].chksum_short); + io_buffer_buf(sess, u->buf, &pos, u->bufsz, + blk.blks[i].chksum_long, blk.csum); + } + assert(pos == u->bufsz); + + /* Reenable the output poller and clean up. */ + + *fileoutfd = u->fdout; + free(blk.blks); + return 1; +} + +/* + * Fix up the directory permissions and times post-order. + * We can't fix up directory permissions in place because the server may + * want us to have overly-tight permissions---say, those that don't + * allow writing into the directory. + * We also need to do our directory times post-order because making + * files within the directory will change modification times. + * Returns zero on failure, non-zero on success. + */ +int +rsync_uploader_tail(struct upload *u, struct sess *sess) +{ + size_t i; + + + if ( ! sess->opts->preserve_times && + ! sess->opts->preserve_perms) + return 1; + + LOG2(sess, "fixing up directory times and permissions"); + + for (i = 0; i < u->flsz; i++) + if (S_ISDIR(u->fl[i].st.mode)) + if ( ! post_dir(sess, u, i)) + return 0; + + return 1; +} |