hurl

Gopher/HTTP/HTTPS file grabber
git clone git://git.codemadness.org/hurl
Log | Files | Refs | README | LICENSE

commit 2ab1b868fbac189f3c7b8e2d4af14838aad1227f
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sun, 11 Nov 2018 13:34:20 +0100

initial repo

Diffstat:
A .gitignore | 2 ++
A LICENSE | 15 +++++++++++++++
A Makefile | 5 +++++
A README | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A TODO | 5 +++++
A arg.h | 37 +++++++++++++++++++++++++++++++++++++
A bget.c | 524 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 644 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +bget +*.o diff --git a/LICENSE b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2018 Hiltjo Posthuma <hiltjo@codemadness.org> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile @@ -0,0 +1,5 @@ +build: clean + cc -o bget bget.c -ltls ${CFLAGS} ${LDFLAGS} + +clean: + rm -f bget *.o diff --git a/README b/README @@ -0,0 +1,56 @@ +bget +==== + +Relatively simple HTTP, HTTPS and Gopher client/file grabber. + + +Why? +---- + +curl is a nice Swiss-army knife, but it's not a very sharp tool. Every week +there is a new curl CVE. Other tools like OpenBSD ftp are much better, but +still do too much. + +Sometimes (or most of the time?) you just want to fetch a file via the HTTP, +HTTPS or Gopher protocol. + +The focus of this tool is on security and simplicity. + + +Dependencies +------------ + +- LibreSSL +- libtls + + +Features +-------- + +- Uses OpenBSD pledge(2) and unveil(2). Allow no filesystem access (write to + stdout). +- Impose timeout and maximum size limits. +- Use well-defined exitcodes for reliable scripting (curl sucks at this). +- Send as little information as possible (no User-Agent etc by default). +- Allow to send custom headers / header string if wanted. 
+ + +Anti-features +------------- + +Not by default, but possible with custom headers: +- No HTTP byte range support. +- No HTTP User-Agent. +- No HTTP If-Modified-Since/If-* support. +- No HTTP auth support. + +Other: +- No HTTP keep-alive. +- No HTTP chunked-encoding support. +- No HTTP redirect support. +- No (GZIP) compression support. +- No cookie-jar or cookie parsing support. + +- No Gopher text handling (".\r\n"). + +- ... etc... diff --git a/TODO b/TODO @@ -0,0 +1,5 @@ +- man page documentation. +- add compat functions, for example strlcpy. +- separate program error with other error. +? HTTP proxy support? + diff --git a/arg.h b/arg.h @@ -0,0 +1,37 @@ +#ifndef ARG_H +#define ARG_H + +#define USED(x) ((void)(x)) + +extern char *argv0; + +#define ARGBEGIN for(argv0 = *argv, argv++, argc--;\ + argv[0] && argv[0][0] == '-'\ + && argv[0][1];\ + argc--, argv++) {\ + char _argc;\ + char **_argv;\ + if(argv[0][1] == '-' && argv[0][2] == '\0') {\ + argv++;\ + argc--;\ + break;\ + }\ + int i_;\ + for(i_ = 1, _argv = argv; argv[0][i_];\ + i_++) {\ + if(_argv != argv)\ + break;\ + _argc = argv[0][i_];\ + switch(_argc) + +#define ARGEND }\ + USED(_argc);\ + }\ + USED(argv);\ + USED(argc); + +#define EARGF(x) ((argv[1] == NULL)? 
((x), abort(), (char *)0) :\ + (argc--, argv++, argv[0])) + +#endif + diff --git a/bget.c b/bget.c @@ -0,0 +1,524 @@ +#include <sys/socket.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <netdb.h> +#include <locale.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include <tls.h> + +#include "arg.h" + +#define READ_BUF_SIZ 16384 + +#ifndef __OpenBSD__ +#define pledge(p1,p2) 0 +#define unveil(p1,p2) 0 +#endif + +#ifndef TLS_CA_CERT_FILE +#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem" +#endif + +/* uri */ +struct uri { + char proto[48]; + char host[256]; + char path[2048]; + char port[6]; /* numeric port */ +}; + +char *argv0; + +/* max response size in bytes, 0 is unlimited */ +static size_t config_maxresponsesiz = 0; +/* time-out in seconds */ +static time_t config_timeout = 10; +/* custom HTTP header */ +static char *config_custom; +/* parsed uri */ +static struct uri u; +/* raw command-line argument */ +static char *url; + +void +die(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +parseuri(const char *s, struct uri *u) +{ + const char *p = s, *b; + char *endptr = NULL; + size_t i; + unsigned long l; + + u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0'; + if (!*p) + return 0; + + /* protocol part */ + for (p = s; *p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + *p == '+' || *p == '-' || *p == '.'); p++) + ; + if (!strncmp(p, "://", 3)) { + if ((size_t)(p - s) >= sizeof(u->proto)) + return -1; /* protocol too long */ + memcpy(u->proto, s, p - s); + u->proto[p - s] = '\0'; + p += 3; /* skip "://" */ + } else { + return -1; /* no protocol specified */ + } + + /* IPv6 address */ + if (*p == '[') { + /* bracket not found or host too long */ + if (!(b = strchr(p, ']')) || (size_t)(b - p) >= (ssize_t)sizeof(u->host)) + return -1; + memcpy(u->host, p + 1, b - p - 1); + u->host[b - p - 1] = '\0'; + p = b + 1; + } else { + /* domain / host part, skip until port, path or end. 
*/ + if ((i = strcspn(p, ":/")) >= sizeof(u->host)) + return -1; /* host too long */ + memcpy(u->host, p, i); + u->host[i] = '\0'; + p = &p[i]; + } + /* port */ + if (*p == ':') { + if ((i = strcspn(++p, "/")) >= sizeof(u->port)) + return -1; /* port too long */ + memcpy(u->port, p, i); + u->port[i] = '\0'; + /* check for valid port: range 1 - 65535 */ + errno = 0; + l = strtoul(u->port, &endptr, 10); + if (errno || u->port[0] == '\0' || *endptr || + !l || l > 65535) + return -1; + p = &p[i]; + } + if (u->host[0]) { + p = &p[strspn(p, "/")]; + strlcpy(u->path, "/", sizeof(u->path)); + } else { + return -1; + } + /* treat truncation as an error */ + if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path)) + return -1; + return 0; +} + +int +edial(const char *host, const char *port) +{ + struct addrinfo hints, *res, *res0; + int error, save_errno, s; + const char *cause = NULL; + struct timeval timeout; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_NUMERICSERV; /* numeric port only */ + if ((error = getaddrinfo(host, port, &hints, &res0))) + die("%s: %s: %s:%s\n", __func__, gai_strerror(error), host, port); + s = -1; + for (res = res0; res; res = res->ai_next) { + s = socket(res->ai_family, res->ai_socktype, + res->ai_protocol); + if (s == -1) { + cause = "socket"; + continue; + } + + timeout.tv_sec = config_timeout; + timeout.tv_usec = 0; + if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1) + die("%s: setsockopt: %s\n", __func__, strerror(errno)); + + timeout.tv_sec = config_timeout; + timeout.tv_usec = 0; + if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1) + die("%s: setsockopt: %s\n", __func__, strerror(errno)); + + if (connect(s, res->ai_addr, res->ai_addrlen) == -1) { + cause = "connect"; + save_errno = errno; + close(s); + errno = save_errno; + s = -1; + continue; + } + break; + } + if (s == -1) + die("%s: %s: %s:%s\n", 
__func__, cause, host, port); + freeaddrinfo(res0); + + return s; +} + +int +https_request(void) +{ + struct tls *t = NULL; + char buf[READ_BUF_SIZ], *p; + size_t n, len = 0; + ssize_t r; + int fd = -1, httpok = 0, ret = 1; + + if (pledge("stdio dns inet rpath unveil", NULL) == -1) + err(1, "pledge"); + + if (unveil(TLS_CA_CERT_FILE, "r") == -1) + err(1, "unveil: %s", TLS_CA_CERT_FILE); + if (unveil(NULL, NULL) == -1) + err(1, "unveil"); + + if (!(t = tls_client())) { + fprintf(stderr, "tls_client: %s\n", tls_error(t)); + goto err; + } + + fd = edial(u.host, u.port); + if (tls_connect_socket(t, fd, u.host) == -1) + die("tls_connect: %s\n", tls_error(t)); + + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); + + /* create and send HTTP header */ + snprintf(buf, sizeof(buf), + "GET %s HTTP/1.0\r\n" + "Host: %s\r\n" + "Connection: close\r\n" + "%s" + "\r\n", u.path, u.host, config_custom ? config_custom : ""); + if ((r = tls_write(t, buf, strlen(buf))) == -1) { + fprintf(stderr, "tls_write: %s\n", tls_error(t)); + exit(1); + } + + /* NOTE: HTTP header must fit in the buffer */ + r = tls_read(t, &buf, sizeof(buf)); + if (r == 0) { + fprintf(stderr, "nothing read\n"); + goto err; + } + if (r == -1) { + fprintf(stderr, "tls_read: %s\n", tls_error(t)); + goto err; + } + len += r; + + if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) || + !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1)) + httpok = 1; + + if (!(p = strstr(buf, "\r\n\r\n"))) { + fprintf(stderr, "no HTTP header found or header too big\n"); + goto err; + } + *p = '\0'; /* NUL terminate header part */ + p += strlen("\r\n\r\n"); + + if (httpok) { + n = r - (p - buf); + r = fwrite(p, 1, n, stdout); + if (ferror(stdout)) { + fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno)); + goto err; + } + } else { + /* if not 200 OK print header */ + fputs(buf, stderr); + /* TODO: exit or continue reading, probably nicer to continue read */ + } + + while (1) { + r = tls_read(t, &buf, 
sizeof(buf)); + if (r == 0) + break; + if (r == -1) { + fprintf(stderr, "tls_read: %s\n", tls_error(t)); + goto err; + } + len += r; + + if (httpok) { + r = fwrite(buf, 1, r, stdout); + if (ferror(stdout)) { + fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno)); + goto err; + } + } + + if (config_maxresponsesiz && len >= config_maxresponsesiz) + break; + } + if (config_maxresponsesiz && len >= config_maxresponsesiz) { + fprintf(stderr, "tls_read: response too big: %zu >= %zu\n", + len, config_maxresponsesiz); + goto err; + } + ret = 0; + +err: + if (t) { + tls_close(t); + tls_free(t); + } + + return httpok ? ret : 2; +} + +int +http_request(void) +{ + char buf[READ_BUF_SIZ], *p; + size_t n, len = 0; + ssize_t r; + int fd = -1, httpok = 0, ret = 1; + + if (pledge("stdio dns inet", NULL) == -1) + err(1, "pledge"); + + fd = edial(u.host, u.port); + + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); + + /* create and send HTTP header */ + snprintf(buf, sizeof(buf), + "GET %s HTTP/1.0\r\n" + "Host: %s\r\n" + "Connection: close\r\n" + "%s" + "\r\n", u.path, u.host, config_custom ? 
config_custom : ""); + if ((r = write(fd, buf, strlen(buf))) == -1) { + fprintf(stderr, "write: %s\n", strerror(errno)); + goto err; + } + + /* NOTE: HTTP header must fit in the buffer */ + r = read(fd, &buf, sizeof(buf)); + if (r == 0) { + fprintf(stderr, "nothing read\n"); + goto err; + } + if (r == -1) { + fprintf(stderr, "read: %s\n", strerror(errno)); + goto err; + } + len += r; + + if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) || + !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1)) + httpok = 1; + + if (!(p = strstr(buf, "\r\n\r\n"))) { + fprintf(stderr, "no HTTP header found or header too big\n"); + goto err; + } + *p = '\0'; /* NUL terminate header part */ + p += strlen("\r\n\r\n"); + + if (httpok) { + n = r - (p - buf); + r = fwrite(p, 1, n, stdout); + if (ferror(stdout)) { + fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno)); + goto err; + } + } else { + /* if not 200 OK print header */ + fputs(buf, stderr); + /* TODO: exit or continue reading, probably nicer to continue read */ + } + + while (1) { + r = read(fd, &buf, sizeof(buf)); + if (r == 0) + break; + if (r == -1) { + fprintf(stderr, "read: %s\n", strerror(errno)); + goto err; + } + len += r; + + if (httpok) { + r = fwrite(buf, 1, r, stdout); + if (ferror(stdout)) { + fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno)); + goto err; + } + } + + if (config_maxresponsesiz && len >= config_maxresponsesiz) + break; + } + if (config_maxresponsesiz && len >= config_maxresponsesiz) { + fprintf(stderr, "read: response too big: %zu >= %zu\n", + len, config_maxresponsesiz); + goto err; + } + ret = 0; + +err: + if (fd != -1) + close(fd); + return httpok ? 
ret : 2; +} + +int +gopher_request(void) +{ + char buf[READ_BUF_SIZ]; + size_t len = 0; + ssize_t r; + int fd = -1, ret = 1; + + if (pledge("stdio dns inet", NULL) == -1) + err(1, "pledge"); + + fd = edial(u.host, u.port); + + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); + + /* create and send path, skip type part */ + snprintf(buf, sizeof(buf), "%s\r\n", u.path + 2); + if ((r = write(fd, buf, strlen(buf))) == -1) { + fprintf(stderr, "write: %s\n", strerror(errno)); + goto err; + } + + while (1) { + r = read(fd, &buf, READ_BUF_SIZ); + if (r == 0) + break; + if (r == -1) { + fprintf(stderr, "read: %s\n", strerror(errno)); + goto err; + } + len += r; + + r = fwrite(buf, 1, r, stdout); + if (ferror(stdout)) { + fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno)); + goto err; + } + + if (config_maxresponsesiz && len >= config_maxresponsesiz) + break; + } + if (config_maxresponsesiz && len >= config_maxresponsesiz) { + fprintf(stderr, "tls_read: response too big: %zu >= %zu\n", + len, config_maxresponsesiz); + goto err; + } + ret = 0; + +err: + if (fd != -1) + close(fd); + return ret; +} + +void +usage(void) +{ + fprintf(stderr, "usage: %s [ -H header | -m maxresponse | -t timeout ]\n", + argv0); + exit(1); +} + +int +main(int argc, char **argv) +{ + int statuscode; + + ARGBEGIN { + case 'H': /* custom HTTP headers */ + config_custom = EARGF(usage()); + break; + case 'm': /* max filesize */ + /* TODO: strtonum */ + config_maxresponsesiz = atoll(EARGF(usage())); + break; + case 't': /* timeout */ + /* TODO: strtonum */ + config_timeout = atoll(EARGF(usage())); + break; + default: + usage(); + } ARGEND + + if (argc != 1) + usage(); + + url = argv[0]; + if (parseuri(url, &u) == -1) { + fprintf(stderr, "invalid url: %s\n", url); + exit(1); + } + + if (!u.proto[0]) { + fprintf(stderr, "no protocol specified\n"); + exit(1); + } + + if (!strcmp(u.proto, "https")) { + if (!strcmp(u.proto, "https")) + memcpy(u.port, "443", 4); + statuscode = https_request(); + } 
else if (!strcmp(u.proto, "http")) { + if (!u.port[0]) + memcpy(u.port, "80", 3); + statuscode = http_request(); + } else if (!strcmp(u.proto, "gopher")) { + if (config_custom) { + fprintf(stderr, "no custom header supported with gopher protocol\n"); + exit(1); + } + + if (!u.port[0]) + memcpy(u.port, "70", 3); + + if (u.path[0] != '/' || u.path[1] == '\0') { + fprintf(stderr, "must specify type\n"); + exit(1); + } + + statuscode = gopher_request(); + } else { + if (u.proto[0]) + fprintf(stderr, "unsupported protocol specified: %s\n", u.proto); + else + fprintf(stderr, "no protocol specified\n"); + exit(1); + } + + return statuscode; +}