hurl

Gopher/HTTP/HTTPS file grabber
git clone git://git.codemadness.org/hurl
Log | Files | Refs | README | LICENSE

hurl.c (11622B)


      1 #include <sys/socket.h>
      2 #include <sys/time.h>
      3 
      4 #include <ctype.h>
      5 #include <err.h>
      6 #include <errno.h>
      7 #include <netdb.h>
      8 #include <locale.h>
      9 #include <stdarg.h>
     10 #include <stdio.h>
     11 #include <stdint.h>
     12 #include <stdlib.h>
     13 #include <string.h>
     14 #include <time.h>
     15 #include <unistd.h>
     16 
     17 #include <tls.h>
     18 
     19 #include "arg.h"
     20 
     21 #define READ_BUF_SIZ	16384
     22 
     23 #ifndef __OpenBSD__
     24 #define pledge(p1,p2) 0
     25 #define unveil(p1,p2) 0
     26 #endif
     27 
     28 #ifndef TLS_CA_CERT_FILE
     29 #define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
     30 #endif
     31 
     32 /* uri */
     33 struct uri {
     34 	char proto[48];
     35 	char host[256];
     36 	char path[2048];
     37 	char port[6];     /* numeric port */
     38 };
     39 
     40 char *argv0;
     41 
     42 /* max response size in bytes, 0 is unlimited */
     43 static size_t config_maxresponsesiz = 0;
     44 /* time-out in seconds */
     45 static time_t config_timeout = 10;
     46 /* parsed uri */
     47 static struct uri u;
     48 /* raw command-line argument */
     49 static char *url;
     50 /* TLS config */
     51 static struct tls_config *tls_config;
     52 
     53 int
     54 parseuri(const char *s, struct uri *u)
     55 {
     56 	const char *p = s, *b;
     57 	char *endptr = NULL;
     58 	size_t i;
     59 	unsigned long l;
     60 
     61 	u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0';
     62 	if (!*p)
     63 		return 0;
     64 
     65 	/* protocol part */
     66 	for (p = s; *p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
     67 		       *p == '+' || *p == '-' || *p == '.'); p++)
     68 		;
     69 	if (!strncmp(p, "://", 3)) {
     70 		if ((size_t)(p - s) >= sizeof(u->proto))
     71 			return -1; /* protocol too long */
     72 		memcpy(u->proto, s, p - s);
     73 		u->proto[p - s] = '\0';
     74 		p += 3; /* skip "://" */
     75 	} else {
     76 		return -1; /* no protocol specified */
     77 	}
     78 
     79 	/* IPv6 address */
     80 	if (*p == '[') {
     81 		/* bracket not found or host too long */
     82 		if (!(b = strchr(p, ']')) || (size_t)(b - p) >= (ssize_t)sizeof(u->host))
     83 			return -1;
     84 		memcpy(u->host, p + 1, b - p - 1);
     85 		u->host[b - p - 1] = '\0';
     86 		p = b + 1;
     87 	} else {
     88 		/* domain / host part, skip until port, path or end. */
     89 		if ((i = strcspn(p, ":/")) >= sizeof(u->host))
     90 			return -1; /* host too long */
     91 		memcpy(u->host, p, i);
     92 		u->host[i] = '\0';
     93 		p = &p[i];
     94 	}
     95 	/* port */
     96 	if (*p == ':') {
     97 		if ((i = strcspn(++p, "/")) >= sizeof(u->port))
     98 			return -1; /* port too long */
     99 		memcpy(u->port, p, i);
    100 		u->port[i] = '\0';
    101 		/* check for valid port: range 1 - 65535 */
    102 		errno = 0;
    103 		l = strtoul(u->port, &endptr, 10);
    104 		if (errno || u->port[0] == '\0' || *endptr ||
    105 		    !l || l > 65535)
    106 			return -1;
    107 		p = &p[i];
    108 	}
    109 	if (u->host[0]) {
    110 		p = &p[strspn(p, "/")];
    111 		strlcpy(u->path, "/", sizeof(u->path));
    112 	} else {
    113 		return -1;
    114 	}
    115 	/* treat truncation as an error */
    116 	if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path))
    117 		return -1;
    118 	return 0;
    119 }
    120 
    121 int
    122 edial(const char *host, const char *port)
    123 {
    124 	struct addrinfo hints, *res, *res0;
    125 	int error, save_errno, s;
    126 	const char *cause = NULL;
    127 	struct timeval timeout;
    128 
    129 	memset(&hints, 0, sizeof(hints));
    130 	hints.ai_family = AF_UNSPEC;
    131 	hints.ai_socktype = SOCK_STREAM;
    132 	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
    133 	if ((error = getaddrinfo(host, port, &hints, &res0)))
    134 		errx(1, "%s: %s: %s:%s", __func__, gai_strerror(error), host, port);
    135 	s = -1;
    136 	for (res = res0; res; res = res->ai_next) {
    137 		s = socket(res->ai_family, res->ai_socktype,
    138 		           res->ai_protocol);
    139 		if (s == -1) {
    140 			cause = "socket";
    141 			continue;
    142 		}
    143 
    144 		timeout.tv_sec = config_timeout;
    145 		timeout.tv_usec = 0;
    146 		if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1)
    147 			err(1, "%s: setsockopt", __func__);
    148 
    149 		timeout.tv_sec = config_timeout;
    150 		timeout.tv_usec = 0;
    151 		if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1)
    152 			err(1, "%s: setsockopt", __func__);
    153 
    154 		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
    155 			cause = "connect";
    156 			save_errno = errno;
    157 			close(s);
    158 			errno = save_errno;
    159 			s = -1;
    160 			continue;
    161 		}
    162 		break;
    163 	}
    164 	if (s == -1)
    165 		errx(1, "%s: %s: %s:%s", __func__, cause, host, port);
    166 	freeaddrinfo(res0);
    167 
    168 	return s;
    169 }
    170 
    171 int
    172 https_request(void)
    173 {
    174 	struct tls *t = NULL;
    175 	char buf[READ_BUF_SIZ], *p;
    176 	const char *errstr;
    177 	size_t n, len;
    178 	ssize_t r;
    179 	int fd = -1, httpok = 0, ret = 1;
    180 
    181 	if (pledge("stdio dns inet rpath unveil", NULL) == -1)
    182 		err(1, "pledge");
    183 
    184 	if (unveil(TLS_CA_CERT_FILE, "r") == -1)
    185 		err(1, "unveil: %s", TLS_CA_CERT_FILE);
    186 	if (unveil(NULL, NULL) == -1)
    187 		err(1, "unveil");
    188 
    189 	if (!(t = tls_client())) {
    190 		fprintf(stderr, "tls_client: %s\n", tls_error(t));
    191 		goto err;
    192 	}
    193 	if (tls_configure(t, tls_config)) {
    194 		fprintf(stderr, "tls_configure: %s\n", tls_error(t));
    195 		goto err;
    196 	}
    197 
    198 	fd = edial(u.host, u.port);
    199 	if (tls_connect_socket(t, fd, u.host) == -1)
    200 		errx(1, "tls_connect: %s", tls_error(t));
    201 
    202 	if (pledge("stdio", NULL) == -1)
    203 		err(1, "pledge");
    204 
    205 	/* create and send HTTP header */
    206 	snprintf(buf, sizeof(buf),
    207 		"GET %s HTTP/1.0\r\n"
    208 		"Host: %s\r\n"
    209 		"Connection: close\r\n"
    210 		"\r\n", u.path, u.host);
    211 	if ((r = tls_write(t, buf, strlen(buf))) < 0) {
    212 		fprintf(stderr, "tls_write: %s\n", tls_error(t));
    213 		goto err;
    214 	}
    215 
    216 	/* NOTE: HTTP header must fit in the buffer */
    217 	for (len = 0; len < sizeof(buf); len += r) {
    218 		/* NOTE: buffer size is -1 to NUL terminate the buffer for a
    219 		         string comparison. */
    220 		if ((r = tls_read(t, &buf[len], sizeof(buf) - len - 1)) == 0)
    221 			break;
    222 		if (r < 0) {
    223 			errstr = tls_error(t);
    224 			fprintf(stderr, "tls_read: %s\n", errstr ? errstr : "");
    225 			goto err;
    226 		}
    227 	}
    228 	buf[len] = '\0';
    229 
    230 	if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) ||
    231 	    !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
    232 		httpok = 1;
    233 
    234 	if (!(p = strstr(buf, "\r\n\r\n"))) {
    235 		fprintf(stderr, "no HTTP header found or header too big\n");
    236 		goto err;
    237 	}
    238 	*p = '\0'; /* NUL terminate header part */
    239 	p += strlen("\r\n\r\n");
    240 
    241 	if (httpok) {
    242 		n = len - (p - buf);
    243 		r = fwrite(p, 1, n, stdout);
    244 		if (ferror(stdout)) {
    245 			fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
    246 			goto err;
    247 		}
    248 	} else {
    249 		/* if not 200 OK print header */
    250 		fputs(buf, stderr);
    251 		fputs("\r\n\r\n", stderr);
    252 		/* TODO: exit or continue reading, probably nicer to continue read */
    253 	}
    254 
    255 	while (1) {
    256 		r = tls_read(t, &buf, sizeof(buf));
    257 		if (r == 0)
    258 			break;
    259 		if (r < 0) {
    260 			errstr = tls_error(t);
    261 			fprintf(stderr, "tls_read: %s\n", errstr ? errstr : "");
    262 			goto err;
    263 		}
    264 		len += r;
    265 
    266 		if (httpok) {
    267 			r = fwrite(buf, 1, r, stdout);
    268 			if (ferror(stdout)) {
    269 				fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
    270 				goto err;
    271 			}
    272 		}
    273 
    274 		if (config_maxresponsesiz && len >= config_maxresponsesiz)
    275 			break;
    276 	}
    277 	if (config_maxresponsesiz && len >= config_maxresponsesiz) {
    278 		fprintf(stderr, "tls_read: response too big: %zu >= %zu\n",
    279 		        len, config_maxresponsesiz);
    280 		goto err;
    281 	}
    282 	ret = 0;
    283 
    284 err:
    285 	if (t) {
    286 		tls_close(t);
    287 		tls_free(t);
    288 	}
    289 
    290 	return httpok ? ret : 2;
    291 }
    292 
    293 int
    294 http_request(void)
    295 {
    296 	char buf[READ_BUF_SIZ], *p;
    297 	size_t n, len;
    298 	ssize_t r;
    299 	int fd = -1, httpok = 0, ret = 1;
    300 
    301 	if (pledge("stdio dns inet", NULL) == -1)
    302 		err(1, "pledge");
    303 
    304 	fd = edial(u.host, u.port);
    305 
    306 	if (pledge("stdio", NULL) == -1)
    307 		err(1, "pledge");
    308 
    309 	/* create and send HTTP header */
    310 	snprintf(buf, sizeof(buf),
    311 		"GET %s HTTP/1.0\r\n"
    312 		"Host: %s\r\n"
    313 		"Connection: close\r\n"
    314 		"\r\n", u.path, u.host);
    315 	if ((r = write(fd, buf, strlen(buf))) == -1) {
    316 		fprintf(stderr, "write: %s\n", strerror(errno));
    317 		goto err;
    318 	}
    319 
    320 	/* NOTE: HTTP header must fit in the buffer */
    321 	for (len = 0; len < sizeof(buf); len += r) {
    322 		/* NOTE: buffer size is -1 to NUL terminate the buffer for a
    323 		         string comparison. */
    324 		if ((r = read(fd, &buf[len], sizeof(buf) - len - 1)) == 0)
    325 			break;
    326 		if (r == -1) {
    327 			fprintf(stderr, "read: %s\n", strerror(errno));
    328 			goto err;
    329 		}
    330 	}
    331 	buf[len] = '\0';
    332 
    333 	if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) ||
    334 	    !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
    335 		httpok = 1;
    336 
    337 	if (!(p = strstr(buf, "\r\n\r\n"))) {
    338 		fprintf(stderr, "no HTTP header found or header too big\n");
    339 		goto err;
    340 	}
    341 	*p = '\0'; /* NUL terminate header part */
    342 	p += strlen("\r\n\r\n");
    343 
    344 	if (httpok) {
    345 		n = len - (p - buf);
    346 		r = fwrite(p, 1, n, stdout);
    347 		if (ferror(stdout)) {
    348 			fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
    349 			goto err;
    350 		}
    351 	} else {
    352 		/* if not 200 OK print header */
    353 		fputs(buf, stderr);
    354 		fputs("\r\n\r\n", stderr);
    355 		/* TODO: exit or continue reading, probably nicer to continue read */
    356 	}
    357 
    358 	while (1) {
    359 		r = read(fd, &buf, sizeof(buf));
    360 		if (r == 0)
    361 			break;
    362 		if (r == -1) {
    363 			fprintf(stderr, "read: %s\n", strerror(errno));
    364 			goto err;
    365 		}
    366 		len += r;
    367 
    368 		if (httpok) {
    369 			r = fwrite(buf, 1, r, stdout);
    370 			if (ferror(stdout)) {
    371 				fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
    372 				goto err;
    373 			}
    374 		}
    375 
    376 		if (config_maxresponsesiz && len >= config_maxresponsesiz)
    377 			break;
    378 	}
    379 	if (config_maxresponsesiz && len >= config_maxresponsesiz) {
    380 		fprintf(stderr, "read: response too big: %zu >= %zu\n",
    381 		        len, config_maxresponsesiz);
    382 		goto err;
    383 	}
    384 	ret = 0;
    385 
    386 err:
    387 	if (fd != -1)
    388 		close(fd);
    389 	return httpok ? ret : 2;
    390 }
    391 
    392 int
    393 gopher_request(void)
    394 {
    395 	char buf[READ_BUF_SIZ];
    396 	size_t len = 0;
    397 	ssize_t r;
    398 	int fd = -1, ret = 1;
    399 
    400 	if (pledge("stdio dns inet", NULL) == -1)
    401 		err(1, "pledge");
    402 
    403 	fd = edial(u.host, u.port);
    404 
    405 	if (pledge("stdio", NULL) == -1)
    406 		err(1, "pledge");
    407 
    408 	/* create and send path, skip type part */
    409 	snprintf(buf, sizeof(buf), "%s\r\n", u.path + 2);
    410 	if ((r = write(fd, buf, strlen(buf))) == -1) {
    411 		fprintf(stderr, "write: %s\n", strerror(errno));
    412 		goto err;
    413 	}
    414 
    415 	while (1) {
    416 		r = read(fd, &buf, sizeof(buf));
    417 		if (r == 0)
    418 			break;
    419 		if (r == -1) {
    420 			fprintf(stderr, "read: %s\n", strerror(errno));
    421 			goto err;
    422 		}
    423 		len += r;
    424 
    425 		r = fwrite(buf, 1, r, stdout);
    426 		if (ferror(stdout)) {
    427 			fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
    428 			goto err;
    429 		}
    430 
    431 		if (config_maxresponsesiz && len >= config_maxresponsesiz)
    432 			break;
    433 	}
    434 	if (config_maxresponsesiz && len >= config_maxresponsesiz) {
    435 		fprintf(stderr, "tls_read: response too big: %zu >= %zu\n",
    436 		        len, config_maxresponsesiz);
    437 		goto err;
    438 	}
    439 	ret = 0;
    440 
    441 err:
    442 	if (fd != -1)
    443 		close(fd);
    444 	return ret;
    445 }
    446 
    447 void
    448 usage(void)
    449 {
    450 	fprintf(stderr, "usage: %s [-m maxresponse] [-t timeout] url\n",
    451 	        argv0);
    452 	exit(1);
    453 }
    454 
    455 int
    456 main(int argc, char **argv)
    457 {
    458 	char *end;
    459 	int statuscode;
    460 	long long l;
    461 
    462 	ARGBEGIN {
    463 	case 'm': /* max filesize */
    464 		errno = 0;
    465 		l = strtoll(EARGF(usage()), &end, 10);
    466 		if (errno || *end != '\0' || l < 0)
    467 			usage();
    468 		config_maxresponsesiz = l;
    469 		break;
    470 	case 't': /* timeout */
    471 		errno = 0;
    472 		l = strtoll(EARGF(usage()), &end, 10);
    473 		if (errno || *end != '\0' || l < 0)
    474 			usage();
    475 		config_timeout = l;
    476 		break;
    477 	default:
    478 		usage();
    479 	} ARGEND
    480 
    481 	if (argc != 1)
    482 		usage();
    483 
    484 	url = argv[0];
    485 	if (parseuri(url, &u) == -1)
    486 		errx(1, "invalid url: %s", url);
    487 
    488 	if (!strcmp(u.proto, "https")) {
    489 		if (tls_init())
    490 			errx(1, "tls_init failed");
    491 		if (!(tls_config = tls_config_new()))
    492 			errx(1, "tls config failed");
    493 #ifdef SUPPORT_LEGACY
    494 		/* enable legacy cipher and negotiation. */
    495 		if (tls_config_set_ciphers(tls_config, "legacy"))
    496 			errx(1, "tls set ciphers failed: %s",
    497 			     tls_config_error(tls_config));
    498 #endif
    499 		if (!strcmp(u.proto, "https"))
    500 			memcpy(u.port, "443", 4);
    501 		statuscode = https_request();
    502 	} else if (!strcmp(u.proto, "http")) {
    503 		if (!u.port[0])
    504 			memcpy(u.port, "80", 3);
    505 		statuscode = http_request();
    506 	} else if (!strcmp(u.proto, "gopher")) {
    507 		if (!u.port[0])
    508 			memcpy(u.port, "70", 3);
    509 
    510 		if (u.path[0] != '/' || u.path[1] == '\0')
    511 			errx(1, "must specify type");
    512 
    513 		statuscode = gopher_request();
    514 	} else {
    515 		if (u.proto[0])
    516 			errx(1, "unsupported protocol specified: %s", u.proto);
    517 		else
    518 			errx(1, "no protocol specified");
    519 	}
    520 
    521 	return statuscode;
    522 }