commit d10243c22f062c08b10bf2e59adefda40e1293fb
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 29 Dec 2018 16:09:33 +0100
initial repo
Diffstat:
A | LICENSE | | | 15 | +++++++++++++++ |
A | Makefile | | | 19 | +++++++++++++++++++ |
A | README | | | 44 | ++++++++++++++++++++++++++++++++++++++++++++ |
A | TODO | | | 16 | ++++++++++++++++ |
A | cli.c | | | 221 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | main.c | | | 464 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | xml.c | | | 474 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | xml.h | | | 40 | ++++++++++++++++++++++++++++++++++++++++ |
A | youtube.c | | | 535 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | youtube.h | | | 17 | +++++++++++++++++ |
10 files changed, 1845 insertions(+), 0 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2018 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,19 @@
+build: clean
+ cc -c xml.c ${CFLAGS} -Wall
+ cc -c youtube.c ${CFLAGS} -Wall
+ # UIs
+ # HTML
+ cc -c main.c ${CFLAGS} -Wall
+ # CLI
+ cc -c cli.c ${CFLAGS} -Wall
+ # Link HTML CGI (static)
+ cc -o main xml.o youtube.o main.o \
+ ${LDFLAGS} \
+ -ltls -lssl -lcrypto -static
+ # Link CLI UI
+ cc -o cli xml.o youtube.o cli.o \
+ ${LDFLAGS} \
+ -ltls
+
+clean:
+ rm -f main cli *.o
diff --git a/README b/README
@@ -0,0 +1,44 @@
+Dependencies:
+-------------
+
+- C compiler.
+- LibreSSL + libtls.
+
+
+Compile
+-------
+
+- make
+- doas make install
+
+
+Install HTTP CGI
+----------------
+
+Nginx + slowcgi example:
+
+ location /idiotbox/css/.* {
+ root /home/www/domains/www.codemadness.org/htdocs/idiotbox/css;
+ }
+
+ location ~ ^/idiotbox(/|/\?.*)$ {
+ include /etc/nginx/fastcgi_params;
+ fastcgi_pass unix:/run/slowcgi.sock;
+ fastcgi_param SCRIPT_FILENAME /cgi-bin/idiotbox;
+ fastcgi_param SCRIPT_NAME /cgi-bin/idiotbox;
+ fastcgi_param REQUEST_URI /cgi-bin/idiotbox;
+ }
+
+httpd + slowcgi example:
+
+ location match "/idiotbox" {
+ root "/cgi-bin/idiotbox.cgi"
+ fastcgi
+ }
+
+
+When using a chroot make sure to copy /etc/resolv.conf and /etc/ssl/cert.pem.
+
+To test from the command-line you can do:
+
+ QUERY_STRING="q=funny+cat+video" ./main | sed 1,2d | lynx -stdin
diff --git a/TODO b/TODO
@@ -0,0 +1,16 @@
+- decodeparam, getparam etc: cast the arguments of ctype functions to (unsigned char).
+
+- order by views does not work in searching channel.
+- pagination does not work in searching in channel.
+- searching in channel works, but not search in user.
+
+? some way to show duration for playlists?
+- show published date in a consistent way? ("non-human friendly": YYYY-mm-dd HH:MM:SS TZ)
+
+- separate code parsing and views for frontend.
+- frontends:
+ - CGI HTTP
+ - CGI gopher
+ - dmenu / console (TSV output?) (no HTTP client, just parsing).
+
+- test/tweak Accept-Language header.
diff --git a/cli.c b/cli.c
@@ -0,0 +1,221 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "youtube.h"
+
+#ifndef __OpenBSD__
+#define pledge(p1,p2) 0
+#define unveil(p1,p2) 0
+#endif
+
+#ifndef TLS_CA_CERT_FILE
+#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
+#endif
+
+/* TODO: escape control-characters etc */
+#define OUT(s) (fputs((s), stdout))
+
+struct video *videos;
+static int nvideos;
+
+/* Write a printf(3)-style formatted message to stderr, then terminate the
+ * process with exit status 1. Does not return. */
+void
+die(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	exit(1);
+}
+
+/* Return the numeric value (0-15) of the hexadecimal digit c, or 0 when c
+ * is not a hexadecimal digit. */
+int
+hexdigit(int c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	else if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	else if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+
+	return 0;
+}
+
+/* Decode the URL-encoded (percent-encoded) parameter value s into buf.
+ * Decoding stops at the end of the string or at the next '&' separator.
+ * "%XX" is decoded to the byte with hexadecimal value XX and '+' to a space.
+ * Returns the length of the decoded string, or -1 when bufsiz is 0, when
+ * the value does not fit or when a '%' is not followed by two hexadecimal
+ * digits. When bufsiz is not 0 buf is always NUL-terminated: on error it
+ * is set to the empty string. */
+int
+decodeparam(char *buf, size_t bufsiz, const char *s)
+{
+	size_t i;
+
+	if (!bufsiz)
+		return -1;
+
+	for (i = 0; *s && *s != '&'; s++) {
+		/* conservative bound: always leaves room for the NUL byte */
+		if (i + 3 >= bufsiz)
+			goto err;
+		switch (*s) {
+		case '%':
+			/* ctype functions need a value representable as
+			 * unsigned char: passing a negative char is undefined.
+			 * A NUL after '%' fails the first test, so s[2] is
+			 * never read past the end of the string. */
+			if (!isxdigit((unsigned char)s[1]) ||
+			    !isxdigit((unsigned char)s[2]))
+				goto err;
+			buf[i++] = hexdigit((unsigned char)s[1]) * 16 +
+			           hexdigit((unsigned char)s[2]);
+			s += 2;
+			break;
+		case '+':
+			buf[i++] = ' ';
+			break;
+		default:
+			buf[i++] = *s;
+			break;
+		}
+	}
+	buf[i] = '\0';
+
+	return i;
+
+err:
+	/* don't leave a partially decoded, unterminated value behind */
+	buf[0] = '\0';
+	return -1;
+}
+
+/* Find the parameter named s in the query string.
+ * A match must be followed by '=' and must start the query or follow a
+ * '&' or '?'. Returns a pointer to the (still encoded) value of the last
+ * such match, or NULL when the parameter is not present. */
+char *
+getparam(const char *query, const char *s)
+{
+	const char *match, *value = NULL;
+	size_t keylen = strlen(s);
+
+	match = strstr(query, s);
+	while (match) {
+		if (match[keylen] == '=' &&
+		    (match == query || match[-1] == '&' || match[-1] == '?'))
+			value = match + keylen + 1;
+		match = strstr(match + keylen, s);
+	}
+
+	return (char *)value;
+}
+
+/* Print the results in the global videos array to stdout as plain text:
+ * a title line followed by "Label: value" lines for the fields that are
+ * set, each result ended by a "===" line.
+ * Restricts the process to pledge "stdio" first and exits with status 1
+ * when that fails. Always returns 0. */
+int
+render(void)
+{
+	const struct video *v;
+	const char *prefix, *text;
+	int i;
+
+	if (pledge("stdio", NULL) == -1) {
+		fprintf(stderr, "pledge: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nvideos; i++) {
+		v = &videos[i];
+
+		/* TODO: better printing of other types */
+		prefix = "";
+		text = v->title;
+		switch (v->linktype) {
+		case Channel:
+			prefix = "[Channel] ";
+			text = v->channeltitle;
+			break;
+		case Movie:
+			prefix = "[Movie] ";
+			break;
+		case Playlist:
+			prefix = "[Playlist] ";
+			break;
+		default:
+			break;
+		}
+		OUT(prefix);
+		OUT(text);
+		OUT("\n");
+
+		if (v->id[0]) {
+			OUT("URL: https://www.youtube.com/embed/");
+			OUT(v->id);
+			OUT("\n");
+		}
+
+		/* the channel id takes precedence over the user id */
+		if (v->channelid[0]) {
+			OUT("Atom feed: https://www.youtube.com/feeds/videos.xml?");
+			OUT("channel_id=");
+			OUT(v->channelid);
+			OUT("\n");
+			OUT("Channel title: ");
+			OUT(v->channeltitle);
+			OUT("\n");
+			OUT("Channelid: ");
+			OUT(v->channelid);
+			OUT("\n");
+		} else if (v->userid[0]) {
+			OUT("Atom feed: https://www.youtube.com/feeds/videos.xml?");
+			OUT("user=");
+			OUT(v->userid);
+			OUT("\n");
+			OUT("Channel title: ");
+			OUT(v->channeltitle);
+			OUT("\n");
+			OUT("Userid: ");
+			OUT(v->userid);
+			OUT("\n");
+		}
+
+		if (v->publishedat[0]) {
+			OUT("Published: ");
+			OUT(v->publishedat);
+			OUT("\n");
+		}
+		if (v->viewcount[0]) {
+			OUT("Viewcount: ");
+			OUT(v->viewcount);
+			OUT("\n");
+		}
+		if (v->duration[0]) {
+			OUT("Duration: " );
+			OUT(v->duration);
+			OUT("\n");
+		}
+		OUT("===\n");
+	}
+
+	return 0;
+}
+
+/* Print the command-line synopsis for program name argv0 to stderr and
+ * exit with status 1. */
+static void
+usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s <keywords>\n", argv0);
+ exit(1);
+}
+
+/* Command-line entry point: restrict the process (pledge/unveil), search
+ * for the keywords in argv[1] ordered by relevance and print the results.
+ * Exits with status 1 on a usage error or when nothing was found. */
+int
+main(int argc, char *argv[])
+{
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1) {
+		fprintf(stderr, "pledge: %s\n", strerror(errno));
+		exit(1);
+	}
+	/* only the CA certificate file stays readable, then lock unveil */
+	if (unveil(TLS_CA_CERT_FILE, "r") == -1 ||
+	    unveil(NULL, NULL) == -1) {
+		fprintf(stderr, "unveil: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	if (argc < 2 || argv[1][0] == '\0')
+		usage(argv[0]);
+
+	videos = youtube_search(&nvideos, argv[1], "", "", "", "relevance");
+	if (videos == NULL || nvideos <= 0) {
+		OUT("No videos found\n");
+		exit(1);
+	}
+
+	render();
+
+	return 0;
+}
diff --git a/main.c b/main.c
@@ -0,0 +1,464 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "youtube.h"
+
+#ifndef __OpenBSD__
+#define pledge(p1,p2) 0
+#define unveil(p1,p2) 0
+#endif
+
+#ifndef TLS_CA_CERT_FILE
+#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
+#endif
+
+#define OUT(s) (fputs((s), stdout))
+
+extern char **environ;
+
+struct video *videos;
+static int curpage = 1, nvideos;
+
+/* CGI parameters */
+static char rawsearch[4096], search[4096], mode[16], order[16], page[64];
+static char chan[1024], user[1024];
+
+/* Write a printf(3)-style formatted message to stderr, then terminate the
+ * process with exit status 1. Does not return. */
+void
+die(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	exit(1);
+}
+
+/* Return the numeric value (0-15) of the hexadecimal digit c, or 0 when c
+ * is not a hexadecimal digit. */
+int
+hexdigit(int c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	else if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	else if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+
+	return 0;
+}
+
+/* Decode the URL-encoded (percent-encoded) parameter value s into buf.
+ * Decoding stops at the end of the string or at the next '&' separator.
+ * "%XX" is decoded to the byte with hexadecimal value XX and '+' to a space.
+ * Returns the length of the decoded string, or -1 when bufsiz is 0, when
+ * the value does not fit or when a '%' is not followed by two hexadecimal
+ * digits. When bufsiz is not 0 buf is always NUL-terminated: on error it
+ * is set to the empty string. */
+int
+decodeparam(char *buf, size_t bufsiz, const char *s)
+{
+	size_t i;
+
+	if (!bufsiz)
+		return -1;
+
+	for (i = 0; *s && *s != '&'; s++) {
+		/* conservative bound: always leaves room for the NUL byte */
+		if (i + 3 >= bufsiz)
+			goto err;
+		switch (*s) {
+		case '%':
+			/* ctype functions need a value representable as
+			 * unsigned char: passing a negative char is undefined.
+			 * A NUL after '%' fails the first test, so s[2] is
+			 * never read past the end of the string. */
+			if (!isxdigit((unsigned char)s[1]) ||
+			    !isxdigit((unsigned char)s[2]))
+				goto err;
+			buf[i++] = hexdigit((unsigned char)s[1]) * 16 +
+			           hexdigit((unsigned char)s[2]);
+			s += 2;
+			break;
+		case '+':
+			buf[i++] = ' ';
+			break;
+		default:
+			buf[i++] = *s;
+			break;
+		}
+	}
+	buf[i] = '\0';
+
+	return i;
+
+err:
+	/* don't leave a partially decoded, unterminated value behind */
+	buf[0] = '\0';
+	return -1;
+}
+
+/* Find the parameter named s in the query string.
+ * A match must be followed by '=' and must start the query or follow a
+ * '&' or '?'. Returns a pointer to the (still encoded) value of the last
+ * such match, or NULL when the parameter is not present. */
+char *
+getparam(const char *query, const char *s)
+{
+	const char *match, *value = NULL;
+	size_t keylen = strlen(s);
+
+	match = strstr(query, s);
+	while (match) {
+		if (match[keylen] == '=' &&
+		    (match == query || match[-1] == '&' || match[-1] == '?'))
+			value = match + keylen + 1;
+		match = strstr(match + keylen, s);
+	}
+
+	return (char *)value;
+}
+
+/* Write the string s to stdout with the characters <, >, ', & and "
+ * replaced by their HTML 2.0 / XML 1.0 entities, so the text is safe to
+ * use both as element content and inside a quoted attribute value.
+ * All other bytes are written unchanged. */
+void
+xmlencode(const char *s)
+{
+	for (; *s; s++) {
+		switch(*s) {
+		case '<':  OUT("&lt;");   break;
+		case '>':  OUT("&gt;");   break;
+		/* numeric entity: &apos; is not defined in HTML 2.0 */
+		case '\'': OUT("&#39;");  break;
+		case '&':  OUT("&amp;");  break;
+		case '"':  OUT("&quot;"); break;
+		default:   putchar(*s);
+		}
+	}
+}
+
+/* Parse the CGI parameters from the QUERY_STRING environment variable into
+ * the global parameter buffers:
+ *   chan, user: channel or user to search in ("Search all" clears both).
+ *   o:    order: "date", "relevance" or "views". The default is "date"
+ *         when searching in a channel or user, otherwise "relevance".
+ *   page: page number from 1 to 100, stored in page and curpage.
+ *   m:    theme: "light" (default), "dark", "pink" or "templeos".
+ *   q:    search text: rawsearch keeps the encoded value, search gets the
+ *         decoded value.
+ * An invalid value falls back to its default, except for a search text
+ * that cannot be decoded: that ends the request with "400 Bad Request". */
+void
+parsecgi(void)
+{
+	char *query, *p, *end;
+	size_t len;
+	long l;
+
+	if (!(query = getenv("QUERY_STRING")))
+		query = "";
+
+	/* channel: search in channel */
+	if ((p = getparam(query, "chan"))) {
+		if (decodeparam(chan, sizeof(chan), p) == -1)
+			chan[0] = '\0';
+	}
+	/* user: search in user */
+	if ((p = getparam(query, "user"))) {
+		if (decodeparam(user, sizeof(user), p) == -1)
+			user[0] = '\0';
+	}
+	/* "Search all" is the label of the submit button that leaves the
+	 * selected channel */
+	if (!strcmp(chan, "Search all") || !strcmp(user, "Search all")) {
+		chan[0] = '\0';
+		user[0] = '\0';
+	}
+
+	/* order */
+	if ((p = getparam(query, "o"))) {
+		if (decodeparam(order, sizeof(order), p) == -1 ||
+		    (strcmp(order, "date") &&
+		     strcmp(order, "relevance") &&
+		     strcmp(order, "views")))
+			order[0] = '\0';
+	}
+	if (!order[0])
+		snprintf(order, sizeof(order), "%s",
+		         chan[0] || user[0] ? "date" : "relevance");
+
+	/* page: must be a whole number from 1 to 100 */
+	if ((p = getparam(query, "page"))) {
+		if (decodeparam(page, sizeof(page), p) == -1)
+			page[0] = '\0';
+		errno = 0;
+		l = strtol(page, &end, 10);
+		/* range check on the long value, before narrowing to int */
+		if (errno || end == page || *end != '\0' || l < 1 || l > 100) {
+			curpage = 1;
+			page[0] = '\0';
+		} else {
+			curpage = l;
+			/* store the canonical number: strtol() also accepts
+			 * leading whitespace and a sign */
+			snprintf(page, sizeof(page), "%ld", l);
+		}
+	}
+
+	/* mode */
+	if ((p = getparam(query, "m"))) {
+		if (decodeparam(mode, sizeof(mode), p) == -1) {
+			/* discard a partially decoded value */
+			mode[0] = '\0';
+		} else {
+			/* fixup first character (label) for matching */
+			if (mode[0])
+				mode[0] = tolower((unsigned char)mode[0]);
+			/* allowed themes */
+			if (strcmp(mode, "light") &&
+			    strcmp(mode, "dark") &&
+			    strcmp(mode, "pink") &&
+			    strcmp(mode, "templeos"))
+				mode[0] = '\0';
+		}
+	}
+	if (!mode[0])
+		snprintf(mode, sizeof(mode), "light");
+
+	/* search */
+	if ((p = getparam(query, "q"))) {
+		/* keep the encoded value: it is passed to youtube_search() */
+		if ((len = strcspn(p, "&")) && len + 1 < sizeof(rawsearch)) {
+			memcpy(rawsearch, p, len);
+			rawsearch[len] = '\0';
+		}
+
+		if (decodeparam(search, sizeof(search), p) == -1) {
+			OUT("Status: 400 Bad Request\r\n\r\n");
+			exit(1);
+		}
+	}
+}
+
+/* Write the href value (without the surrounding quotes) of a link to
+ * result page pagenum, keeping the current search, theme, order and
+ * channel. */
+static void
+printpagehref(int pagenum)
+{
+	OUT("?q=");
+	/* rawsearch is still URL-encoded: the decoded search text could
+	 * contain '&', '+', '%' or '#', which would break the query string */
+	xmlencode(rawsearch);
+	printf("&amp;page=%d", pagenum);
+	OUT("&amp;m=");
+	xmlencode(mode);
+	OUT("&amp;o=");
+	xmlencode(order);
+	if (chan[0]) {
+		OUT("&amp;chan=");
+		xmlencode(chan);
+	}
+}
+
+/* Write the CGI response to stdout: the Content-Type header followed by
+ * the HTML page with the search form, the results in the global videos
+ * array and the pagination links.
+ * All text that comes from the request or from the remote results is
+ * escaped with xmlencode(). Restricts the process to pledge "stdio" first
+ * and answers with a 500 status when that fails. Always returns 0. */
+int
+render(void)
+{
+	int i;
+
+	if (pledge("stdio", NULL) == -1) {
+		OUT("Status: 500 Internal Server Error\r\n\r\n");
+		exit(1);
+	}
+
+	OUT(
+		"Content-Type: text/html; charset=utf-8\r\n\r\n"
+		"<!DOCTYPE html>\n<html>\n<head>\n"
+		"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
+		"<title>Search: \"");
+	xmlencode(search);
+	OUT("\"");
+	if (nvideos) {
+		/* remote data: must be escaped like any other text */
+		if (videos[0].channelid[0]) {
+			OUT(" in ");
+			xmlencode(videos[0].channeltitle);
+		} else if (videos[0].userid[0]) {
+			OUT(" in ");
+			xmlencode(videos[0].userid);
+		}
+	}
+	OUT(" sorted by ");
+	xmlencode(order);
+	OUT("</title>\n");
+	OUT(
+		"<link rel=\"stylesheet\" href=\"css/");
+	xmlencode(mode);
+	OUT(
+		".css\" type=\"text/css\" media=\"screen\" />\n"
+		"<link rel=\"icon\" type=\"image/png\" href=\"/favicon.png\" />\n"
+		"<meta content=\"width=device-width\" name=\"viewport\" />\n"
+		"</head>\n"
+		"<body class=\"search\">\n"
+		"<form method=\"get\" action=\"\">\n");
+
+	OUT("<input type=\"hidden\" name=\"m\" value=\"");
+	xmlencode(mode);
+	OUT("\" />\n");
+	if (chan[0]) {
+		OUT("<input type=\"hidden\" name=\"chan\" value=\"");
+		xmlencode(chan);
+		OUT("\" />\n");
+	}
+
+	OUT(
+		"<table class=\"search\" width=\"100%\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\">\n"
+		"<tr>\n"
+		" <td width=\"100%\" class=\"input\">\n"
+		" <input type=\"search\" name=\"q\" value=\"");
+	xmlencode(search);
+	OUT(
+		"\" placeholder=\"Search...\" size=\"72\" autofocus=\"autofocus\" class=\"search\" accesskey=\"f\" />\n"
+		" </td>\n"
+		" <td nowrap class=\"nowrap\">\n"
+		" <input type=\"submit\" value=\"Search\" class=\"button\"/>\n");
+
+	if (chan[0])
+		OUT(" <input type=\"submit\" name=\"chan\" value=\"Search all\" title=\"Search globally and not in the selected channel\" accesskey=\"c\" />\n");
+
+	OUT(
+		" <select name=\"o\" title=\"Order by\" accesskey=\"o\">\n");
+	printf(" <option value=\"date\"%s>Creation date</option>\n", !strcmp(order, "date") ? " selected=\"selected\"" : "");
+	printf(" <option value=\"relevance\"%s>Relevance</option>\n", !strcmp(order, "relevance") ? " selected=\"selected\"" : "");
+	printf(" <option value=\"views\"%s>Views</option>\n", !strcmp(order, "views") ? " selected=\"selected\"" : "");
+	OUT(
+		" </select>\n"
+		" <label for=\"m\">Style: </label>\n");
+
+	/* the button switches to the other theme */
+	if (!strcmp(mode, "light"))
+		OUT("\t\t<input type=\"submit\" name=\"m\" value=\"Dark\" title=\"Dark mode\" id=\"m\" accesskey=\"s\"/>\n");
+	else
+		OUT("\t\t<input type=\"submit\" name=\"m\" value=\"Light\" title=\"Light mode\" id=\"m\" accesskey=\"s\"/>\n");
+
+	OUT(
+		" </td>\n"
+		"</tr>\n"
+		"</table>\n"
+		"</form>\n");
+
+	if (nvideos) {
+		OUT(
+			"<hr/>\n"
+			"<table class=\"videos\" width=\"100%\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\">\n"
+			"<tbody>\n");
+
+		for (i = 0; i < nvideos; i++) {
+			OUT(
+				"<tr class=\"v\">\n"
+				" <td class=\"thumb\" width=\"120\" align=\"center\">\n"
+				" <a href=\"https://www.youtube.com/embed/");
+			xmlencode(videos[i].id);
+			/* TODO: for channel show channel picture in some way? */
+			OUT("\"><img src=\"https://i.ytimg.com/vi/");
+			xmlencode(videos[i].id);
+			OUT(
+				"/default.jpg\" alt=\"\" height=\"90\" border=\"0\" /></a>\n"
+				" </td>\n"
+				" <td>\n"
+				" <span class=\"title\"><a href=\"https://www.youtube.com/embed/");
+			xmlencode(videos[i].id);
+			printf("\" accesskey=\"%d\">", i);
+
+			/* TODO: better printing of other types */
+			switch (videos[i].linktype) {
+			case Channel:
+				OUT("[Channel] ");
+				xmlencode(videos[i].channeltitle);
+				break;
+			case Movie:
+				OUT("[Movie] ");
+				xmlencode(videos[i].title);
+				break;
+			case Playlist:
+				OUT("[Playlist] ");
+				xmlencode(videos[i].title);
+				break;
+			default:
+				xmlencode(videos[i].title);
+				break;
+			}
+
+			OUT(
+				"</a></span><br/>\n"
+				" <span class=\"channel\">");
+
+			OUT("<a title=\"Search in ");
+			xmlencode(videos[i].channeltitle);
+			OUT("\" href=\"?");
+			if (videos[i].channelid[0]) {
+				OUT("chan=");
+				xmlencode(videos[i].channelid);
+			} else if (videos[i].userid[0]) {
+				OUT("user=");
+				xmlencode(videos[i].userid);
+			}
+			OUT("&amp;m=");
+			xmlencode(mode);
+			OUT("\">");
+			xmlencode(videos[i].channeltitle);
+			OUT("</a>");
+			if (videos[i].channelid[0] || videos[i].userid[0]) {
+				OUT(" | <a title=\"");
+				xmlencode(videos[i].channeltitle);
+				OUT(" Atom feed\" href=\"https://www.youtube.com/feeds/videos.xml?");
+				if (videos[i].channelid[0]) {
+					OUT("channel_id=");
+					xmlencode(videos[i].channelid);
+				} else if (videos[i].userid[0]) {
+					OUT("user=");
+					xmlencode(videos[i].userid);
+				}
+				OUT("\">Atom feed</a>");
+			}
+			OUT("</span><br/>\n");
+			/* open and close the span together: no stray closing
+			 * tag when there is no published date */
+			if (videos[i].publishedat[0]) {
+				OUT(" <span class=\"publishedat\">Published: ");
+				xmlencode(videos[i].publishedat);
+				OUT("</span>");
+			}
+			OUT(
+				"<br/>\n"
+				" <span class=\"stats\">");
+			xmlencode(videos[i].viewcount);
+			OUT(
+				"</span><br/>\n"
+				" </td>\n"
+				" <td align=\"right\" class=\"a-r\">\n"
+				" <span class=\"duration\">");
+			xmlencode(videos[i].duration);
+			OUT(
+				"</span>\n"
+				" </td>\n"
+				"</tr>\n"
+				"<tr class=\"hr\">\n"
+				" <td colspan=\"3\"><hr/></td>\n"
+				"</tr>\n");
+		}
+		OUT("</tbody>\n");
+
+		OUT(
+			"<tfoot>\n"
+			"<tr>\n"
+			"\t<td align=\"left\" class=\"nowrap\" nowrap>\n");
+		/* there is no page before the first page */
+		if (curpage > 1) {
+			OUT("\t\t<a href=\"");
+			printpagehref(curpage - 1);
+			OUT("\" rel=\"prev\" accesskey=\"p\">← prev</a>\n");
+		}
+		OUT(
+			"\t</td>\n\t<td></td>\n"
+			"\t<td align=\"right\" class=\"a-r nowrap\" nowrap>\n");
+
+		OUT("\t\t<a href=\"");
+		printpagehref(curpage + 1);
+		OUT("\" rel=\"next\" accesskey=\"n\">next →</a>\n");
+
+		OUT(
+			"\t</td>\n"
+			"</tr>\n"
+			"</tfoot>\n");
+
+		OUT("</table>\n");
+	}
+
+	OUT("</body>\n</html>\n");
+
+	return 0;
+}
+
+/* CGI entry point: restrict the process (pledge/unveil), parse the CGI
+ * parameters, run the search when a search text, channel or user was
+ * given and render the page. Any failure is answered with a 500 status. */
+int
+main(void)
+{
+	int dosearch;
+
+	/* only the CA certificate file stays readable, then lock unveil */
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1 ||
+	    unveil(TLS_CA_CERT_FILE, "r") == -1 ||
+	    unveil(NULL, NULL) == -1) {
+		OUT("Status: 500 Internal Server Error\r\n\r\n");
+		exit(1);
+	}
+
+	parsecgi();
+
+	/* without any parameter only the search form is shown */
+	dosearch = rawsearch[0] || chan[0] || user[0];
+	if (dosearch) {
+		videos = youtube_search(&nvideos, rawsearch, chan, user, page, order);
+		if (videos == NULL || nvideos <= 0) {
+			OUT("Status: 500 Internal Server Error\r\n\r\n");
+			exit(1);
+		}
+	}
+
+	render();
+
+	return 0;
+}
diff --git a/xml.c b/xml.c
@@ -0,0 +1,474 @@
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xml.h"
+
+/* Parse the attributes of the current tag (x->tag).
+ * Reads characters with x->getnext() until a '>' or EOF and reports each
+ * attribute through the handlers that are set: xmlattrstart, xmlattr (the
+ * value, possibly in several chunks), xmlattrentity (an entity inside the
+ * value) and xmlattrend.
+ * A '/' outside of a value sets x->isshorttag. Attribute names that do not
+ * fit in x->name are truncated. */
+static void
+xml_parseattrs(XMLParser *x)
+{
+ size_t namelen = 0, valuelen;
+ int c, endsep, endname = 0, valuestart = 0;
+
+ while ((c = x->getnext()) != EOF) {
+ if (isspace(c)) {
+ /* whitespace after a name marks the end of that name */
+ if (namelen)
+ endname = 1;
+ continue;
+ } else if (c == '?')
+ ; /* ignore */
+ else if (c == '=') {
+ x->name[namelen] = '\0';
+ valuestart = 1;
+ endname = 1;
+ } else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) {
+ /* attribute without value */
+ x->name[namelen] = '\0';
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ endname = 0;
+ /* c is stored as the first character of the next name */
+ x->name[0] = c;
+ namelen = 1;
+ } else if (namelen && valuestart) {
+ /* attribute with value */
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+
+ valuelen = 0;
+ if (c == '\'' || c == '"') {
+ endsep = c;
+ } else {
+ /* unquoted value: ends at whitespace or '>', c is already part of it */
+ endsep = ' '; /* isspace() */
+ goto startvalue;
+ }
+
+ while ((c = x->getnext()) != EOF) {
+startvalue:
+ if (c == '&') { /* entities */
+ x->data[valuelen] = '\0';
+ /* call data function with data before entity if there is data */
+ if (valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ while ((c = x->getnext()) != EOF) {
+ if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c))))
+ break;
+ if (valuelen < sizeof(x->data) - 1)
+ x->data[valuelen++] = c;
+ else {
+ /* entity too long for buffer, handle as normal data */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ break;
+ }
+ if (c == ';') {
+ /* complete entity, including the leading '&' and trailing ';' */
+ x->data[valuelen] = '\0';
+ if (x->xmlattrentity)
+ x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) {
+ if (valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else {
+ /* buffer full: report this chunk and start a new one with c */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ /* end of the value: report the remaining data and the end of the attribute */
+ if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) {
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ break;
+ }
+ }
+ namelen = endname = valuestart = 0;
+ } else if (namelen < sizeof(x->name) - 1) {
+ /* part of the attribute name: characters that do not fit are dropped */
+ x->name[namelen++] = c;
+ }
+ if (c == '>') {
+ break;
+ } else if (c == '/') {
+ x->isshorttag = 1;
+ x->name[0] = '\0';
+ namelen = 0;
+ }
+ }
+}
+
+/* Parse the content of a comment until the closing "-->" or EOF.
+ * Calls xmlcommentstart at the start, xmlcomment for the content (in
+ * chunks) and xmlcommentend when the closing "-->" was read; each handler
+ * is only called when it is set. */
+static void
+xml_parsecomment(XMLParser *x)
+{
+ /* i: number of consecutive '-' characters read, at most 2 are kept pending */
+ size_t datalen = 0, i = 0;
+ int c;
+
+ if (x->xmlcommentstart)
+ x->xmlcommentstart(x);
+ while ((c = x->getnext()) != EOF) {
+ /* possible end of the comment: report the data that was buffered so far */
+ if (c == '-' || c == '>') {
+ if (x->xmlcomment) {
+ x->data[datalen] = '\0';
+ x->xmlcomment(x, x->data, datalen);
+ datalen = 0;
+ }
+ }
+
+ if (c == '-') {
+ /* more than 2 in a row: the excess '-' characters are content */
+ if (++i > 2) {
+ if (x->xmlcomment)
+ for (; i > 2; i--)
+ x->xmlcomment(x, "-", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ if (x->xmlcommentend)
+ x->xmlcommentend(x);
+ return;
+ } else if (i) {
+ /* the pending '-' characters were not the end: report them as content */
+ if (x->xmlcomment) {
+ for (; i > 0; i--)
+ x->xmlcomment(x, "-", 1);
+ }
+ i = 0;
+ }
+
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: report this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if (x->xmlcomment)
+ x->xmlcomment(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+/* Parse the content of a CDATA section until the closing "]]>" or EOF.
+ * Calls xmlcdatastart at the start, xmlcdata for the content (in chunks)
+ * and xmlcdataend when the closing "]]>" was read; each handler is only
+ * called when it is set. */
+static void
+xml_parsecdata(XMLParser *x)
+{
+ /* i: number of consecutive ']' characters read, at most 2 are kept pending */
+ size_t datalen = 0, i = 0;
+ int c;
+
+ if (x->xmlcdatastart)
+ x->xmlcdatastart(x);
+ while ((c = x->getnext()) != EOF) {
+ /* possible end of the section: report the data that was buffered so far */
+ if (c == ']' || c == '>') {
+ if (x->xmlcdata) {
+ x->data[datalen] = '\0';
+ x->xmlcdata(x, x->data, datalen);
+ datalen = 0;
+ }
+ }
+
+ if (c == ']') {
+ /* more than 2 in a row: the excess ']' characters are content */
+ if (++i > 2) {
+ if (x->xmlcdata)
+ for (; i > 2; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ if (x->xmlcdataend)
+ x->xmlcdataend(x);
+ return;
+ } else if (i) {
+ /* the pending ']' characters were not the end: report them as content */
+ if (x->xmlcdata)
+ for (; i > 0; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 0;
+ }
+
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: report this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if (x->xmlcdata)
+ x->xmlcdata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+/* Encode the codepoint r as UTF-8 into s, which must have room for 4
+ * bytes. The output is not NUL-terminated.
+ * Returns the number of bytes written, which is 0 for the codepoint 0. */
+static int
+codepointtoutf8(long r, char *s)
+{
+	if (r == 0) {
+		return 0; /* NUL byte */
+	} else if (r <= 0x7F) {
+		/* 1 byte: 0aaaaaaa */
+		s[0] = r;
+		return 1;
+	} else if (r <= 0x07FF) {
+		/* 2 bytes: 00000aaa aabbbbbb */
+		s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */
+		s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */
+		return 2;
+	} else if (r <= 0xFFFF) {
+		/* 3 bytes: aaaabbbb bbcccccc */
+		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
+		s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */
+		s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */
+		return 3;
+	} else {
+		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
+		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
+		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
+		s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */
+		s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */
+		return 4;
+	}
+}
+
+/* Convert the named entity e (including the leading '&' and the trailing
+ * ';') to its character. Only the 5 predefined XML entities are known, in
+ * lowercase or in uppercase.
+ * Returns 1 and stores the NUL-terminated character in buf, 0 when the
+ * entity is unknown or -1 when the buffer is too small. */
+static int
+namedentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	static const struct {
+		char *entity;
+		int c;
+	} entities[] = {
+		{ "&amp;",  '&'  },
+		{ "&lt;",   '<'  },
+		{ "&gt;",   '>'  },
+		{ "&apos;", '\'' },
+		{ "&quot;", '"'  },
+		{ "&AMP;",  '&'  },
+		{ "&LT;",   '<'  },
+		{ "&GT;",   '>'  },
+		{ "&APOS;", '\'' },
+		{ "&QUOT;", '"'  }
+	};
+	size_t i;
+
+	/* buffer is too small */
+	if (bufsiz < 2)
+		return -1;
+
+	/* doesn't start with &: can't match */
+	if (*e != '&')
+		return 0;
+
+	for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
+		if (!strcmp(e, entities[i].entity)) {
+			buf[0] = entities[i].c;
+			buf[1] = '\0';
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/* Convert the numeric entity e ("&#NNN;" decimal or "&#xHHH;" hexadecimal)
+ * to a NUL-terminated UTF-8 string in buf.
+ * Returns the length of the string in bytes, 0 when e is not a well-formed
+ * numeric entity or is not a codepoint that can be encoded (higher than
+ * 0x10FFFF or a surrogate), or -1 when the buffer is too small. */
+static int
+numericentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	unsigned long l;
+	int len, base = 10;
+	char *end;
+
+	/* buffer is too small */
+	if (bufsiz < 5)
+		return -1;
+
+	/* not a numeric entity */
+	if (e[0] != '&' || e[1] != '#')
+		return 0;
+
+	e += 2; /* skip "&#" */
+	/* hex (16) or decimal (10) */
+	if (*e == 'x') {
+		base = 16;
+		e++;
+	}
+	/* the number must start with a digit: strtoul() would also accept
+	 * leading whitespace and a sign, "-1" wraps around to ULONG_MAX */
+	if (base == 16 ? !isxdigit((unsigned char)*e) : !isdigit((unsigned char)*e))
+		return 0;
+
+	errno = 0;
+	l = strtoul(e, &end, base);
+	/* invalid value or not a well-formed entity or too high codepoint */
+	if (errno || *end != ';' || l > 0x10FFFF)
+		return 0;
+	/* surrogates are not characters and can't be encoded as UTF-8 */
+	if (l >= 0xD800 && l <= 0xDFFF)
+		return 0;
+	len = codepointtoutf8(l, buf);
+	buf[len] = '\0';
+
+	return len;
+}
+
+/* Convert a named or numeric entity string e to a NUL-terminated string
+ * in buf, which must have room for at least 5 bytes.
+ * Returns the length of the string in bytes, 0 when the entity could not
+ * be converted or -1 when the buffer is too small. */
+int
+xml_entitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	/* buffer is too small */
+	if (bufsiz < 5)
+		return -1;
+	/* doesn't start with & */
+	if (e[0] != '&')
+		return 0;
+	/* named entity */
+	if (e[1] != '#')
+		return namedentitytostr(e, buf, bufsiz);
+	else /* numeric entity */
+		return numericentitytostr(e, buf, bufsiz);
+}
+
+/* Parse the XML data that is read one character at a time with
+ * x->getnext() until it returns EOF, and call the handlers that are set in
+ * x for tags, attributes, comments, CDATA sections, data and entities.
+ * Returns immediately when x->getnext is not set. Everything before the
+ * first '<' is skipped. */
+void
+xml_parse(XMLParser *x)
+{
+ size_t datalen, tagdatalen;
+ int c, isend;
+
+ if (!x->getnext)
+ return;
+ while ((c = x->getnext()) != EOF && c != '<')
+ ; /* skip until < */
+
+ while (c != EOF) {
+ if (c == '<') { /* parse tag */
+ if ((c = x->getnext()) == EOF)
+ return;
+
+ if (c == '!') { /* cdata and comments */
+ for (tagdatalen = 0; (c = x->getnext()) != EOF;) {
+ /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
+ if (tagdatalen <= sizeof("[CDATA[") - 1)
+ x->data[tagdatalen++] = c;
+ if (c == '>')
+ break;
+ else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
+ (x->data[0] == '-')) {
+ /* "<!--" was read */
+ xml_parsecomment(x);
+ break;
+ } else if (c == '[') {
+ if (tagdatalen == sizeof("[CDATA[") - 1 &&
+ !strncmp(x->data, "[CDATA[", tagdatalen)) {
+ /* "<![CDATA[" was read */
+ xml_parsecdata(x);
+ break;
+ }
+ }
+ }
+ } else {
+ /* normal tag (open, short open, close), processing instruction. */
+ x->tag[0] = c;
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, don't strip "?" prefix. */
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ /* end tag: the name starts after the '/' */
+ if ((c = x->getnext()) == EOF)
+ return;
+ x->tag[0] = c;
+ isend = 1;
+ }
+
+ while ((c = x->getnext()) != EOF) {
+ if (c == '/')
+ x->isshorttag = 1; /* short tag */
+ else if (c == '>' || isspace(c)) {
+ /* end of the tag name */
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, starts with </ */
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ } else {
+ /* start tag */
+ if (x->xmltagstart)
+ x->xmltagstart(x, x->tag, x->taglen);
+ if (isspace(c))
+ xml_parseattrs(x);
+ if (x->xmltagstartparsed)
+ x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
+ }
+ /* call tagend for shortform or processing instruction */
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
+ break;
+ } else if (x->taglen < sizeof(x->tag) - 1)
+ x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
+ }
+ }
+ } else {
+ /* parse tag data */
+ datalen = 0;
+ if (x->xmldatastart)
+ x->xmldatastart(x);
+ while ((c = x->getnext()) != EOF) {
+ if (c == '&') {
+ /* entity: first report the data that precedes it */
+ if (datalen) {
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ }
+ x->data[0] = c;
+ datalen = 1;
+ while ((c = x->getnext()) != EOF) {
+ if (c == '<')
+ break;
+ if (datalen < sizeof(x->data) - 1)
+ x->data[datalen++] = c;
+ else {
+ /* entity too long for buffer, handle as normal data */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ break;
+ }
+ if (c == ';') {
+ /* complete entity, including the leading '&' and trailing ';' */
+ x->data[datalen] = '\0';
+ if (x->xmldataentity)
+ x->xmldataentity(x, x->data, datalen);
+ datalen = 0;
+ break;
+ }
+ }
+ } else if (c != '<') {
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ /* buffer full: report this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ /* start of the next tag: report the remaining data */
+ if (c == '<') {
+ x->data[datalen] = '\0';
+ if (x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen);
+ if (x->xmldataend)
+ x->xmldataend(x);
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/xml.h b/xml.h
@@ -0,0 +1,40 @@
+#ifndef XML_H
+#define XML_H
+
+#include <stddef.h> /* size_t */
+#include <stdio.h>  /* BUFSIZ */
+
+/* Parser context: the handlers to call, the function to read the input
+ * and the buffers used while parsing.
+ * Handlers that are not set (NULL) are not called. Strings are passed to
+ * a handler as a pointer followed by its length in bytes. */
+typedef struct xmlparser {
+	/* handlers */
+	/* attribute value: tag, attribute name, (part of the) value */
+	void (*xmlattr)(struct xmlparser *, const char *, size_t,
+	      const char *, size_t, const char *, size_t);
+	/* end of attribute: tag, attribute name */
+	void (*xmlattrend)(struct xmlparser *, const char *, size_t,
+	      const char *, size_t);
+	/* start of attribute: tag, attribute name */
+	void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
+	      const char *, size_t);
+	/* entity in an attribute value: tag, attribute name, entity */
+	void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
+	      const char *, size_t, const char *, size_t);
+	void (*xmlcdatastart)(struct xmlparser *);
+	void (*xmlcdata)(struct xmlparser *, const char *, size_t);
+	void (*xmlcdataend)(struct xmlparser *);
+	void (*xmlcommentstart)(struct xmlparser *);
+	void (*xmlcomment)(struct xmlparser *, const char *, size_t);
+	void (*xmlcommentend)(struct xmlparser *);
+	void (*xmldata)(struct xmlparser *, const char *, size_t);
+	void (*xmldataend)(struct xmlparser *);
+	/* entity in tag data */
+	void (*xmldataentity)(struct xmlparser *, const char *, size_t);
+	void (*xmldatastart)(struct xmlparser *);
+	/* end of tag: tag name, is it a short tag? */
+	void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
+	/* start of tag, called before the attributes are parsed */
+	void (*xmltagstart)(struct xmlparser *, const char *, size_t);
+	/* start of tag, called after the attributes are parsed:
+	 * tag name, is it a short tag? */
+	void (*xmltagstartparsed)(struct xmlparser *, const char *,
+	      size_t, int);
+
+	/* read the next character of the input, returns EOF at the end */
+	int (*getnext)(void);
+
+	/* current tag */
+	char tag[1024];
+	size_t taglen;
+	/* current tag is in short form ? <tag /> */
+	int isshorttag;
+	/* current attribute name */
+	char name[1024];
+	/* data buffer used for tag data, cdata and attribute data */
+	char data[BUFSIZ];
+} XMLParser;
+
+int xml_entitytostr(const char *, char *, size_t);
+void xml_parse(XMLParser *);
+
+#endif /* XML_H */
diff --git a/youtube.c b/youtube.c
@@ -0,0 +1,535 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <tls.h>
+
+#include "youtube.h"
+#include "xml.h"
+
+#define READ_BUF_SIZ 16384 /* read buffer in bytes */
+#define MAX_RESPONSETIMEOUT 10 /* timeout in seconds */
+#define MAX_RESPONSESIZ 500000 /* max download size in bytes */
+
+/* expands to TWO arguments: the string literal and its length without NUL */
+#define STRP(s) s,sizeof(s)-1
+
+/*
+ * Result storage shared by the parser callbacks.
+ * maxvideos is an enumeration constant rather than a `static const int`:
+ * an array with static storage duration needs an integer constant
+ * expression as its bound, and a const-qualified variable is not one in C.
+ * The extra slot exists because xmlattr() keeps writing to videos[nvideos]
+ * right after incrementing nvideos, i.e. possibly at index maxvideos.
+ */
+enum { maxvideos = 30 };
+static struct video videos[maxvideos + 1];
+static int nvideos; /* number of committed entries in videos[] */
+
+/* scratch buffers: href values appended by xmlattr() for the title link (id)
+ * and the byline link (userid) of the current item; xmltagend() parses them
+ * and then clears them */
+static char id[256], userid[256];
+
+/* parser state: metainfocount is reset to 0 by xmlattr() when a meta-info
+ * list starts and incremented by xmltagstart() for every <li> seen while
+ * Metainfo is set; state is a bit mask of the ItemState flags below */
+static int metainfocount;
+static enum ItemState {
+ None = 0,
+ Item = 1, Pager = 2, /* inside a result item / the search pager was reached */
+ Metainfo = 4, Title = 8, User = 16, Videotime = 32, /* sub-sections of a result item */
+} state;
+
+/* data buffers, size and offset used for parsing XML, see getnext():
+ * responsedata points at the response body, responsesize is its length and
+ * responseoff is the index of the next byte getnext() hands out */
+static char *responsedata;
+static size_t responsesize;
+static size_t responseoff;
+
+/* ? TODO: don't die in youtube.c ? */
+/*
+ * Print a printf(3)-style message to stderr and terminate the process with
+ * exit status 1. Never returns.
+ */
+static void
+die(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	exit(1);
+}
+
+/*
+ * Numeric value (0-15) of the hexadecimal digit c.
+ * Any character that is not a hex digit yields 0.
+ */
+static int
+hexdigit(int c)
+{
+	int value = 0;
+
+	if (c >= 'a' && c <= 'f')
+		value = 10 + (c - 'a');
+	else if (c >= 'A' && c <= 'F')
+		value = 10 + (c - 'A');
+	else if (c >= '0' && c <= '9')
+		value = c - '0';
+
+	return value;
+}
+
+/*
+ * Percent-decode one query-string value: decode s until its NUL terminator
+ * or the next '&' separator. "%XX" becomes the byte 0xXX and '+' becomes a
+ * space; everything else is copied as-is.
+ *
+ * buf/bufsiz: destination buffer and its size.
+ * Returns the decoded length (buf is NUL-terminated), or -1 when bufsiz is
+ * 0, the value does not fit or a '%' is not followed by two hex digits.
+ * On -1 buf may hold a partial, unterminated result.
+ */
+static int
+decodeparam(char *buf, size_t bufsiz, const char *s)
+{
+	size_t i;
+
+	if (!bufsiz)
+		return -1;
+
+	for (i = 0; *s && *s != '&'; s++) {
+		/* conservative bound: each round stores one byte, but three
+		 * bytes of headroom are required before decoding goes on */
+		if (i + 3 >= bufsiz)
+			return -1;
+		switch (*s) {
+		case '%':
+			/* <ctype.h> functions are only defined for unsigned
+			 * char values and EOF; a plain char may be negative.
+			 * The || stops at a NUL right after the '%', so s[2]
+			 * is never read past the terminator. */
+			if (!isxdigit((unsigned char)s[1]) ||
+			    !isxdigit((unsigned char)s[2]))
+				return -1;
+			buf[i++] = hexdigit((unsigned char)s[1]) * 16 +
+			           hexdigit((unsigned char)s[2]);
+			s += 2;
+			break;
+		case '+':
+			buf[i++] = ' ';
+			break;
+		default:
+			buf[i++] = *s;
+			break;
+		}
+	}
+	buf[i] = '\0';
+
+	return (int)i;
+}
+
+/*
+ * Find the value of parameter s in a query string or URL.
+ * A match is an occurrence of s that is directly followed by '=' and that
+ * starts the string or follows '&' or '?'. When the parameter occurs more
+ * than once the last occurrence wins.
+ *
+ * Returns a pointer into query just past the '=' (the value is NOT
+ * terminated at the next '&', see decodeparam()), or NULL when the
+ * parameter is absent or s is empty.
+ */
+static char *
+getparam(const char *query, const char *s)
+{
+	const char *p, *last = NULL;
+	size_t len;
+
+	len = strlen(s);
+	/* an empty key matches everywhere and would never advance the loop */
+	if (!len)
+		return NULL;
+
+	for (p = query; (p = strstr(p, s)); p += len) {
+		if (p[len] == '=' && (p == query || p[-1] == '&' || p[-1] == '?'))
+			last = p + len + 1;
+	}
+
+	return (char *)last;
+}
+
+/*
+ * Test whether the whitespace-separated class list `classes` contains the
+ * class name `clss` as a whole token.
+ *
+ * len must be the length of clss (callers pass both through STRP()).
+ * Every occurrence of clss is examined, not just the first one, so a class
+ * is still found when an earlier class merely starts with the same text,
+ * e.g. "yt-lockup" in "yt-lockup-tile yt-lockup".
+ *
+ * Returns 1 on a match and 0 otherwise; an empty class name never matches.
+ */
+static int
+isclassmatch(const char *classes, const char *clss, size_t len)
+{
+	const char *p;
+
+	if (!len)
+		return 0;
+
+	for (p = classes; (p = strstr(p, clss)); p += len) {
+		/* token boundary on both sides: start/whitespace before,
+		 * whitespace/end of string after */
+		if ((p == classes || isspace((unsigned char)p[-1])) &&
+		    (!p[len] || isspace((unsigned char)p[len])))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * XML/HTML entity conversion.
+ * Returns the decoded form of entity s when xml_entitytostr() reports a
+ * result greater than 0, otherwise s itself. The decoded text lives in a
+ * static 16-byte buffer that the next call overwrites.
+ */
+static const char *
+entitytostr(const char *s)
+{
+	static char decoded[16];
+
+	return xml_entitytostr(s, decoded, sizeof(decoded)) > 0 ? decoded : s;
+}
+/*
+ * Attribute callback, installed as x.xmlattr by youtube_search(): t is the
+ * tag name, a the attribute name and v the attribute value (x, tl, al and
+ * vl are not used in this function).
+ * Recognizes result boundaries by class name, sets the state flags the
+ * other callbacks act on and appends attribute values to the fields of the
+ * entry currently being filled in, videos[nvideos].
+ */
+void
+xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
+ const char *v, size_t vl)
+{
+ /* index into videos[] of the last Channel entry seen, or -1 for none;
+  * entries without a channelid of their own take channelid and
+  * channeltitle from it */
+ static int grouped = -1;
+ /* a div with class "search-pager": switch from Item to Pager state */
+ if (!strcmp(t, "div") && !strcmp(a, "class") && isclassmatch(v, STRP("search-pager"))) {
+ /* last video: commit the entry that is still being filled in */
+ if (videos[nvideos].linktype && nvideos < maxvideos) {
+ if (grouped != -1 && !videos[nvideos].channelid[0]) {
+ strlcpy(videos[nvideos].channelid, videos[grouped].channelid, sizeof(videos[nvideos].channelid));
+ strlcpy(videos[nvideos].channeltitle, videos[grouped].channeltitle, sizeof(videos[nvideos].channeltitle));
+ }
+ nvideos++;
+ }
+ state &= ~Item;
+ state |= Pager;
+ }
+ /* the result list is full: ignore all further attributes */
+ if (nvideos >= maxvideos)
+ return;
+ /* a div with class "yt-lockup" starts a new result item */
+ if (!strcmp(t, "div") && !strcmp(a, "class") &&
+ isclassmatch(v, STRP("yt-lockup"))) {
+ state |= Item;
+ if (videos[nvideos].linktype) { /* the previous entry is complete: commit it */
+ if (videos[nvideos].channelid[0] || videos[nvideos].userid[0] ||
+ videos[nvideos].linktype != Video)
+ grouped = -1; /* it has its own channel/user or is not a video: stop inheriting */
+ if (videos[nvideos].linktype == Channel)
+ grouped = nvideos; /* later entries may inherit from this channel */
+ if (grouped != -1 && !videos[nvideos].channelid[0]) {
+ strlcpy(videos[nvideos].channelid, videos[grouped].channelid, sizeof(videos[nvideos].channelid));
+ strlcpy(videos[nvideos].channeltitle, videos[grouped].channeltitle, sizeof(videos[nvideos].channeltitle));
+ }
+ nvideos++;
+ }
+ if (strstr(v, " yt-lockup-channel ")) /* classify the new entry by its class list */
+ videos[nvideos].linktype = Channel;
+ else if (strstr(v, "yt-lockup-movie-"))
+ videos[nvideos].linktype = Movie;
+ else if (strstr(v, " yt-lockup-playlist "))
+ videos[nvideos].linktype = Playlist;
+ if (strstr(v, " yt-lockup-video ")) /* plain if, not else-if: overrides the types above */
+ videos[nvideos].linktype = Video;
+ }
+ if (!(state & Item)) /* everything below only applies inside a result item */
+ return;
+ /* class attributes that open a sub-section of the item */
+ if (!strcmp(t, "span") && !strcmp(a, "class") && isclassmatch(v, STRP("video-time")))
+ state |= Videotime;
+ if (!strcmp(t, "ul") && !strcmp(a, "class") && isclassmatch(v, STRP("yt-lockup-meta-info"))) {
+ state |= Metainfo;
+ metainfocount = 0;
+ }
+ if (!strcmp(t, "h3") && !strcmp(a, "class") && isclassmatch(v, STRP("yt-lockup-title")))
+ state |= Title;
+ if (!strcmp(t, "div") && !strcmp(a, "class") && isclassmatch(v, STRP("yt-lockup-byline")))
+ state |= User;
+ /* title link: its title attribute is appended to the channel or video title */
+ if ((state & Title) && !strcmp(t, "a") && !strcmp(a, "title")) {
+ if (videos[nvideos].linktype == Channel)
+ strlcat(videos[nvideos].channeltitle, v, sizeof(videos[nvideos].channeltitle));
+ else
+ strlcat(videos[nvideos].title, v, sizeof(videos[nvideos].title));
+ }
+ /* title link target: collected in id, parsed for the video id in xmltagend() */
+ if ((state & Title) && !strcmp(t, "a") && !strcmp(a, "href"))
+ strlcat(id, v, sizeof(id));
+ /* channel id carried by a button attribute */
+ if (!strcmp(t, "button") && !strcmp(a, "data-channel-external-id"))
+ strlcat(videos[nvideos].channelid, v, sizeof(videos[nvideos].channelid));
+ /* byline link target: collected in userid, parsed in xmltagend() */
+ if ((state & User) && !strcmp(t, "a") && !strcmp(a, "href"))
+ strlcat(userid, v, sizeof(userid));
+}
+
+/*
+ * Attribute callback for an entity inside an attribute value: decode it
+ * with entitytostr() and hand the result to xmlattr() as a regular value.
+ * Does nothing once the result list is full, unless the Pager state is set.
+ */
+void
+xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
+	const char *v, size_t vl)
+{
+	const char *decoded;
+
+	if ((state & Pager) || nvideos < maxvideos) {
+		decoded = entitytostr(v);
+		xmlattr(x, t, tl, a, al, decoded, strlen(decoded));
+	}
+}
+
+/*
+ * Character data callback: append the text d to the field of the current
+ * entry (videos[nvideos]) selected by the active state flags.
+ * dl is unused; d is treated as a NUL-terminated string.
+ */
+void
+xmldata(XMLParser *x, const char *d, size_t dl)
+{
+	struct video *cur;
+
+	/* nothing to collect: pager reached, no state active, or the list
+	 * is full (videos after this must not be processed) */
+	if ((state & Pager) || !state || nvideos >= maxvideos)
+		return;
+
+	cur = &videos[nvideos];
+
+	/* the meaning of the meta info items depends on the parsed link
+	 * type: channels list their video count first, everything else a
+	 * publish date followed by a view count */
+	if (state & Metainfo) {
+		if (cur->linktype == Channel) {
+			if (metainfocount == 1)
+				strlcat(cur->channelvideos, d, sizeof(cur->channelvideos));
+		} else if (metainfocount == 1) {
+			strlcat(cur->publishedat, d, sizeof(cur->publishedat));
+		} else if (metainfocount == 2) {
+			strlcat(cur->viewcount, d, sizeof(cur->viewcount));
+		}
+	}
+
+	if ((state & Videotime) && !strcmp(x->tag, "span"))
+		strlcat(cur->duration, d, sizeof(cur->duration));
+
+	if ((state & User) && !strcmp(x->tag, "a"))
+		strlcat(cur->channeltitle, d, sizeof(cur->channeltitle));
+}
+
+/*
+ * Character data callback for an entity: decode it with entitytostr() and
+ * feed the result to xmldata(). The conversion is skipped when no state is
+ * active or the result list is full, since xmldata() would drop it anyway.
+ */
+void
+xmldataentity(XMLParser *x, const char *d, size_t dl)
+{
+	const char *decoded;
+
+	if (state && nvideos < maxvideos) {
+		decoded = entitytostr(d);
+		xmldata(x, decoded, strlen(decoded));
+	}
+}
+
+/*
+ * Tag end callback: leave the sub-section states and turn the link targets
+ * collected by xmlattr() into fields of the current entry.
+ *  - "ul" ends the meta info list.
+ *  - "h3" ends the title; a "/watch" link yields the video id.
+ *  - any tag end while User is set ends the byline; a "/channel/" or
+ *    "/user/" link yields channelid or userid.
+ *  - any tag end clears Videotime.
+ * When the result list is full the function returns right after clearing
+ * the Title or User flag, without touching the states handled after it.
+ */
+void
+xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
+{
+	struct video *cur;
+	const char *value;
+
+	if (!strcmp(t, "ul"))
+		state &= ~Metainfo;
+
+	if ((state & Title) && !strcmp(t, "h3")) {
+		state &= ~Title;
+
+		if (nvideos >= maxvideos)
+			return;
+		cur = &videos[nvideos];
+
+		if (!strncmp(id, STRP("/watch"))) {
+			/* a watch link implies a video unless already classified */
+			if (cur->linktype == Unknown)
+				cur->linktype = Video;
+			value = getparam(id, "v");
+			if (value && decodeparam(cur->id, sizeof(cur->id), value) == -1)
+				cur->id[0] = '\0';
+		}
+
+		id[0] = '\0';
+	}
+
+	if (state & User) {
+		state &= ~User;
+
+		if (nvideos >= maxvideos)
+			return;
+		cur = &videos[nvideos];
+
+		/* can be user or channel */
+		if (!strncmp(userid, STRP("/channel/")))
+			strlcpy(cur->channelid, userid + sizeof("/channel/") - 1,
+			        sizeof(cur->channelid));
+		else if (!strncmp(userid, STRP("/user/")))
+			strlcpy(cur->userid, userid + sizeof("/user/") - 1,
+			        sizeof(cur->userid));
+
+		userid[0] = '\0';
+	}
+
+	state &= ~Videotime;
+}
+
+/*
+ * Tag start callback: count the <li> items of the meta info list so that
+ * xmldata() knows which item the following text belongs to.
+ */
+void
+xmltagstart(XMLParser *x, const char *t, size_t tl)
+{
+	if (!(state & Metainfo))
+		return;
+	if (!strcmp(t, "li"))
+		metainfocount++;
+}
+
+/*
+ * Read from the TLS connection t until tls_read() reports end of data.
+ *
+ * Returns the data as a NUL-terminated heap buffer owned by the caller
+ * (never NULL; an empty response gives an empty string).
+ * Terminates the process through die() on allocation failure, on a read
+ * error (any negative tls_read() result, which is what lets the socket
+ * timeout abort a stalled transfer) and when more than MAX_RESPONSESIZ
+ * bytes were received.
+ */
+char *
+readtls(struct tls *t)
+{
+	char *buf;
+	size_t len = 0, size = 0;
+	ssize_t r;
+
+	/* always allocate an empty buffer */
+	if (!(buf = calloc(1, size + 1)))
+		die("calloc: %s\n", strerror(errno));
+
+	while (1) {
+		/* keep room for one full read plus the terminating NUL */
+		if (len + READ_BUF_SIZ + 1 > size) {
+			/* allocate size: common case is small textfiles */
+			size += READ_BUF_SIZ;
+			/* overwriting buf is fine here: failure is fatal */
+			if (!(buf = realloc(buf, size + 1)))
+				die("realloc: %s\n", strerror(errno));
+		}
+		if ((r = tls_read(t, &buf[len], READ_BUF_SIZ)) <= 0)
+			break;
+		len += r;
+		buf[len] = '\0';
+		/* %zu needs a size_t argument; the macro is an int constant */
+		if (len > MAX_RESPONSESIZ)
+			die("response is too big: > %zu bytes\n",
+			    (size_t)MAX_RESPONSESIZ);
+	}
+	if (r < 0)
+		die("tls_read: %s\n", tls_error(t));
+
+	return buf;
+}
+
+/*
+ * Open a TCP connection to host:port (port must be numeric).
+ * Each address returned by getaddrinfo() is tried in turn until one
+ * connects; send and receive timeouts of MAX_RESPONSETIMEOUT seconds are
+ * set on the socket before connecting.
+ *
+ * Returns the connected socket. Terminates the process through die() when
+ * resolving fails, a timeout cannot be set or no address could be reached.
+ */
+int
+edial(const char *host, const char *port)
+{
+	static const int timeoutopts[] = { SO_SNDTIMEO, SO_RCVTIMEO };
+	struct addrinfo hints, *ai, *ailist;
+	struct timeval tv;
+	const char *cause = NULL;
+	size_t i;
+	int fd = -1, gaierr, saved;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
+	gaierr = getaddrinfo(host, port, &hints, &ailist);
+	if (gaierr)
+		die("%s: %s: %s:%s\n", __func__, gai_strerror(gaierr), host, port);
+
+	for (ai = ailist; ai; ai = ai->ai_next) {
+		fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+		if (fd == -1) {
+			cause = "socket";
+			continue;
+		}
+
+		/* same timeout for sending and receiving */
+		for (i = 0; i < sizeof(timeoutopts) / sizeof(timeoutopts[0]); i++) {
+			tv.tv_sec = MAX_RESPONSETIMEOUT;
+			tv.tv_usec = 0;
+			if (setsockopt(fd, SOL_SOCKET, timeoutopts[i], &tv, sizeof(tv)) == -1)
+				die("%s: setsockopt: %s\n", __func__, strerror(errno));
+		}
+
+		if (connect(fd, ai->ai_addr, ai->ai_addrlen) != -1)
+			break; /* connected */
+
+		/* remember why it failed; close() must not clobber errno */
+		cause = "connect";
+		saved = errno;
+		close(fd);
+		errno = saved;
+		fd = -1;
+	}
+	if (fd == -1)
+		die("%s: %s: %s:%s\n", __func__, cause, host, port);
+	freeaddrinfo(ailist);
+
+	return fd;
+}
+
+/*
+ * Fetch `path` from www.youtube.com over HTTPS (port 443) with a plain
+ * HTTP/1.0 GET request.
+ *
+ * Returns the complete raw response (whatever readtls() received, i.e.
+ * including the HTTP header) as a NUL-terminated heap buffer that the
+ * caller must free(), or NULL when the request does not fit into the
+ * request buffer. Network and TLS failures terminate the process through
+ * die().
+ */
+char *
+request(const char *path)
+{
+	struct tls *t;
+	const char *host = "www.youtube.com";
+	char req[4096];
+	char *data;
+	int fd, n;
+
+	/* use HTTP/1.0, don't use HTTP/1.1 using ugly chunked-encoding */
+	n = snprintf(req, sizeof(req),
+		"GET %s HTTP/1.0\r\n"
+		"Host: %s\r\n"
+		"Accept-Language: en-US\r\n" /* TODO: better one. */
+		"Connection: close\r\n"
+		"\r\n", path, host);
+	/* never send a truncated, and therefore malformed, request */
+	if (n < 0 || (size_t)n >= sizeof(req))
+		return NULL;
+
+	if (tls_init() == -1)
+		die("tls_init\n");
+
+	/* on failure there is no context that tls_error() could be asked */
+	if (!(t = tls_client()))
+		die("tls_client\n");
+
+	fd = edial(host, "443");
+
+	if (tls_connect_socket(t, fd, host) == -1)
+		die("tls_connect: %s\n", tls_error(t));
+
+	if (tls_write(t, req, (size_t)n) < 0)
+		die("tls_write: %s\n", tls_error(t));
+
+	data = readtls(t);
+
+	tls_close(t);
+	tls_free(t);
+	/* for a socket handed over with tls_connect_socket() tls_close()
+	 * only shuts down the TLS layer; the descriptor is still ours */
+	close(fd);
+
+	return data;
+}
+
+/*
+ * Build the request path for a search and fetch it with request().
+ *
+ * s:     search terms; inserted into the path as-is.
+ * chan:  channel id to search in, "" for none.
+ * user:  user name to search in, "" for none; takes precedence over chan.
+ * page:  page number as a string, "" for the first page.
+ * order: "date", "relevance" or "views"; "" for the default. Any other
+ *        value adds a search_sort parameter without a value.
+ *
+ * Returns what request() returns, or NULL when the path is too long.
+ */
+char *
+request_search(const char *s, const char *chan, const char *user,
+	const char *page, const char *order)
+{
+	static const struct {
+		const char *name;
+		const char *param;
+	} sortorders[] = {
+		{ "date",      "video_date_uploaded" },
+		{ "relevance", "video_relevance" },
+		{ "views",     "video_view_count" },
+	};
+	char path[4096];
+	size_t i;
+
+	/* when searching in channel or user but the search string is empty:
+	   fake a search with a single space. */
+	if (!s[0] && (chan[0] || user[0]))
+		s = "+";
+
+	if (user[0])
+		snprintf(path, sizeof(path), "/user/%s/search?query=%s", user, s);
+	else if (chan[0])
+		snprintf(path, sizeof(path), "/channel/%s/search?query=%s", chan, s);
+	else
+		snprintf(path, sizeof(path), "/results?search_query=%s", s);
+
+	if (page[0]) {
+		strlcat(path, "&page=", sizeof(path));
+		strlcat(path, page, sizeof(path));
+	}
+
+	if (order[0]) {
+		strlcat(path, "&search_sort=", sizeof(path));
+		for (i = 0; i < sizeof(sortorders) / sizeof(sortorders[0]); i++) {
+			if (!strcmp(order, sortorders[i].name)) {
+				strlcat(path, sortorders[i].param, sizeof(path));
+				break;
+			}
+		}
+	}
+
+	/* check if request is too long: snprintf() and strlcat() truncate,
+	   which leaves the buffer completely filled */
+	if (strlen(path) >= sizeof(path) - 1)
+		return NULL;
+
+	return request(path);
+}
+
+/*
+ * Input callback for the XML parser: return the next byte of the response
+ * body as a non-negative value, or EOF once all responsesize bytes have
+ * been handed out.
+ */
+int
+getnext(void)
+{
+	if (responseoff >= responsesize)
+		return EOF;
+	/* convert through unsigned char like getc(3) does: where plain char
+	 * is signed, bytes >= 0x80 would otherwise come out negative and
+	 * 0xff would be indistinguishable from EOF */
+	return (unsigned char)responsedata[responseoff++];
+}
+
+/* TODO: ? keep search state in some separate context
+   like responsedata, responsesize.
+ */
+/*
+ * Run a search and scrape the result page.
+ *
+ * nretvideos: receives the number of results, or -1 on failure.
+ * rawsearch:  search terms, passed on to request_search() unchanged.
+ * chan, user, page, order: see request_search(); "" means "not set".
+ *
+ * Returns a pointer to a static array holding *nretvideos entries, which
+ * the next call overwrites, or NULL when the request could not be made or
+ * the response has no header/body separator.
+ *
+ * All file-level parser state is reset first, so every call starts from
+ * scratch. NOTE: the function-local `grouped` index inside xmlattr() can
+ * not be reset from here.
+ */
+struct video *
+youtube_search(int *nretvideos,
+	const char *rawsearch, const char *chan, const char *user,
+	const char *page, const char *order)
+{
+	XMLParser x = { 0 };
+	char *data, *s;
+
+	*nretvideos = -1;
+
+	if (!(data = request_search(rawsearch, chan, user, page, order)))
+		return NULL;
+	if (!(s = strstr(data, "\r\n\r\n"))) {
+		free(data);
+		return NULL; /* invalid response */
+	}
+	s += strlen("\r\n\r\n");
+
+	/* forget everything a previous search left behind */
+	memset(videos, 0, sizeof(videos));
+	nvideos = 0;
+	state = None;
+	metainfocount = 0;
+	id[0] = userid[0] = '\0';
+
+	responsedata = s;
+	responsesize = strlen(s);
+	responseoff = 0; /* getnext() has to start at the top of the body */
+
+	x.xmlattr = xmlattr;
+	x.xmlattrentity = xmlattrentity;
+	x.xmldata = xmldata;
+	x.xmldataentity = xmldataentity;
+	x.xmltagend = xmltagend;
+	x.xmltagstart = xmltagstart;
+
+	x.getnext = getnext;
+
+	xml_parse(&x);
+
+	/* the callbacks copied everything they need into videos[], so the
+	 * raw response can go; don't leave a dangling pointer behind */
+	responsedata = NULL;
+	responsesize = responseoff = 0;
+	free(data);
+
+	*nretvideos = nvideos;
+
+	return videos;
+}
diff --git a/youtube.h b/youtube.h
@@ -0,0 +1,17 @@
+struct video { /* one scraped search result, filled in by the parser callbacks in youtube.c */
+ enum LinkType { Unknown = 0, Channel, Movie, Playlist, Video } linktype; /* kind of result; Unknown (0) until classified */
+ char id[32]; /* decoded "v" parameter of the item's /watch title link */
+ char title[1024]; /* title attribute of the title link (results other than channels) */
+ char channeltitle[1024]; /* title link of a channel result, text of the byline link, or copied from a preceding channel result */
+ char channelid[256]; /* from data-channel-external-id, a /channel/ byline link, or copied from a preceding channel result */
+ char userid[256]; /* from a /user/ byline link */
+ char publishedat[32]; /* text of the first meta-info list item (results other than channels) */
+ char viewcount[32]; /* text of the second meta-info list item (results other than channels) */
+ char duration[32]; /* text of the video-time span */
+ char channelvideos[32]; /* for channel: text of the first meta-info list item */
+};
+/*
+ * Run a search and return the scraped results.
+ * Stores the number of results in *nretvideos (-1 on failure) and returns
+ * a pointer to a static array of that many entries, or NULL on failure.
+ * chan, user, page and order must be strings; "" leaves the option unset.
+ * order understands "date", "relevance" and "views".
+ */
+struct video *
+youtube_search(int *nretvideos,
+ const char *rawsearch, const char *chan, const char *user,
+ const char *page, const char *order);