Buy commercial curl support from WolfSSL. We help you work
out your issues, debug your libcurl applications, use the API, port to new
platforms, add new features and more. With a team led by the curl founder
himself.
[PATCH] New protocol: gemini
- Contemporary messages sorted: [ by date ] [ by thread ] [ by subject ] [ by author ] [ by messages with attachments ]
From: Dmitry Bogatov via curl-library <curl-library_at_cool.haxx.se>
Date: Fri, 27 Nov 2020 00:00:02 -0500
This patch implements the gemini protocol as described in the specification[^1],
with the following pieces missing:
* Redirects. 3x status codes are not handled any differently from any
other status code that has no body; the -L option has no effect on gemini
URLs.
* Certificate verification. Gemini servers rarely use certificates with
trust chain from certificates in /etc/ssl/certs; self-signed
certificates are the norm. Option -k should be the default for gemini
protocol.
Other than that, things work:
$ make
$ ./src/curl -k -D- gemini://tilde.pink/~kaction/dist/
20 text/gemini
# Directory listing
=> /~kaction ..
=> flake-dhall/ flake-dhall/ Nov 22 2020
[^1]: https://gemini.circumlunar.space/docs/specification.html
Signed-off-by: Dmitry Bogatov <curl-library#cool.haxx.se#v1_at_kaction.cc>
Date: Fri, 27 Nov 2020 00:00:02 -0500
This patch implements the gemini protocol as described in the specification[^1],
with the following pieces missing:
* Redirects. 3x status codes are not handled any differently from any
other status code that has no body; the -L option has no effect on gemini
URLs.
* Certificate verification. Gemini servers rarely use certificates with
trust chain from certificates in /etc/ssl/certs; self-signed
certificates are the norm. Option -k should be the default for gemini
protocol.
Other than that, things work:
$ make
$ ./src/curl -k -D- gemini://tilde.pink/~kaction/dist/
20 text/gemini
# Directory listing
=> /~kaction ..
=> flake-dhall/ flake-dhall/ Nov 22 2020
[^1]: https://gemini.circumlunar.space/docs/specification.html
Signed-off-by: Dmitry Bogatov <curl-library#cool.haxx.se#v1_at_kaction.cc>
--- Notes: I would like to point to following lines in gemini.c: err = Curl_read(conn, sockfd, into, more, &amount); /* XXX: This conditional can probably be eliminated by fixing * doing_get_proto function, but I do not know how. */ if(err == CURLE_AGAIN) return CURLE_OK; I read gopher.c and http.c, and there is no check for CURLE_AGAIN there, so probably gemini.c can be improved; yet, if I remove this check curl(1) fails with error that socket is not ready for send/recv. Also, about redirects, I tried to call Curl_follow if (status == '3'), but it seems to be specific to HTTP(s) and did not result in expected behaviour with -L flag. Review and advices on how to improve patch are welcome. include/curl/curl.h | 1 + lib/Makefile.inc | 4 +- lib/gemini.c | 269 ++++++++++++++++++++++++++++++++++++++++++++ lib/gemini.h | 55 +++++++++ lib/url.c | 5 + lib/urldata.h | 2 + 6 files changed, 334 insertions(+), 2 deletions(-) create mode 100644 lib/gemini.c create mode 100644 lib/gemini.h diff --git a/include/curl/curl.h b/include/curl/curl.h index a73418dce..d2b29ab91 100644 --- a/include/curl/curl.h +++ b/include/curl/curl.h _at__at_ -1015,6 +1015,7 _at__at_ typedef CURLSTScode (*curl_hstswrite_callback)(CURL *easy, #define CURLPROTO_SMB (1<<26) #define CURLPROTO_SMBS (1<<27) #define CURLPROTO_MQTT (1<<28) +#define CURLPROTO_GEMINI (1<<29) #define CURLPROTO_ALL (~0) /* enable everything */ /* long may be 32 or 64 bits, but we should never depend on anything else diff --git a/lib/Makefile.inc b/lib/Makefile.inc index 6d35704c0..4a9a3145e 100644 --- a/lib/Makefile.inc +++ b/lib/Makefile.inc _at__at_ -61,7 +61,7 _at__at_ LIB_CFILES = altsvc.c amigaos.c asyn-ares.c asyn-thread.c base64.c \ socks_gssapi.c socks_sspi.c speedcheck.c splay.c strcase.c strdup.c \ strerror.c strtok.c strtoofft.c system_win32.c telnet.c tftp.c timeval.c \ transfer.c urlapi.c version.c warnless.c wildcard.c x509asn1.c dynbuf.c \ - version_win32.c easyoptions.c easygetopt.c hsts.c + version_win32.c 
easyoptions.c easygetopt.c hsts.c gemini.c LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h \ content_encoding.h cookie.h curl_addrinfo.h curl_base64.h curl_ctype.h \ _at__at_ -80,7 +80,7 _at__at_ LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h \ smb.h smtp.h sockaddr.h socketpair.h socks.h speedcheck.h splay.h strcase.h \ strdup.h strerror.h strtok.h strtoofft.h system_win32.h telnet.h tftp.h \ timeval.h transfer.h urlapi-int.h urldata.h warnless.h wildcard.h \ - x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h + x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h gemini.h LIB_RCFILES = libcurl.rc diff --git a/lib/gemini.c b/lib/gemini.c new file mode 100644 index 000000000..b1eb9aeff --- /dev/null +++ b/lib/gemini.c _at__at_ -0,0 +1,269 _at__at_ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel_at_haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at https://curl.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ***************************************************************************/ + +#include "curl_setup.h" + +#if !defined CURL_DISABLE_GEMINI && defined USE_SSL + +#include <ctype.h> +#include <string.h> +#include "gemini.h" +#include "urldata.h" +#include "vtls/vtls.h" +#include <curl/curl.h> +#include "transfer.h" +#include "sendf.h" +#include "connect.h" +#include "multiif.h" +#include "progress.h" +#include "gopher.h" +#include "select.h" +#include "strdup.h" +#include "url.h" +#include "escape.h" +#include "warnless.h" +#include "curl_printf.h" +#include "curl_memory.h" +/* The last #include file should be: */ +#include "memdebug.h" + +static char *gemini_request(const struct urlpieces *up) +{ + if(up->query) + return aprintf("gemini://%s%s?%s\r\n", up->hostname, up->path, up->query); + else + return aprintf("gemini://%s%s\r\n", up->hostname, up->path); +} + +static CURLcode gemini_setup_connection(struct connectdata *conn) +{ + struct GEMINI *gemini; + struct Curl_easy *data = conn->data; + DEBUGASSERT(data->req.p.gemini == NULL); + + gemini = calloc(1, sizeof(struct GEMINI)); + if(!gemini) + return CURLE_OUT_OF_MEMORY; + data->req.p.gemini = gemini; + return CURLE_OK; +} + +static CURLcode gemini_connecting(struct connectdata *conn, bool *done) +{ + return Curl_ssl_connect_nonblocking(conn, FIRSTSOCKET, done); +} + +static CURLcode gemini_do_it(struct connectdata *conn, bool *done) +{ + struct GEMINI *gemini; + struct Curl_easy *data; + char *request; + + data = conn->data; + request = gemini_request(&data->state.up); + + if(!request) + return CURLE_OUT_OF_MEMORY; + + gemini = data->req.p.gemini; + gemini->request.data = request; + gemini->request.amount_total = strlen(request); + gemini->request.amount_sent = 0; + + /* Real work happens in gemini_doing, so we can use non-blocking + * functions and avoid busy loops. 
+ */ + + + return CURLE_OK; +} + +static CURLcode gemini_doing_finish(struct connectdata *, bool *); +static CURLcode gemini_doing(struct connectdata *conn, bool *done) +{ + CURLcode err; + curl_socket_t sockfd; + struct GEMINI *gemini; + size_t more; + size_t sent; + size_t amount; + + gemini = conn->data->req.p.gemini; + sockfd = conn->sock[FIRSTSOCKET]; + + /* stage1: send request */ + sent = gemini->request.amount_sent; + more = gemini->request.amount_total - sent; + if(more) { + char *from; + + from = gemini->request.data + sent; + err = Curl_write(conn, sockfd, from, more, &amount); + if(err) + return err; + + gemini->request.amount_sent += amount; + more -= amount; + + if(more) + return CURLE_OK; + } + + /* stage2: read block big enough to contain header */ + if(!gemini->block.done) { + char *into; + + into = gemini->block.data + gemini->block.amount; + more = GEMINI_RESPONSE_BUFSIZE - gemini->block.amount; + + err = Curl_read(conn, sockfd, into, more, &amount); + + /* XXX: This conditional can probably be eliminated by fixing + * doing_get_proto function, but I do not know how. + */ + if(err == CURLE_AGAIN) + return CURLE_OK; + + if(err) + return err; + + gemini->block.amount += amount; + more -= amount; + + /* !more means that we succesfully read GEMINI_RESPONSE_BUFSIZE bytes. + * !amount means that there is no more data. It is quite possible + * for whole response, header + body combined to be less than + * GEMINI_RESPONSE_BUFSIZE bytes big. + */ + + if(!amount || !more) + gemini->block.done = TRUE; + + /* Optimization: We check for LF, and skip reading more when it is + * found. Curl main engine adds noticable delays between + * invokactions of "doing" function, so it is desirable to get + * things done in as little calls to "doing" function, as possible, + * but without busy looping on socket that is not yet ready. 
+ * + * For many servers first read returns exacly header, because it is + * natural thing to do on server side, although we can't rely on it. + * But this is reason why it does not worth to optimize search by + * keeping track of old {amount} value and searching only in bytes + * just read. + */ + gemini->block.lf = memchr(gemini->block.data, '\n', gemini->block.amount); + if(!gemini->block.lf && gemini->block.done) + return CURLE_WEIRD_SERVER_REPLY; + + if(gemini->block.lf) + gemini->block.done = TRUE; + + if(!gemini->block.done) + return CURLE_OK; + } + + return gemini_doing_finish(conn, done); +} + +static CURLcode gemini_doing_finish(struct connectdata *conn, bool *done) +{ + CURLcode err; + struct GEMINI *gemini; + char *block; + struct Curl_easy *data; + size_t amount; + size_t hsize; + char status; + char *lf; + + data = conn->data; + gemini = data->req.p.gemini; + block = gemini->block.data; + amount = gemini->block.amount; + lf = gemini->block.lf; + + if(!amount) + return CURLE_GOT_NOTHING; + + /* Two digit status, space, empty meta string and \r\n at least. */ + if(amount < 5) + return CURLE_WEIRD_SERVER_REPLY; + if(block[2] != ' ' || !isdigit(block[0]) || !isdigit(block[1])) + return CURLE_WEIRD_SERVER_REPLY; + + /* We already checked that first byte is digit, so {lf} can't point to + * first byte of buffer and {cr} can't underrun buffer. 
+ */ + if(*(lf - 1) != '\r') { + return CURLE_WEIRD_SERVER_REPLY; + } + + hsize = lf - block + 1; + err = Curl_client_write(conn, CLIENTWRITE_HEADER, block, hsize); + if(err) + return err; + + status = block[0]; + if(status != '2') { /* TODO: handle redirects */ + *done = TRUE; + return CURLE_OK; + } + + err = Curl_client_write(conn, CLIENTWRITE_BODY, block + hsize, + amount - hsize); + if(err) + return err; + + *done = TRUE; + Curl_setup_transfer(data, FIRSTSOCKET, -1, FALSE, -1); + return CURLE_OK; +} + +static int gemini_doing_getsock(struct connectdata *conn, curl_socket_t *socks) +{ + socks[0] = conn->sock[FIRSTSOCKET]; + return GETSOCK_READSOCK(0) | GETSOCK_WRITESOCK(0); +} + +const struct Curl_handler Curl_handler_gemini = { + "GEMINI", /* scheme */ + gemini_setup_connection, /* setup_connection */ + gemini_do_it, /* do_it */ + ZERO_NULL, /* done */ + ZERO_NULL, /* do_more */ + gemini_connecting, /* connect_it */ + gemini_connecting, /* connecting */ + gemini_doing, /* doing */ + Curl_ssl_getsock, /* proto_getsock */ + gemini_doing_getsock, /* doing_getsock */ + ZERO_NULL, /* domore_getsock */ + ZERO_NULL, /* perform_getsock */ + ZERO_NULL, /* disconnect */ + ZERO_NULL, /* readwrite */ + ZERO_NULL, /* connection_check */ + PORT_GEMINI, /* defport */ + CURLPROTO_GEMINI, /* protocol */ + CURLPROTO_GEMINI, /* family */ + PROTOPT_SSL /* flags */ +}; + +#endif /*CURL_DISABLE_GEMINI*/ diff --git a/lib/gemini.h b/lib/gemini.h new file mode 100644 index 000000000..b9f27ee70 --- /dev/null +++ b/lib/gemini.h _at__at_ -0,0 +1,55 _at__at_ +#ifndef HEADER_CURL_GEMINI_H +#define HEADER_CURL_GEMINI_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel_at_haxx.se>, et al. 
+ * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at https://curl.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +#ifndef CURL_DISABLE_GEMINI +extern const struct Curl_handler Curl_handler_gemini; +#endif + +/* + * According to specification, response has following format: + * + * <STATUS><SPACE><META><CR><LF> + * + * and <META> is UTF-8 string up to 1024 bytes long, so buffer of + * size >= (2 + 1 + 1024 + 1 + 1) = 1029 is enough to read whole + * response header into memory. It is more efficient than reading + * byte-after-byte until \n is found. 
+ */ +#define GEMINI_RESPONSE_BUFSIZE 1029 + +struct GEMINI { + struct { + char data[GEMINI_RESPONSE_BUFSIZE]; + size_t amount; /* Count of bytes read */ + bool done; + char *lf; /* Pointer to linefeed character in {data} */ + } block; + struct { + char *data; /* Allocated string */ + size_t amount_total; /* How many bytes in {data} */ + size_t amount_sent; /* How many bytes of it we already sent */ + } request; +}; + +#endif /* HEADER_CURL_GEMINI_H */ diff --git a/lib/url.c b/lib/url.c index f8b2a0030..7d4028161 100644 --- a/lib/url.c +++ b/lib/url.c _at__at_ -115,6 +115,7 _at__at_ bool curl_win32_idn_to_ascii(const char *in, char **out); #include "http_ntlm.h" #include "curl_rtmp.h" #include "gopher.h" +#include "gemini.h" #include "mqtt.h" #include "http_proxy.h" #include "conncache.h" _at__at_ -253,6 +254,10 _at__at_ static const struct Curl_handler * const protocols[] = { &Curl_handler_gopher, #endif +#if !defined CURL_DISABLE_GEMINI && defined USE_SSL + &Curl_handler_gemini, +#endif + #ifdef USE_LIBRTMP &Curl_handler_rtmp, &Curl_handler_rtmpt, diff --git a/lib/urldata.h b/lib/urldata.h index f085c093c..76ad00856 100644 --- a/lib/urldata.h +++ b/lib/urldata.h _at__at_ -49,6 +49,7 _at__at_ #define PORT_RTMPT PORT_HTTP #define PORT_RTMPS PORT_HTTPS #define PORT_GOPHER 70 +#define PORT_GEMINI 1965 #define PORT_MQTT 1883 #define DICT_MATCH "/MATCH:" _at__at_ -659,6 +660,7 _at__at_ struct SingleRequest { struct SMTP *smtp; struct SSHPROTO *ssh; struct TELNET *telnet; + struct GEMINI *gemini; } p; #ifndef CURL_DISABLE_DOH struct dohdata doh; /* DoH specific data for this request */ -- If possible, please keep mailing list in CC. It is public. ------------------------------------------------------------------- Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library Etiquette: https://curl.se/mail/etiquette.htmlReceived on 2020-11-28