Buy commercial curl support from WolfSSL. We help you work
out your issues, debug your libcurl applications, use the API, port to new
platforms, add new features and more. With a team led by the curl founder
himself.
[PATCH] New protocol: gemini
- Contemporary messages sorted: [ by date ] [ by thread ] [ by subject ] [ by author ] [ by messages with attachments ]
From: Dmitry Bogatov via curl-library <curl-library_at_cool.haxx.se>
Date: Fri, 27 Nov 2020 00:00:02 -0500
This patch implements the gemini protocol as described in the specification[^1],
with the following pieces missing:
* Redirects. 3x status codes are not handled any differently from any
other status code that has no body; the -L option has no effect on gemini
URLs.
* Certificate verification. Gemini servers rarely use certificates with
trust chain from certificates in /etc/ssl/certs; self-signed
certificates are the norm. Option -k should be the default for gemini
protocol.
Other than that, things work:
$ make
$ ./src/curl -k -D- gemini://tilde.pink/~kaction/dist/
20 text/gemini
# Directory listing
=> /~kaction ..
=> flake-dhall/ flake-dhall/ Nov 22 2020
[^1]: https://gemini.circumlunar.space/docs/specification.html
Signed-off-by: Dmitry Bogatov <curl-library#cool.haxx.se#v1_at_kaction.cc>
Date: Fri, 27 Nov 2020 00:00:02 -0500
This patch implements the gemini protocol as described in the specification[^1],
with the following pieces missing:
* Redirects. 3x status codes are not handled any differently from any
other status code that has no body; the -L option has no effect on gemini
URLs.
* Certificate verification. Gemini servers rarely use certificates with
trust chain from certificates in /etc/ssl/certs; self-signed
certificates are the norm. Option -k should be the default for gemini
protocol.
Other than that, things work:
$ make
$ ./src/curl -k -D- gemini://tilde.pink/~kaction/dist/
20 text/gemini
# Directory listing
=> /~kaction ..
=> flake-dhall/ flake-dhall/ Nov 22 2020
[^1]: https://gemini.circumlunar.space/docs/specification.html
Signed-off-by: Dmitry Bogatov <curl-library#cool.haxx.se#v1_at_kaction.cc>
---
Notes:
I would like to point to following lines in gemini.c:
err = Curl_read(conn, sockfd, into, more, &amount);
/* XXX: This conditional can probably be eliminated by fixing
* doing_get_proto function, but I do not know how.
*/
if(err == CURLE_AGAIN)
return CURLE_OK;
I read gopher.c and http.c, and there is no check for CURLE_AGAIN there,
so gemini.c can probably be improved; yet, if I remove this check,
curl(1) fails with an error saying that the socket is not ready for send/recv.
Also, about redirects: I tried to call Curl_follow if (status == '3'),
but it seems to be specific to HTTP(S) and did not result in the expected
behaviour with the -L flag.
Review and advice on how to improve the patch are welcome.
include/curl/curl.h | 1 +
lib/Makefile.inc | 4 +-
lib/gemini.c | 269 ++++++++++++++++++++++++++++++++++++++++++++
lib/gemini.h | 55 +++++++++
lib/url.c | 5 +
lib/urldata.h | 2 +
6 files changed, 334 insertions(+), 2 deletions(-)
create mode 100644 lib/gemini.c
create mode 100644 lib/gemini.h
diff --git a/include/curl/curl.h b/include/curl/curl.h
index a73418dce..d2b29ab91 100644
--- a/include/curl/curl.h
+++ b/include/curl/curl.h
_at__at_ -1015,6 +1015,7 _at__at_ typedef CURLSTScode (*curl_hstswrite_callback)(CURL *easy,
#define CURLPROTO_SMB (1<<26)
#define CURLPROTO_SMBS (1<<27)
#define CURLPROTO_MQTT (1<<28)
+#define CURLPROTO_GEMINI (1<<29)
#define CURLPROTO_ALL (~0) /* enable everything */
/* long may be 32 or 64 bits, but we should never depend on anything else
diff --git a/lib/Makefile.inc b/lib/Makefile.inc
index 6d35704c0..4a9a3145e 100644
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
_at__at_ -61,7 +61,7 _at__at_ LIB_CFILES = altsvc.c amigaos.c asyn-ares.c asyn-thread.c base64.c \
socks_gssapi.c socks_sspi.c speedcheck.c splay.c strcase.c strdup.c \
strerror.c strtok.c strtoofft.c system_win32.c telnet.c tftp.c timeval.c \
transfer.c urlapi.c version.c warnless.c wildcard.c x509asn1.c dynbuf.c \
- version_win32.c easyoptions.c easygetopt.c hsts.c
+ version_win32.c easyoptions.c easygetopt.c hsts.c gemini.c
LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h \
content_encoding.h cookie.h curl_addrinfo.h curl_base64.h curl_ctype.h \
_at__at_ -80,7 +80,7 _at__at_ LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h \
smb.h smtp.h sockaddr.h socketpair.h socks.h speedcheck.h splay.h strcase.h \
strdup.h strerror.h strtok.h strtoofft.h system_win32.h telnet.h tftp.h \
timeval.h transfer.h urlapi-int.h urldata.h warnless.h wildcard.h \
- x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h
+ x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h gemini.h
LIB_RCFILES = libcurl.rc
diff --git a/lib/gemini.c b/lib/gemini.c
new file mode 100644
index 000000000..b1eb9aeff
--- /dev/null
+++ b/lib/gemini.c
_at__at_ -0,0 +1,269 _at__at_
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel_at_haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include "curl_setup.h"
+
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+
+#include <ctype.h>
+#include <string.h>
+#include "gemini.h"
+#include "urldata.h"
+#include "vtls/vtls.h"
+#include <curl/curl.h>
+#include "transfer.h"
+#include "sendf.h"
+#include "connect.h"
+#include "multiif.h"
+#include "progress.h"
+#include "gopher.h"
+#include "select.h"
+#include "strdup.h"
+#include "url.h"
+#include "escape.h"
+#include "warnless.h"
+#include "curl_printf.h"
+#include "curl_memory.h"
+/* The last #include file should be: */
+#include "memdebug.h"
+
+/*
+ * gemini_request() formats the request line sent to the server: the
+ * "gemini://" URL built from the host name and path of {up}, followed by
+ * "?query" when a query part is present, and terminated by CRLF.
+ *
+ * Returns a string allocated by aprintf(), or whatever aprintf() returns
+ * on failure. The caller owns the result.
+ */
+static char *gemini_request(const struct urlpieces *up)
+{
+  const char *query = up->query;
+
+  if(!query)
+    return aprintf("gemini://%s%s\r\n", up->hostname, up->path);
+
+  return aprintf("gemini://%s%s?%s\r\n", up->hostname, up->path, query);
+}
+
+/*
+ * gemini_setup_connection() allocates the zero-initialized per-request
+ * GEMINI state and attaches it to the easy handle of {conn}.
+ *
+ * Returns CURLE_OUT_OF_MEMORY if the allocation fails, CURLE_OK otherwise.
+ */
+static CURLcode gemini_setup_connection(struct connectdata *conn)
+{
+  struct Curl_easy *data = conn->data;
+  struct GEMINI *state;
+
+  DEBUGASSERT(data->req.p.gemini == NULL);
+
+  state = calloc(1, sizeof(*state));
+  if(state) {
+    data->req.p.gemini = state;
+    return CURLE_OK;
+  }
+
+  return CURLE_OUT_OF_MEMORY;
+}
+
+/*
+ * gemini_connecting() forwards to Curl_ssl_connect_nonblocking() for the
+ * first socket of {conn} and passes its result and {done} straight through.
+ */
+static CURLcode gemini_connecting(struct connectdata *conn, bool *done)
+{
+  CURLcode result;
+
+  result = Curl_ssl_connect_nonblocking(conn, FIRSTSOCKET, done);
+  return result;
+}
+
+/*
+ * gemini_do_it() builds the request line for the URL of this transfer and
+ * stores it, together with its length, in the per-request GEMINI state.
+ * Nothing is sent from here; gemini_doing() performs the actual I/O so that
+ * non-blocking functions can be used and busy loops avoided.
+ *
+ * {done} is set to FALSE because the "do" phase is not finished when this
+ * function returns.
+ *
+ * Returns CURLE_OUT_OF_MEMORY if the request string cannot be allocated,
+ * CURLE_OK otherwise.
+ */
+static CURLcode gemini_do_it(struct connectdata *conn, bool *done)
+{
+  struct Curl_easy *data = conn->data;
+  struct GEMINI *gemini = data->req.p.gemini;
+  char *request = gemini_request(&data->state.up);
+
+  if(!request)
+    return CURLE_OUT_OF_MEMORY;
+
+  /* Release a request string left behind by an earlier invocation, if any,
+   * instead of overwriting (and leaking) it. The state is calloc()ed, so
+   * this is free(NULL) on the first call.
+   */
+  free(gemini->request.data);
+
+  gemini->request.data = request;
+  gemini->request.amount_total = strlen(request);
+  gemini->request.amount_sent = 0;
+
+  *done = FALSE;
+  return CURLE_OK;
+}
+
+static CURLcode gemini_doing_finish(struct connectdata *, bool *);
+
+/*
+ * gemini_doing() drives the "doing" phase in two non-blocking stages:
+ *
+ *  stage1: send whatever is left of the request string,
+ *  stage2: read into the response block until it holds a linefeed, the
+ *          block is full, or the peer has no more data.
+ *
+ * It returns CURLE_OK without touching {done} whenever it has to be called
+ * again. Once the header line is buffered it hands over to
+ * gemini_doing_finish(), which decides about {done}.
+ */
+static CURLcode gemini_doing(struct connectdata *conn, bool *done)
+{
+  CURLcode err;
+  curl_socket_t sockfd;
+  struct GEMINI *gemini;
+  size_t more;
+  size_t sent;
+  /* Curl_write() and Curl_read() report the byte count through an ssize_t
+   * pointer, so this must not be a size_t.
+   */
+  ssize_t amount;
+
+  gemini = conn->data->req.p.gemini;
+  sockfd = conn->sock[FIRSTSOCKET];
+
+  /* stage1: send request */
+  sent = gemini->request.amount_sent;
+  more = gemini->request.amount_total - sent;
+  if(more) {
+    char *from;
+
+    from = gemini->request.data + sent;
+    amount = 0;
+    err = Curl_write(conn, sockfd, from, more, &amount);
+    if(err) {
+      /* The transfer is over; do not leak the request string. Reset the
+       * counters so that the freed string can never be sent from again.
+       */
+      free(gemini->request.data);
+      gemini->request.data = NULL;
+      gemini->request.amount_total = 0;
+      gemini->request.amount_sent = 0;
+      return err;
+    }
+
+    gemini->request.amount_sent += (size_t)amount;
+    more -= (size_t)amount;
+
+    if(more)
+      return CURLE_OK;
+
+    /* Whole request is on the wire. The string is not needed any longer;
+     * amount_sent == amount_total keeps stage1 skipped on later calls.
+     */
+    free(gemini->request.data);
+    gemini->request.data = NULL;
+  }
+
+  /* stage2: read block big enough to contain header */
+  if(!gemini->block.done) {
+    char *into;
+
+    into = gemini->block.data + gemini->block.amount;
+    more = GEMINI_RESPONSE_BUFSIZE - gemini->block.amount;
+
+    amount = 0;
+    err = Curl_read(conn, sockfd, into, more, &amount);
+
+    /* XXX: This conditional can probably be eliminated by fixing
+     * doing_get_proto function, but I do not know how.
+     */
+    if(err == CURLE_AGAIN)
+      return CURLE_OK;
+
+    if(err)
+      return err;
+
+    gemini->block.amount += (size_t)amount;
+    more -= (size_t)amount;
+
+    /* !more means that we successfully read GEMINI_RESPONSE_BUFSIZE bytes.
+     * !amount means that there is no more data. It is quite possible
+     * for whole response, header + body combined to be less than
+     * GEMINI_RESPONSE_BUFSIZE bytes big.
+     */
+
+    if(!amount || !more)
+      gemini->block.done = TRUE;
+
+    /* Optimization: We check for LF, and skip reading more when it is
+     * found. Curl main engine adds noticeable delays between
+     * invocations of "doing" function, so it is desirable to get
+     * things done in as few calls to "doing" function as possible,
+     * but without busy looping on socket that is not yet ready.
+     *
+     * For many servers first read returns exactly header, because it is
+     * natural thing to do on server side, although we can't rely on it.
+     * But this is the reason why it is not worth optimizing the search by
+     * keeping track of old {amount} value and searching only in bytes
+     * just read.
+     */
+    gemini->block.lf = memchr(gemini->block.data, '\n', gemini->block.amount);
+    if(!gemini->block.lf && gemini->block.done) {
+      /* No header line will ever arrive. Tell "server sent nothing at all"
+       * apart from "server sent something that is not a header".
+       */
+      if(!gemini->block.amount)
+        return CURLE_GOT_NOTHING;
+      return CURLE_WEIRD_SERVER_REPLY;
+    }
+
+    if(gemini->block.lf)
+      gemini->block.done = TRUE;
+
+    if(!gemini->block.done)
+      return CURLE_OK;
+  }
+
+  return gemini_doing_finish(conn, done);
+}
+
+/*
+ * gemini_doing_finish() validates the buffered response header, which must
+ * look like
+ *
+ *   <two digits><space><meta><CR><LF>
+ *
+ * and passes it to the client as header data. For status codes starting
+ * with '2' the bytes buffered after the header are passed on as body data
+ * and a transfer on the first socket is set up. For every other status
+ * only the header is delivered.
+ *
+ * {done} is set to TRUE on every successful return.
+ *
+ * Returns CURLE_GOT_NOTHING when the block is empty,
+ * CURLE_WEIRD_SERVER_REPLY when the header is malformed, or the error
+ * reported by Curl_client_write().
+ */
+static CURLcode gemini_doing_finish(struct connectdata *conn, bool *done)
+{
+  CURLcode err;
+  struct GEMINI *gemini;
+  char *block;
+  struct Curl_easy *data;
+  size_t amount;
+  size_t hsize;
+  char status;
+  char *lf;
+
+  data = conn->data;
+  gemini = data->req.p.gemini;
+  block = gemini->block.data;
+  amount = gemini->block.amount;
+  lf = gemini->block.lf;
+
+  if(!amount)
+    return CURLE_GOT_NOTHING;
+
+  /* Two digit status, space, empty meta string and \r\n at least. A missing
+   * linefeed means there is no complete header line to look at.
+   */
+  if(amount < 5 || !lf)
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* The bytes come from the network and plain char may be signed; isdigit()
+   * is only defined for values representable as unsigned char (or EOF).
+   */
+  if(block[2] != ' ' ||
+     !isdigit((unsigned char)block[0]) ||
+     !isdigit((unsigned char)block[1]))
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* We already checked that first byte is digit, so {lf} can't point to
+   * first byte of buffer and looking at the byte before it can't underrun
+   * the buffer.
+   */
+  if(*(lf - 1) != '\r')
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* Header size includes the terminating linefeed. */
+  hsize = (size_t)(lf - block) + 1;
+  err = Curl_client_write(conn, CLIENTWRITE_HEADER, block, hsize);
+  if(err)
+    return err;
+
+  status = block[0];
+  if(status != '2') { /* TODO: handle redirects */
+    *done = TRUE;
+    return CURLE_OK;
+  }
+
+  /* Whatever was read past the header already belongs to the body. */
+  err = Curl_client_write(conn, CLIENTWRITE_BODY, block + hsize,
+                          amount - hsize);
+  if(err)
+    return err;
+
+  *done = TRUE;
+  Curl_setup_transfer(data, FIRSTSOCKET, -1, FALSE, -1);
+  return CURLE_OK;
+}
+
+/*
+ * gemini_doing_getsock() stores the first socket of {conn} in socks[0] and
+ * returns a bitmap asking for both read and write readiness on it.
+ */
+static int gemini_doing_getsock(struct connectdata *conn, curl_socket_t *socks)
+{
+  const int wanted = GETSOCK_READSOCK(0) | GETSOCK_WRITESOCK(0);
+
+  socks[0] = conn->sock[FIRSTSOCKET];
+  return wanted;
+}
+
+/*
+ * Protocol handler table for the "GEMINI" scheme.
+ *
+ * The same function, gemini_connecting, serves both the connect_it and the
+ * connecting slot. No done, do_more or disconnect callbacks are installed.
+ * The handler is flagged PROTOPT_SSL and uses PORT_GEMINI as default port.
+ */
+const struct Curl_handler Curl_handler_gemini = {
+ "GEMINI", /* scheme */
+ gemini_setup_connection, /* setup_connection */
+ gemini_do_it, /* do_it */
+ ZERO_NULL, /* done */
+ ZERO_NULL, /* do_more */
+ gemini_connecting, /* connect_it */
+ gemini_connecting, /* connecting */
+ gemini_doing, /* doing */
+ Curl_ssl_getsock, /* proto_getsock */
+ gemini_doing_getsock, /* doing_getsock */
+ ZERO_NULL, /* domore_getsock */
+ ZERO_NULL, /* perform_getsock */
+ ZERO_NULL, /* disconnect */
+ ZERO_NULL, /* readwrite */
+ ZERO_NULL, /* connection_check */
+ PORT_GEMINI, /* defport */
+ CURLPROTO_GEMINI, /* protocol */
+ CURLPROTO_GEMINI, /* family */
+ PROTOPT_SSL /* flags */
+};
+
+#endif /*CURL_DISABLE_GEMINI*/
diff --git a/lib/gemini.h b/lib/gemini.h
new file mode 100644
index 000000000..b9f27ee70
--- /dev/null
+++ b/lib/gemini.h
_at__at_ -0,0 +1,55 _at__at_
+#ifndef HEADER_CURL_GEMINI_H
+#define HEADER_CURL_GEMINI_H
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel_at_haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#ifndef CURL_DISABLE_GEMINI
+extern const struct Curl_handler Curl_handler_gemini;
+#endif
+
+/*
+ * According to specification, response has following format:
+ *
+ * <STATUS><SPACE><META><CR><LF>
+ *
+ * and <META> is UTF-8 string up to 1024 bytes long, so buffer of
+ * size >= (2 + 1 + 1024 + 1 + 1) = 1029 is enough to read whole
+ * response header into memory. It is more efficient than reading
+ * byte-after-byte until \n is found.
+ */
+#define GEMINI_RESPONSE_BUFSIZE 1029
+
+/*
+ * Per-request state of a gemini transfer.
+ *
+ * {block} buffers the first bytes of the response, at most
+ * GEMINI_RESPONSE_BUFSIZE of them, so that the header line can be parsed in
+ * one go. {request} tracks the request string while it is being sent.
+ */
+struct GEMINI {
+ struct {
+ char data[GEMINI_RESPONSE_BUFSIZE];
+ size_t amount; /* Count of bytes read */
+ bool done; /* TRUE once no more bytes are read into {data} */
+ char *lf; /* Pointer to linefeed character in {data} */
+ } block;
+ struct {
+ char *data; /* Allocated string */
+ size_t amount_total; /* How many bytes in {data} */
+ size_t amount_sent; /* How many bytes of it we already sent */
+ } request;
+};
+
+#endif /* HEADER_CURL_GEMINI_H */
diff --git a/lib/url.c b/lib/url.c
index f8b2a0030..7d4028161 100644
--- a/lib/url.c
+++ b/lib/url.c
_at__at_ -115,6 +115,7 _at__at_ bool curl_win32_idn_to_ascii(const char *in, char **out);
#include "http_ntlm.h"
#include "curl_rtmp.h"
#include "gopher.h"
+#include "gemini.h"
#include "mqtt.h"
#include "http_proxy.h"
#include "conncache.h"
_at__at_ -253,6 +254,10 _at__at_ static const struct Curl_handler * const protocols[] = {
&Curl_handler_gopher,
#endif
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+ &Curl_handler_gemini,
+#endif
+
#ifdef USE_LIBRTMP
&Curl_handler_rtmp,
&Curl_handler_rtmpt,
diff --git a/lib/urldata.h b/lib/urldata.h
index f085c093c..76ad00856 100644
--- a/lib/urldata.h
+++ b/lib/urldata.h
_at__at_ -49,6 +49,7 _at__at_
#define PORT_RTMPT PORT_HTTP
#define PORT_RTMPS PORT_HTTPS
#define PORT_GOPHER 70
+#define PORT_GEMINI 1965
#define PORT_MQTT 1883
#define DICT_MATCH "/MATCH:"
_at__at_ -659,6 +660,7 _at__at_ struct SingleRequest {
struct SMTP *smtp;
struct SSHPROTO *ssh;
struct TELNET *telnet;
+ struct GEMINI *gemini;
} p;
#ifndef CURL_DISABLE_DOH
struct dohdata doh; /* DoH specific data for this request */
--
If possible, please keep mailing list in CC. It is public.
-------------------------------------------------------------------
Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library
Etiquette: https://curl.se/mail/etiquette.html
Received on 2020-11-28