curl / Mailing Lists / curl-library / Single Mail
Buy commercial curl support from WolfSSL. We help you work out your issues, debug your libcurl applications, use the API, port to new platforms, add new features and more. With a team led by the curl founder himself.

[PATCH] New protocol: gemini

From: Dmitry Bogatov via curl-library <curl-library_at_cool.haxx.se>
Date: Fri, 27 Nov 2020 00:00:02 -0500

This patch implements the gemini protocol as described in the
specification[^1], with the following pieces missing:

 * Redirects. 3x status codes are handled no differently from any
   other status code that has no body; the -L option has no effect on
   gemini URLs.

 * Certificate verification. Gemini servers rarely use certificates with
   trust chain from certificates in /etc/ssl/certs; self-signed
   certificates are the norm. Option -k should be the default for gemini
   protocol.

Other than that, things work:

        $ make
        $ ./src/curl -k -D- gemini://tilde.pink/~kaction/dist/
        20 text/gemini
        # Directory listing

        => /~kaction ..
        => flake-dhall/ flake-dhall/ Nov 22 2020

 [^1]: https://gemini.circumlunar.space/docs/specification.html

Signed-off-by: Dmitry Bogatov <curl-library#cool.haxx.se#v1_at_kaction.cc>
---
Notes:
    I would like to point to following lines in gemini.c:
    
    	err = Curl_read(conn, sockfd, into, more, &amount);
    
    	/* XXX: This conditional can probably be eliminated by fixing
    	* doing_get_proto function, but I do not know how.
    	*/
    	if(err == CURLE_AGAIN)
    	  return CURLE_OK;
    
    I read gopher.c and http.c, and there is no check for CURLE_AGAIN there,
    so probably gemini.c can be improved; yet, if I remove this check
    curl(1) fails with error that socket is not ready for send/recv.
    
    Also, about redirects, I tried to call Curl_follow if (status == '3'),
    but it seems to be specific to HTTP(s) and did not result in expected
    behaviour with -L flag.
    
    Review and advice on how to improve the patch are welcome.
 include/curl/curl.h |   1 +
 lib/Makefile.inc    |   4 +-
 lib/gemini.c        | 269 ++++++++++++++++++++++++++++++++++++++++++++
 lib/gemini.h        |  55 +++++++++
 lib/url.c           |   5 +
 lib/urldata.h       |   2 +
 6 files changed, 334 insertions(+), 2 deletions(-)
 create mode 100644 lib/gemini.c
 create mode 100644 lib/gemini.h
diff --git a/include/curl/curl.h b/include/curl/curl.h
index a73418dce..d2b29ab91 100644
--- a/include/curl/curl.h
+++ b/include/curl/curl.h
_at__at_ -1015,6 +1015,7 _at__at_ typedef CURLSTScode (*curl_hstswrite_callback)(CURL *easy,
 #define CURLPROTO_SMB    (1<<26)
 #define CURLPROTO_SMBS   (1<<27)
 #define CURLPROTO_MQTT   (1<<28)
+#define CURLPROTO_GEMINI (1<<29)
 #define CURLPROTO_ALL    (~0) /* enable everything */
 
 /* long may be 32 or 64 bits, but we should never depend on anything else
diff --git a/lib/Makefile.inc b/lib/Makefile.inc
index 6d35704c0..4a9a3145e 100644
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
_at__at_ -61,7 +61,7 _at__at_ LIB_CFILES = altsvc.c amigaos.c asyn-ares.c asyn-thread.c base64.c            \
   socks_gssapi.c socks_sspi.c speedcheck.c splay.c strcase.c strdup.c         \
   strerror.c strtok.c strtoofft.c system_win32.c telnet.c tftp.c timeval.c    \
   transfer.c urlapi.c version.c warnless.c wildcard.c x509asn1.c dynbuf.c     \
-  version_win32.c easyoptions.c easygetopt.c hsts.c
+  version_win32.c easyoptions.c easygetopt.c hsts.c gemini.c
 
 LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h    \
   content_encoding.h cookie.h curl_addrinfo.h curl_base64.h curl_ctype.h      \
_at__at_ -80,7 +80,7 _at__at_ LIB_HFILES = altsvc.h amigaos.h arpa_telnet.h asyn.h conncache.h connect.h    \
   smb.h smtp.h sockaddr.h socketpair.h socks.h speedcheck.h splay.h strcase.h \
   strdup.h strerror.h strtok.h strtoofft.h system_win32.h telnet.h tftp.h     \
   timeval.h transfer.h urlapi-int.h urldata.h warnless.h wildcard.h           \
-  x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h
+  x509asn1.h dynbuf.h version_win32.h easyoptions.h hsts.h gemini.h
 
 LIB_RCFILES = libcurl.rc
 
diff --git a/lib/gemini.c b/lib/gemini.c
new file mode 100644
index 000000000..b1eb9aeff
--- /dev/null
+++ b/lib/gemini.c
_at__at_ -0,0 +1,269 _at__at_
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel_at_haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include "curl_setup.h"
+
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+
+#include <ctype.h>
+#include <string.h>
+#include "gemini.h"
+#include "urldata.h"
+#include "vtls/vtls.h"
+#include <curl/curl.h>
+#include "transfer.h"
+#include "sendf.h"
+#include "connect.h"
+#include "multiif.h"
+#include "progress.h"
+#include "gopher.h"
+#include "select.h"
+#include "strdup.h"
+#include "url.h"
+#include "escape.h"
+#include "warnless.h"
+#include "curl_printf.h"
+#include "curl_memory.h"
+/* The last #include file should be: */
+#include "memdebug.h"
+
+static char *gemini_request(const struct urlpieces *up)
+{
+  if(up->query)
+    return aprintf("gemini://%s%s?%s\r\n", up->hostname, up->path, up->query);
+  else
+    return aprintf("gemini://%s%s\r\n", up->hostname, up->path);
+}
+
+/*
+ * gemini_setup_connection() allocates the zero-initialised per-request
+ * struct GEMINI and stores it in data->req.p.gemini.
+ *
+ * Returns CURLE_OK on success and CURLE_OUT_OF_MEMORY when calloc() fails,
+ * in which case data->req.p.gemini is left untouched.
+ */
+static CURLcode gemini_setup_connection(struct connectdata *conn)
+{
+  struct Curl_easy *data = conn->data;
+  struct GEMINI *state;
+  DEBUGASSERT(data->req.p.gemini == NULL);
+
+  state = calloc(1, sizeof(*state));
+  if(state) {
+    data->req.p.gemini = state;
+    return CURLE_OK;
+  }
+
+  return CURLE_OUT_OF_MEMORY;
+}
+
+/*
+ * gemini_connecting() hands the connection's first socket to
+ * Curl_ssl_connect_nonblocking() and passes its result and its *done flag
+ * straight back to the caller.
+ */
+static CURLcode gemini_connecting(struct connectdata *conn, bool *done)
+{
+  CURLcode result;
+
+  result = Curl_ssl_connect_nonblocking(conn, FIRSTSOCKET, done);
+
+  return result;
+}
+
+/*
+ * gemini_do_it() prepares the request for the transfer: it builds the
+ * request line from the URL pieces of the easy handle and stores it,
+ * together with its length and a zeroed "sent" counter, in the per-request
+ * struct GEMINI.
+ *
+ * Nothing is sent here. The real work happens in gemini_doing, so that
+ * non-blocking functions can be used and busy loops avoided. *done is
+ * therefore always set to FALSE on success.
+ *
+ * Returns CURLE_OK, or CURLE_OUT_OF_MEMORY when the request string cannot
+ * be allocated (the stored state is not modified in that case).
+ */
+static CURLcode gemini_do_it(struct connectdata *conn, bool *done)
+{
+  struct GEMINI *gemini;
+  struct Curl_easy *data;
+  char *request;
+
+  data = conn->data;
+  request = gemini_request(&data->state.up);
+
+  if(!request)
+    return CURLE_OUT_OF_MEMORY;
+
+  gemini = data->req.p.gemini;
+
+  /* Drop a request left over from an earlier invocation, if any, so that
+   * calling this function again does not leak it. free(NULL) is a no-op.
+   */
+  free(gemini->request.data);
+
+  gemini->request.data = request;
+  gemini->request.amount_total = strlen(request);
+  gemini->request.amount_sent = 0;
+
+  /* Do not rely on the caller having initialised the flag */
+  *done = FALSE;
+
+  return CURLE_OK;
+}
+
+static CURLcode gemini_doing_finish(struct connectdata *, bool *);
+
+/*
+ * gemini_doing() advances the transfer by at most one write and one read
+ * per invocation:
+ *
+ *  stage1: send whatever part of the request has not been sent yet and
+ *          return CURLE_OK if some of it is still pending;
+ *  stage2: read into the block buffer until a linefeed shows up, the buffer
+ *          is full or the peer has no more data.
+ *
+ * Once the block is complete, gemini_doing_finish() takes over and decides
+ * the value of *done. Until then *done is not written.
+ *
+ * Returns CURLE_OK while more calls are needed, the error reported by
+ * Curl_write()/Curl_read(), CURLE_WEIRD_SERVER_REPLY when reading ended
+ * without a linefeed, or whatever gemini_doing_finish() returns.
+ */
+static CURLcode gemini_doing(struct connectdata *conn, bool *done)
+{
+  CURLcode err;
+  curl_socket_t sockfd;
+  struct GEMINI *gemini;
+  size_t more;
+  size_t sent;
+  /* Curl_write() and Curl_read() report their byte counts through a
+   * ssize_t pointer, so a size_t must not be passed here.
+   * NOTE(review): both are expected to yield a non-negative count whenever
+   * they return CURLE_OK - confirm against sendf.c.
+   */
+  ssize_t nbytes = 0;
+
+  gemini = conn->data->req.p.gemini;
+  sockfd = conn->sock[FIRSTSOCKET];
+
+  /* stage1: send request */
+  sent = gemini->request.amount_sent;
+  more = gemini->request.amount_total - sent;
+  if(more) {
+    char *from;
+
+    from = gemini->request.data + sent;
+    err = Curl_write(conn, sockfd, from, more, &nbytes);
+    if(err)
+      return err;
+
+    gemini->request.amount_sent += (size_t)nbytes;
+    more -= (size_t)nbytes;
+
+    if(more)
+      return CURLE_OK;
+  }
+
+  /* stage2: read block big enough to contain header */
+  if(!gemini->block.done) {
+    char *into;
+
+    into = gemini->block.data + gemini->block.amount;
+    more = GEMINI_RESPONSE_BUFSIZE - gemini->block.amount;
+
+    nbytes = 0;
+    err = Curl_read(conn, sockfd, into, more, &nbytes);
+
+    /* XXX: This conditional can probably be eliminated by fixing
+     * doing_get_proto function, but I do not know how.
+     */
+    if(err == CURLE_AGAIN)
+      return CURLE_OK;
+
+    if(err)
+      return err;
+
+    gemini->block.amount += (size_t)nbytes;
+    more -= (size_t)nbytes;
+
+    /* !more means that we successfully read GEMINI_RESPONSE_BUFSIZE bytes.
+     * !nbytes means that there is no more data. It is quite possible
+     * for the whole response, header + body combined, to be less than
+     * GEMINI_RESPONSE_BUFSIZE bytes big.
+     */
+
+    if(!nbytes || !more)
+      gemini->block.done = TRUE;
+
+    /* Optimization: We check for LF, and skip reading more when it is
+     * found. The curl main engine adds noticeable delays between
+     * invocations of the "doing" function, so it is desirable to get
+     * things done in as few calls to the "doing" function as possible,
+     * but without busy looping on a socket that is not yet ready.
+     *
+     * For many servers the first read returns exactly the header, because
+     * it is the natural thing to do on the server side, although we can't
+     * rely on it. This is the reason why it is not worth optimizing the
+     * search by keeping track of the old amount and searching only in the
+     * bytes just read.
+     */
+    gemini->block.lf = memchr(gemini->block.data, '\n', gemini->block.amount);
+    if(!gemini->block.lf && gemini->block.done)
+      return CURLE_WEIRD_SERVER_REPLY;
+
+    if(gemini->block.lf)
+      gemini->block.done = TRUE;
+
+    if(!gemini->block.done)
+      return CURLE_OK;
+  }
+
+  return gemini_doing_finish(conn, done);
+}
+
+/*
+ * gemini_doing_finish() validates the response header stored in the block
+ * buffer and hands the data over to the client:
+ *
+ *  - the header, up to and including the linefeed, is delivered with
+ *    CLIENTWRITE_HEADER;
+ *  - if the first status digit is '2', any bytes that were read after the
+ *    header are delivered with CLIENTWRITE_BODY and the transfer is set up
+ *    to receive the rest from the first socket.
+ *
+ * *done is set to TRUE on every successful return.
+ *
+ * Returns CURLE_GOT_NOTHING for an empty block, CURLE_WEIRD_SERVER_REPLY
+ * when the header is not "<digit><digit><space>...<CR><LF>", the error of
+ * Curl_client_write() when delivery fails, otherwise CURLE_OK.
+ */
+static CURLcode gemini_doing_finish(struct connectdata *conn, bool *done)
+{
+  CURLcode err;
+  struct GEMINI *gemini;
+  char *block;
+  struct Curl_easy *data;
+  size_t amount;
+  size_t hsize;
+  char status;
+  char *lf;
+
+  data = conn->data;
+  gemini = data->req.p.gemini;
+  block = gemini->block.data;
+  amount = gemini->block.amount;
+  lf = gemini->block.lf;
+
+  if(!amount)
+    return CURLE_GOT_NOTHING;
+
+  /* Two digit status, space, empty meta string and \r\n at least. The
+   * caller only gets here with a linefeed found, the check on {lf} merely
+   * keeps this function safe on its own.
+   */
+  if(amount < 5 || !lf)
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* The bytes come from the server and plain char may be signed; the
+   * <ctype.h> functions require a value representable as unsigned char.
+   */
+  if(block[2] != ' ' ||
+     !isdigit((unsigned char)block[0]) ||
+     !isdigit((unsigned char)block[1]))
+    return CURLE_WEIRD_SERVER_REPLY;
+
+  /* The first three bytes are known to be digit, digit, space, so {lf}
+   * points at the fourth byte or later and {lf - 1} stays inside the
+   * buffer.
+   */
+  if(*(lf - 1) != '\r') {
+    return CURLE_WEIRD_SERVER_REPLY;
+  }
+
+  hsize = (size_t)(lf - block) + 1;
+  err = Curl_client_write(conn, CLIENTWRITE_HEADER, block, hsize);
+  if(err)
+    return err;
+
+  status = block[0];
+  if(status != '2') { /* TODO: handle redirects */
+    *done = TRUE;
+    return CURLE_OK;
+  }
+
+  /* Only deliver body bytes when some were read together with the header.
+   * NOTE(review): a zero length may be treated as "use strlen()" by
+   * Curl_client_write() in some libcurl versions, and the buffer is not
+   * guaranteed to be NUL terminated - confirm against sendf.c.
+   */
+  if(amount > hsize) {
+    err = Curl_client_write(conn, CLIENTWRITE_BODY, block + hsize,
+                            amount - hsize);
+    if(err)
+      return err;
+  }
+
+  *done = TRUE;
+  Curl_setup_transfer(data, FIRSTSOCKET, -1, FALSE, -1);
+  return CURLE_OK;
+}
+
+/*
+ * gemini_doing_getsock() stores the connection's first socket in socks[0]
+ * and returns a bitmap asking for both its readability and its writability
+ * to be monitored.
+ */
+static int gemini_doing_getsock(struct connectdata *conn, curl_socket_t *socks)
+{
+  int bitmap = GETSOCK_READSOCK(0);
+
+  bitmap |= GETSOCK_WRITESOCK(0);
+  socks[0] = conn->sock[FIRSTSOCKET];
+
+  return bitmap;
+}
+
+/*
+ * gemini_done() releases the request string that gemini_do_it() stored in
+ * the per-request struct GEMINI and resets the send counters. Without it
+ * the string returned by aprintf() is never freed.
+ *
+ * The incoming status is returned unchanged; premature is not used.
+ *
+ * NOTE(review): the struct GEMINI itself is not freed here; it is assumed
+ * to be released by the generic request cleanup - confirm.
+ */
+static CURLcode gemini_done(struct connectdata *conn, CURLcode status,
+                            bool premature)
+{
+  struct GEMINI *gemini = conn->data->req.p.gemini;
+  (void)premature;
+
+  if(gemini) {
+    free(gemini->request.data);
+    gemini->request.data = NULL;
+    gemini->request.amount_total = 0;
+    gemini->request.amount_sent = 0;
+  }
+
+  return status;
+}
+
+/*
+ * Protocol handler for gemini:// URLs. The same function serves as both
+ * connect_it and connecting; sending the request and reading the response
+ * header are driven from the doing callback.
+ */
+const struct Curl_handler Curl_handler_gemini = {
+  "GEMINI",                             /* scheme */
+  gemini_setup_connection,              /* setup_connection */
+  gemini_do_it,                         /* do_it */
+  gemini_done,                          /* done */
+  ZERO_NULL,                            /* do_more */
+  gemini_connecting,                    /* connect_it */
+  gemini_connecting,                    /* connecting */
+  gemini_doing,                         /* doing */
+  Curl_ssl_getsock,                     /* proto_getsock */
+  gemini_doing_getsock,                 /* doing_getsock */
+  ZERO_NULL,                            /* domore_getsock */
+  ZERO_NULL,                            /* perform_getsock */
+  ZERO_NULL,                            /* disconnect */
+  ZERO_NULL,                            /* readwrite */
+  ZERO_NULL,                            /* connection_check */
+  PORT_GEMINI,                          /* defport */
+  CURLPROTO_GEMINI,                     /* protocol */
+  CURLPROTO_GEMINI,                     /* family */
+  PROTOPT_SSL                           /* flags */
+};
+
+#endif /*CURL_DISABLE_GEMINI*/
diff --git a/lib/gemini.h b/lib/gemini.h
new file mode 100644
index 000000000..b9f27ee70
--- /dev/null
+++ b/lib/gemini.h
_at__at_ -0,0 +1,55 _at__at_
+#ifndef HEADER_CURL_GEMINI_H
+#define HEADER_CURL_GEMINI_H
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel_at_haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#ifndef CURL_DISABLE_GEMINI
+extern const struct Curl_handler Curl_handler_gemini;
+#endif
+
+/*
+ * According to specification, response has following format:
+ *
+ *     <STATUS><SPACE><META><CR><LF>
+ *
+ * and <META> is UTF-8 string up to 1024 bytes long, so buffer of
+ * size >= (2 + 1 + 1024 + 1 + 1) = 1029 is enough to read whole
+ * response header into memory. It is more efficient than reading
+ * byte-after-byte until \n is found.
+ */
+#define GEMINI_RESPONSE_BUFSIZE 1029
+
+/*
+ * Per-request state of a gemini transfer: the first block of the response
+ * as read from the server, and the request that is being sent.
+ */
+struct GEMINI {
+  /* Start of the response; large enough to hold a complete header */
+  struct {
+    char data[GEMINI_RESPONSE_BUFSIZE];
+    size_t amount; /* Count of bytes read */
+    bool done; /* TRUE once a linefeed was found, {data} is full or the
+                  peer had no more data */
+    char *lf; /* Pointer to linefeed character in {data} */
+  } block;
+  /* Request line and how much of it has been written so far */
+  struct {
+    char *data; /* Allocated string */
+    size_t amount_total; /* How many bytes in {data} */
+    size_t amount_sent; /* How many bytes of it we already sent */
+  } request;
+};
+
+#endif /* HEADER_CURL_GEMINI_H */
diff --git a/lib/url.c b/lib/url.c
index f8b2a0030..7d4028161 100644
--- a/lib/url.c
+++ b/lib/url.c
_at__at_ -115,6 +115,7 _at__at_ bool curl_win32_idn_to_ascii(const char *in, char **out);
 #include "http_ntlm.h"
 #include "curl_rtmp.h"
 #include "gopher.h"
+#include "gemini.h"
 #include "mqtt.h"
 #include "http_proxy.h"
 #include "conncache.h"
_at__at_ -253,6 +254,10 _at__at_ static const struct Curl_handler * const protocols[] = {
   &Curl_handler_gopher,
 #endif
 
+#if !defined CURL_DISABLE_GEMINI && defined USE_SSL
+  &Curl_handler_gemini,
+#endif
+
 #ifdef USE_LIBRTMP
   &Curl_handler_rtmp,
   &Curl_handler_rtmpt,
diff --git a/lib/urldata.h b/lib/urldata.h
index f085c093c..76ad00856 100644
--- a/lib/urldata.h
+++ b/lib/urldata.h
_at__at_ -49,6 +49,7 _at__at_
 #define PORT_RTMPT PORT_HTTP
 #define PORT_RTMPS PORT_HTTPS
 #define PORT_GOPHER 70
+#define PORT_GEMINI 1965
 #define PORT_MQTT 1883
 
 #define DICT_MATCH "/MATCH:"
_at__at_ -659,6 +660,7 _at__at_ struct SingleRequest {
     struct SMTP *smtp;
     struct SSHPROTO *ssh;
     struct TELNET *telnet;
+    struct GEMINI *gemini;
   } p;
 #ifndef CURL_DISABLE_DOH
   struct dohdata doh; /* DoH specific data for this request */
-- 
If possible, please keep mailing list in CC. It is public.
-------------------------------------------------------------------
Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library
Etiquette:   https://curl.se/mail/etiquette.html
Received on 2020-11-28