diff -u --recursive htdig-3.1.4.org/CONFIG htdig-3.1.4/CONFIG --- htdig-3.1.4.org/CONFIG Fri Dec 10 01:29:30 1999 +++ htdig-3.1.4/CONFIG Wed Jan 19 14:39:56 2000 @@ -8,7 +8,7 @@ # These variables are set by configure # # This specifies the root of the directory tree to be used by ht://Dig -prefix= /opt/www/htdig +prefix= /opt/htdig # This specifies the root of the directory tree to be used for programs # installed by ht://Dig diff -u --recursive htdig-3.1.4.org/Makefile.config.in htdig-3.1.4/Makefile.config.in --- htdig-3.1.4.org/Makefile.config.in Fri Dec 10 01:28:21 1999 +++ htdig-3.1.4/Makefile.config.in Wed Jan 19 14:39:35 2000 @@ -24,13 +24,13 @@ SENDMAIL= @SENDMAIL@ DEFINES= -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" -LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib +LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib -L/opt/ssl/lib INCS= -I$(top_srcdir)/htlib -I$(top_srcdir)/htcommon \ - -I../db/dist -I../include + -I../db/dist -I../include -I/opt/ssl/include HTLIBS= ../htcommon/libcommon.a \ ../htlib/libht.a \ ../db/dist/libdb.a -LIBS= $(HTLIBS) @LIBS@ +LIBS= $(HTLIBS) @LIBS@ -lssl -lcrypto DIST= @PACKAGE@-@VERSION@ DISTDIR= $(top_srcdir)/../$(DIST) diff -u --recursive htdig-3.1.4.org/htcommon/DocumentDB.cc htdig-3.1.4/htcommon/DocumentDB.cc --- htdig-3.1.4.org/htcommon/DocumentDB.cc Fri Dec 10 01:28:44 1999 +++ htdig-3.1.4/htcommon/DocumentDB.cc Thu Jan 20 10:16:44 2000 @@ -217,7 +217,7 @@ while ((key = dbf->Get_Next())) { dbf->Get(key, data); - if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0) + if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0 || strncmp(HtURLCodec::instance()->decode(key), "https:", 6) == 0) { ref = new DocumentRef; ref->Deserialize(data); @@ -284,7 +284,7 @@ while ((coded_key = dbf->Get_Next())) { String key = HtURLCodec::instance()->decode(coded_key); - if (mystrncasecmp(key, "http:", 5) == 0) + if (mystrncasecmp(key, "http:", 5) == 0 || mystrncasecmp(key, "https:", 6) == 0) { 
DocumentRef *ref = (*this)[key]; if (ref) diff -u --recursive htdig-3.1.4.org/htcommon/defaults.cc htdig-3.1.4/htcommon/defaults.cc --- htdig-3.1.4.org/htcommon/defaults.cc Fri Dec 10 01:28:44 1999 +++ htdig-3.1.4/htcommon/defaults.cc Thu Jan 20 10:16:37 2000 @@ -37,7 +37,7 @@ {"bad_querystr", ""}, {"bad_word_list", "${common_dir}/bad_words"}, {"case_sensitive", "true"}, - {"common_url_parts", "http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"}, + {"common_url_parts", "https:// https://www. http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"}, {"create_image_list", "false"}, {"create_url_list", "false"}, {"compression_level", "0"}, diff -u --recursive htdig-3.1.4.org/htdig/Document.cc htdig-3.1.4/htdig/Document.cc --- htdig-3.1.4.org/htdig/Document.cc Fri Dec 10 01:28:44 1999 +++ htdig-3.1.4/htdig/Document.cc Fri Jan 21 10:33:46 2000 @@ -220,6 +220,7 @@ tm.tm_year += 1900; tm.tm_yday = 0; // clear these to prevent problems in strftime() tm.tm_wday = 0; + tm.tm_isdst = -1; if (debug > 2) { @@ -328,7 +329,7 @@ return Document_no_host; } } - + c.assign_ssl(strcmp(url->service(), "https") == 0); if (c.connect(1) == NOTOK) { if (debug) diff -u --recursive htdig-3.1.4.org/htdig/Images.cc htdig-3.1.4/htdig/Images.cc --- htdig-3.1.4.org/htdig/Images.cc Fri Dec 10 01:28:44 1999 +++ htdig-3.1.4/htdig/Images.cc Thu Jan 20 10:15:16 2000 @@ -61,7 +61,7 @@ { String u = url; URL Url(url); - if (strcmp(Url.service(), "http") != 0) + if (strcmp(Url.service(), "http") != 0 && strcmp(Url.service(), "https") != 0) return 0; u.lowercase(); @@ -81,6 +81,7 @@ return 0; if (c.assign_server(Url.host()) == NOTOK) return 0; + c.assign_ssl(strcmp(Url.service(), "https") == 0); if (c.connect(1) == NOTOK) { diff -u --recursive htdig-3.1.4.org/htdig/Retriever.cc htdig-3.1.4/htdig/Retriever.cc --- htdig-3.1.4.org/htdig/Retriever.cc Fri Dec 10 01:28:44 1999 +++ 
htdig-3.1.4/htdig/Retriever.cc Thu Jan 20 16:09:36 2000 @@ -117,8 +117,7 @@ // from == 2 add url from db.log // from == 3 urls in db.docs and there was a db.log // -void -Retriever::Initial(char *list, int from) +void Retriever::Initial(char *list, int from) { // // Split the list of urls up into individual urls. @@ -137,10 +136,10 @@ cout << "\t" << from << ":" << (int) log << ":" << url; if (!server) { - String robotsURL = "http://"; - robotsURL << u.host() << "/robots.txt"; + String robotsURL = u.service(); + robotsURL << "://" << u.host() << "/robots.txt"; String *localRobotsFile = GetLocal(robotsURL.get()); - server = new Server(u.host(), u.port(), localRobotsFile); + server = new Server(u.host(), u.port(), strcmp(u.service(), "https") == 0, localRobotsFile); servers.Add(u.signature(), server); delete localRobotsFile; } @@ -668,10 +667,10 @@ // Currently, we only deal with HTTP URLs. Gopher and ftp will // come later... ***FIX*** // - if (strstr(u, "/../") || strncmp(u, "http://", 7) != 0) + if (strstr(u, "/../") || (strncmp(u, "http://", 7) != 0 && strncmp(u, "https://", 8) != 0)) { if (debug > 2) - cout << endl <<" Rejected: Not an http or relative link!"; + cout << endl <<" Rejected: Not an http, https or relative link!"; return FALSE; } @@ -1172,10 +1171,10 @@ // // Hadn't seen this server, yet. Register it // - String robotsURL = "http://"; - robotsURL << url.host() << "/robots.txt"; + String robotsURL = url.service(); + robotsURL << "://" << url.host() << "/robots.txt"; String *localRobotsFile = GetLocal(robotsURL.get()); - server = new Server(url.host(), url.port(), localRobotsFile); + server = new Server(url.host(), url.port(), strcmp(url.service(), "https") == 0, localRobotsFile); servers.Add(url.signature(), server); delete localRobotsFile; } @@ -1305,10 +1304,10 @@ // // Hadn't seen this server, yet. 
Register it // - String robotsURL = "http://"; - robotsURL << url.host() << "/robots.txt"; + String robotsURL = url.service(); + robotsURL << "://" << url.host() << "/robots.txt"; String *localRobotsFile = GetLocal(robotsURL.get()); - server = new Server(url.host(), url.port(), localRobotsFile); + server = new Server(url.host(), url.port(), strcmp(url.service(), "https") == 0, localRobotsFile); servers.Add(url.signature(), server); delete localRobotsFile; } diff -u --recursive htdig-3.1.4.org/htdig/Server.cc htdig-3.1.4/htdig/Server.cc --- htdig-3.1.4.org/htdig/Server.cc Fri Dec 10 01:28:44 1999 +++ htdig-3.1.4/htdig/Server.cc Thu Jan 20 10:14:55 2000 @@ -20,9 +20,9 @@ //***************************************************************************** -// Server::Server(char *host, int port, String *local_robots_file) +// Server::Server(char *host, int port, int ssl, String *local_robots_file) // -Server::Server(char *host, int port, String *local_robots_file) +Server::Server(char *host, int port, int ssl, String *local_robots_file) { if (debug > 0) cout << endl << "New server: " << host << ", " << port << endl; @@ -40,7 +40,8 @@ // // Attempt to get a robots.txt file from the specified server // - String url = "http://"; + String url; + url = ssl ?
"https://" : "http://"; url << host << ':' << port << "/robots.txt"; Document doc(url, 0); diff -u --recursive htdig-3.1.4.org/htdig/Server.h htdig-3.1.4/htdig/Server.h --- htdig-3.1.4.org/htdig/Server.h Fri Dec 10 01:28:44 1999 +++ htdig-3.1.4/htdig/Server.h Thu Jan 20 10:14:56 2000 @@ -25,7 +25,7 @@ // // Construction/Destruction // - Server(char *host, int port, String *local_robots_file = NULL); + Server(char *host, int port, int ssl, String *local_robots_file = NULL); ~Server(); // diff -u --recursive htdig-3.1.4.org/htlib/Connection.cc htdig-3.1.4/htlib/Connection.cc --- htdig-3.1.4.org/htlib/Connection.cc Fri Dec 10 01:28:46 1999 +++ htdig-3.1.4/htlib/Connection.cc Thu Jan 20 18:23:34 2000 @@ -39,6 +39,10 @@ int rresvport(int *); } +SSL_CTX *Connection::ctx = NULL; +SSL_METHOD *Connection::meth = NULL; + + List all_connections; Connection::Connection() @@ -49,8 +53,26 @@ server_name = 0; all_connections.Add(this); timeout_value = 0; + ssl = NULL; + m_ssl_on = 0; + initSSL(); } +void Connection::initSSL() +{ + if (ctx == NULL) + { + SSLeay_add_ssl_algorithms(); + meth = SSLv23_client_method(); /* negotiate the protocol version with the server instead of offering SSLv2 only */ + SSL_load_error_strings(); + ctx = SSL_CTX_new(meth); + if (ctx == NULL) + { + printf("ctx NULL\n"); + exit(1); + } + } +} //************************************************************************* // Connection::Connection(int socket) @@ -72,6 +94,9 @@ server_name = 0; all_connections.Add(this); timeout_value = 0; + ssl = NULL; + m_ssl_on = 0; + initSSL(); } @@ -94,15 +119,15 @@ { if (priv) { - int aport = IPPORT_RESERVED - 1; + int aport = IPPORT_RESERVED - 1; - sock = rresvport(&aport); + sock = rresvport(&aport); } else - sock = socket(AF_INET, SOCK_STREAM, 0); + sock = socket(AF_INET, SOCK_STREAM, 0); if (sock == NOTOK) - return NOTOK; + return NOTOK; int on = 1; setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)); @@ -149,6 +174,8 @@ if (sock >= 0) { int ret = ::close(sock); + SSL_free(ssl); + ssl = NULL; sock = -1; return ret; } @@ -224,6
+251,16 @@ //***************************************************************************** +// int Connection::assign_ssl(int ssl_on) +// +int Connection::assign_ssl(int ssl_on) +{ + m_ssl_on = ssl_on; + return OK; +} + + +//***************************************************************************** // int Connection::connect(int allow_EINTR) // int Connection::connect(int allow_EINTR) @@ -244,8 +281,24 @@ if (status == 0 || errno == EALREADY || errno == EISCONN) { - connected = 1; - return OK; + if (m_ssl_on) + { + ssl = SSL_new(ctx); + if (ssl != NULL) + { + SSL_set_fd(ssl, sock); + if (SSL_connect(ssl) == 1) /* only 1 means the handshake completed; 0 and negative values are failures */ + { + connected = 1; + return OK; + } + } + } + else + { + connected = 1; + return OK; + } } #if 0 if (status == ECONNREFUSED) @@ -373,26 +426,32 @@ need_io_stop = 0; do { - errno = 0; + errno = 0; + + if (timeout_value > 0) + { + fd_set fds; + FD_ZERO(&fds); + FD_SET(sock, &fds); + + timeval tv; + tv.tv_sec = timeout_value; + tv.tv_usec = 0; + + int selected = ::select(sock+1, &fds, 0, 0, &tv); + if (selected <= 0) + need_io_stop++; + } - if (timeout_value > 0) { - fd_set fds; - FD_ZERO(&fds); - FD_SET(sock, &fds); - - timeval tv; - tv.tv_sec = timeout_value; - tv.tv_usec = 0; - - int selected = ::select(sock+1, &fds, 0, 0, &tv); - if (selected <= 0) - need_io_stop++; - } - - if (!need_io_stop) - count = ::read(sock, buffer, maxlength); - else - count = -1; // Input timed out + if (!need_io_stop) + { + if (ssl != NULL) + count = SSL_read(ssl, buffer, maxlength); + else + count = ::read(sock, buffer, maxlength); + } + else + count = -1; // Input timed out } while (count < 0 && errno == EINTR && !need_io_stop); need_io_stop = 0; @@ -410,7 +469,10 @@ do { - count = ::write(sock, buffer, maxlength); + if (ssl != NULL) + count = SSL_write(ssl, buffer, maxlength); + else + count = ::write(sock, buffer, maxlength); } while (count < 0 && errno == EINTR && !need_io_stop); need_io_stop = 0; diff -u --recursive htdig-3.1.4.org/htlib/Connection.h
htdig-3.1.4/htlib/Connection.h --- htdig-3.1.4.org/htlib/Connection.h Fri Dec 10 01:28:46 1999 +++ htdig-3.1.4/htlib/Connection.h Thu Jan 20 10:16:09 2000 @@ -36,6 +36,14 @@ #include #include +#include +#include +#include +#include +#include +#include + + class String; class Connection : public io @@ -45,6 +53,7 @@ Connection(); Connection(int socket); ~Connection(); + void initSSL(); // (De)initialization int open(int priv = 0); @@ -64,6 +73,9 @@ int assign_server(unsigned int addr = INADDR_ANY); char *get_server() {return server_name;} + // SSL stuff + int assign_ssl(int ssl_on); + // Connection establishment int connect(int allow_EINTR = 0); Connection *accept(int priv = 0); @@ -90,6 +102,10 @@ private: int sock; + int m_ssl_on; + SSL *ssl; + static SSL_CTX *ctx; + static SSL_METHOD *meth; struct sockaddr_in server; int connected; char *peer; diff -u --recursive htdig-3.1.4.org/htlib/URL.cc htdig-3.1.4/htlib/URL.cc --- htdig-3.1.4.org/htlib/URL.cc Fri Dec 10 01:28:47 1999 +++ htdig-3.1.4/htlib/URL.cc Fri Jan 21 13:55:00 2000 @@ -130,8 +130,9 @@ while (isalpha(*p)) p++; int hasService = (*p == ':'); - if (hasService && ((strncmp(ref, "http://", 7) == 0) || - (strncmp(ref, "http:", 5) != 0))) + if (hasService && ( /* "http:rel" and "https:rel" are not complete urls, so both scheme tests must hold */ + ((strncmp(ref, "http://", 7) == 0) || (strncmp(ref, "http:", 5) != 0)) && + ((strncmp(ref, "https://", 8) == 0) || (strncmp(ref, "https:", 6) != 0)))) { // // No need to look at the parent url since this is a complete url...
@@ -216,7 +217,7 @@ _url << ":"; if (_host.length()) _url << "//" << _host; - if (_port != 80 && strcmp(_service, "http") == 0) + if (_port != 80 && (strcmp(_service, "http") == 0 || strcmp(_service, "https") == 0)) _url << ':' << _port; _url << _path; } @@ -464,7 +465,7 @@ if (_service.length() == 0 || _normal) return; - if (strcmp(_service, "http") != 0) + if (strcmp(_service, "http") != 0 && strcmp(_service, "https") != 0) return; removeIndex(_path); @@ -521,7 +522,7 @@ _url << ":"; if (_host.length()) _url << "//" << _host; - if (_port != 80 && strcmp(_service, "http") == 0) + if (_port != 80 && (strcmp(_service, "http") == 0 || strcmp(_service, "https") == 0)) _url << ':' << _port; _url << _path; _normal = 1; diff -u --recursive htdig-3.1.4.org/htlib/URL.h htdig-3.1.4/htlib/URL.h --- htdig-3.1.4.org/htlib/URL.h Fri Dec 10 01:28:47 1999 +++ htdig-3.1.4/htlib/URL.h Thu Jan 20 15:58:47 2000 @@ -40,7 +40,7 @@ void host(char *h) {_host = h;} int port() {return _port;} void port(int p) {_port = p;} - char *service() {return _service;} + char *service() {return _service.get();} void service(char *s) {_service = s;} char *path() {return _path;} void path(char *p);