From faee255730cfd4b5441fcf47e08072459f1d3ef6 Mon Sep 17 00:00:00 2001 From: Aleric Inglewood Date: Tue, 9 Oct 2012 21:01:55 +0200 Subject: [PATCH] HTTP timeout bug fix! Plus work in progress. Use the better understandable alias CURLOPT_HTTPHEADER in debug output Bug fix - finally! This fixes a bug I've been looking for a week. By accidently calling get_clock_count() for the mTimeoutLowSpeedClock 'event', the time difference 'now' - 'event' becomes negative, which should be impossible; the result being that timeout_low_speed stalled for 24 seconds while looping over the full 32 bit. That in turn made SSL handshakes of libcurl fail, which seemed to be impossible since the calls to libcurl had not changed! --- indra/aistatemachine/aicurl.cpp | 80 ++++++++++++++++++++++---- indra/aistatemachine/aicurlprivate.h | 18 +++++- indra/aistatemachine/aicurlthread.cpp | 51 ++++++++++++++-- indra/aistatemachine/debug_libcurl.cpp | 2 +- 4 files changed, 133 insertions(+), 18 deletions(-) diff --git a/indra/aistatemachine/aicurl.cpp b/indra/aistatemachine/aicurl.cpp index cd3716c8a..5f53a8685 100644 --- a/indra/aistatemachine/aicurl.cpp +++ b/indra/aistatemachine/aicurl.cpp @@ -1061,6 +1061,8 @@ static int curl_debug_cb(CURL*, curl_infotype infotype, char* buf, size_t size, break; case CURLINFO_HEADER_OUT: LibcwDoutStream << "H< "; + if (size >= 4 && strncmp(buf, "GET ", 4) == 0) + request->mDebugIsGetMethod = true; break; case CURLINFO_DATA_IN: LibcwDoutStream << "D> "; @@ -1171,6 +1173,7 @@ void CurlEasyRequest::applyProxySettings(void) //static CURLcode CurlEasyRequest::curlCtxCallback(CURL* curl, void* sslctx, void* parm) { + DoutEntering(dc::curl, "CurlEasyRequest::curlCtxCallback((CURL*)" << (void*)curl << ", " << sslctx << ", " << parm << ")"); SSL_CTX* ctx = (SSL_CTX*)sslctx; // Turn off TLS v1.1 (which is not supported anyway by Linden Lab) because otherwise we fail to connect. // Also turn off SSL v2, which is highly broken and strongly discouraged[1]. @@ -1320,6 +1323,21 @@ void CurlEasyRequest::finalizeRequest(std::string const& url, AIHTTPTimeoutPolic F64 const CurlEasyRequest::sTimeoutClockWidth = 1.0 / calc_clock_frequency(); // Time between two clock ticks, in seconds. U64 CurlEasyRequest::sTimeoutClockCount; // Clock count, set once per select() exit. +void CurlEasyRequest::timeout_timings(void) +{ + double t; + getinfo(CURLINFO_NAMELOOKUP_TIME, &t); + DoutCurlEasy("CURLINFO_NAMELOOKUP_TIME = " << t); + getinfo(CURLINFO_CONNECT_TIME, &t); + DoutCurlEasy("CURLINFO_CONNECT_TIME = " << t); + getinfo(CURLINFO_APPCONNECT_TIME, &t); + DoutCurlEasy("CURLINFO_APPCONNECT_TIME = " << t); + getinfo(CURLINFO_PRETRANSFER_TIME, &t); + DoutCurlEasy("CURLINFO_PRETRANSFER_TIME = " << t); + getinfo(CURLINFO_STARTTRANSFER_TIME, &t); + DoutCurlEasy("CURLINFO_STARTTRANSFER_TIME = " << t); +} + // CURL-THREAD // This is called when the easy handle is actually being added to the multi handle (thus after being queued). // AIFIXME: Doing this only when it is actually being added assures that the first curl easy handle that is @@ -1336,9 +1354,11 @@ void CurlEasyRequest::timeout_add_easy_request(void) setopt(CURLOPT_CONNECTTIMEOUT, mTimeoutPolicy->getConnectTimeout(mTimeoutLowercaseHostname)); setopt(CURLOPT_TIMEOUT, mTimeoutPolicy->getCurlTransaction()); // This boolean is valid (only) if we get a time out event from libcurl. + mTimeoutLowSpeedOn = false; mTimeoutNothingReceivedYet = true; mTimeoutStalled = (U64)-1; DoutCurlEasy("timeout_add_easy_request: mTimeoutStalled set to -1"); + mTimeoutUploadFinished = false; } // CURL-THREAD @@ -1367,11 +1387,11 @@ bool CurlEasyRequest::timeout_data_sent(size_t n) // | | void CurlEasyRequest::timeout_reset_lowspeed(void) { - mTimeoutLowSpeedClock = get_clock_count(); + mTimeoutLowSpeedClock = sTimeoutClockCount; mTimeoutLowSpeedOn = true; mTimeoutLastSecond = -1; // This causes timeout_lowspeed to initialize the rest. mTimeoutStalled = (U64)-1; // Stop reply delay timer. - DoutCurlEasy("timeout_reset_lowspeed: mTimeoutStalled set to -1"); + DoutCurlEasy("timeout_reset_lowspeed: mTimeoutLowSpeedClock = " << mTimeoutLowSpeedClock << "; mTimeoutStalled = -1"); } // CURL-THREAD @@ -1382,6 +1402,10 @@ void CurlEasyRequest::timeout_reset_lowspeed(void) // | void CurlEasyRequest::timeout_upload_finished(void) { + if (1 || mTimeoutUploadFinished) + timeout_timings(); + llassert(!mTimeoutUploadFinished); // If we get here twice, then the 'upload finished' detection failed. + mTimeoutUploadFinished = true; // We finished uploading (if there was a body to upload at all), so not more transfer rate timeouts. mTimeoutLowSpeedOn = false; // Timeout if the server doesn't reply quick enough. @@ -1402,6 +1426,16 @@ bool CurlEasyRequest::timeout_data_received(size_t n) if (mTimeoutNothingReceivedYet && n > 0) { mTimeoutNothingReceivedYet = false; + if (!mTimeoutUploadFinished) + { + // mTimeoutUploadFinished not being set this point should only happen for GET requests (in fact, then it is normal), + // because in that case it is impossible to detect the difference between connecting and waiting for a reply without + // using CURLOPT_DEBUGFUNCTION. Note that mDebugIsGetMethod is only valid when the debug channel 'curlio' is on, + // because it is set in the debug callback function. + Debug(llassert(mDebugIsGetMethod || !dc::curlio.is_on())); + // 'Upload finished' detection failed, generate it now. + timeout_upload_finished(); + } // We received something; switch to getLowSpeedLimit()/getLowSpeedTime(). timeout_reset_lowspeed(); } @@ -1435,6 +1469,10 @@ bool CurlEasyRequest::timeout_lowspeed(size_t bytes) // When are we? S32 second = (sTimeoutClockCount - mTimeoutLowSpeedClock) * sTimeoutClockWidth; + llassert(sTimeoutClockWidth > 0.0); + // This REALLY should never happen, but due to another bug it did happened + // and caused something so evil and hard to find that... NEVER AGAIN! + llassert(second >= 0); // If this is the same second as last time, just add the number of bytes to the current bucket. if (second == mTimeoutLastSecond) @@ -1520,7 +1558,7 @@ bool CurlEasyRequest::timeout_lowspeed(size_t bytes) if (second + max_stall_time >= low_speed_time && mTimeoutTotalBytes - dropped_bytes < mintotalbytes) break; } - // If this function isn't called until mTimeoutStalled, we stalled. + // If this function isn't called again within max_stall_time seconds, we stalled. mTimeoutStalled = sTimeoutClockCount + max_stall_time / sTimeoutClockWidth; DoutCurlEasy("mTimeoutStalled set to sTimeoutClockCount (" << sTimeoutClockCount << ") + " << (mTimeoutStalled - sTimeoutClockCount) << " (" << max_stall_time << " seconds)"); @@ -1535,14 +1573,27 @@ bool CurlEasyRequest::timeout_lowspeed(size_t bytes) // | void CurlEasyRequest::timeout_done(CURLcode code) { - // AIFIXME: checking mTimeoutConnected doesn't work: CURL_POLL_OUT is set when connect() is called and always - // reset before we get here causing timeout_upload_finished() to be called: mTimeoutConnected will always be true. - // Also mTimeoutNothingReceivedYet is hardly accurate, as what we really want to know is if the DNS failed. - if (code == CURLE_OPERATION_TIMEDOUT && mTimeoutNothingReceivedYet) + timeout_timings(); + llassert(mTimeoutUploadFinished || mTimeoutNothingReceivedYet); // If this is false then the 'upload finished' detection failed. + if (code == CURLE_OPERATION_TIMEDOUT) { - // Inform policy object that there might be problems with this host. - AIHTTPTimeoutPolicy::connect_timed_out(mTimeoutLowercaseHostname); - // AIFIXME: use return value to change priority + if (mTimeoutNothingReceivedYet) + { + // Only consider this to possibly be related to a DNS lookup if we didn't connect + // to the remote host yet. Note that CURLINFO_CONNECT_TIME gives the time needed + // to connect to the proxy, or first host-- and fails to take redirects into account. + // Also note that CURLINFO_APPCONNECT_TIME gives the time of the FIRST actual + // connect, so for any transfers on the same pipeline that come after this we + // also correctly determine that there was not a problem with a DNS lookup. + double appconnect_time; + getinfo(CURLINFO_APPCONNECT_TIME, &appconnect_time); + if (appconnect_time == 0) + { + // Inform policy object that there might be problems with resolving this host. + AIHTTPTimeoutPolicy::connect_timed_out(mTimeoutLowercaseHostname); + // AIFIXME: use return value to change priority + } + } } // Make sure no timeout will happen anymore. mTimeoutLowSpeedOn = false; @@ -1550,6 +1601,11 @@ void CurlEasyRequest::timeout_done(CURLcode code) DoutCurlEasy("timeout_done: mTimeoutStalled set to -1"); } +void CurlEasyRequest::timeout_print_diagnostics(AIHTTPTimeoutPolicy const& policy) +{ + llwarns << "Request to " << mTimeoutLowercaseHostname << " timed out for " << policy.name() << llendl; +} + // End of HTTP Timeout stuff. //............................................................................. @@ -1880,6 +1936,10 @@ void CurlResponderBuffer::processOutput(AICurlEasyRequest_wat& curl_easy_request if (mResponder) { + if (code == CURLE_OPERATION_TIMEDOUT) + { + curl_easy_request_w->timeout_print_diagnostics(mResponder->getHTTPTimeoutPolicy()); + } if (mEventsTarget) { // Only the responder registers for these events. diff --git a/indra/aistatemachine/aicurlprivate.h b/indra/aistatemachine/aicurlprivate.h index e22976ca6..5b9d6457f 100644 --- a/indra/aistatemachine/aicurlprivate.h +++ b/indra/aistatemachine/aicurlprivate.h @@ -253,7 +253,7 @@ namespace AICurlPrivate { // Called from applyDefaultOptions. void applyProxySettings(void); - // Used in applyProxySettings. + // Used in applyDefaultOptions. static CURLcode curlCtxCallback(CURL* curl, void* sslctx, void* parm); public: @@ -288,6 +288,9 @@ namespace AICurlPrivate { // Accessor. bool timeout_has_stalled(void) const { return mTimeoutStalled < sTimeoutClockCount; } + // Called from CurlResponderBuffer::processOutput if a timeout occurred. + void timeout_print_diagnostics(AIHTTPTimeoutPolicy const& policy); + private: // (Re)start low speed transer rate detection. void timeout_reset_lowspeed(void); @@ -300,7 +303,7 @@ namespace AICurlPrivate { public: // Called by MultiHandle::check_run_count() to store result code that is returned by getResult. - void store_result(CURLcode result) { mResult = result; } + void storeResult(CURLcode result) { mResult = result; } // Called by MultiHandle::check_run_count() when the curl easy handle is done. void done(AICurlEasyRequest_wat& curl_easy_request_w) { finished(curl_easy_request_w); } @@ -324,6 +327,7 @@ namespace AICurlPrivate { U16 mTimeoutBucket; // The bucket corresponding to mTimeoutLastSecond. bool mTimeoutNothingReceivedYet; // Set when connected, reset when the HTML reply header from the server is received. bool mTimeoutLowSpeedOn; // Set while uploading or downloading data. + bool mTimeoutUploadFinished; // Only used for debugging. S32 mTimeoutLastSecond; // The time at which timeout_lowspeed was last called, in seconds since mTimeoutLowSpeedClock. U32 mTimeoutTotalBytes; // The sum of all bytes in mTimeoutBuckets. U64 mTimeoutLowSpeedClock; // Clock count at which low speed transfer rate started. @@ -331,13 +335,20 @@ namespace AICurlPrivate { public: static F64 const sTimeoutClockWidth; // Time between two clock ticks in seconds. static U64 sTimeoutClockCount; // Clock count used during one loop of the main loop. +#if defined(CWDEBUG) || defined(DEBUG_CURLIO) + bool mDebugIsGetMethod; +#endif private: // This class may only be created by constructing a ThreadSafeCurlEasyRequest. friend class ThreadSafeCurlEasyRequest; // Throws AICurlNoEasyHandle. CurlEasyRequest(void) : - mHeaders(NULL), mRequestFinalized(false), mEventsTarget(NULL), mResult(CURLE_FAILED_INIT), mTimeoutPolicy(NULL) + mHeaders(NULL), mRequestFinalized(false), mEventsTarget(NULL), mResult(CURLE_FAILED_INIT), +#if defined(CWDEBUG) || defined(DEBUG_CURLIO) + mDebugIsGetMethod(false), +#endif + mTimeoutPolicy(NULL) { applyDefaultOptions(); } public: ~CurlEasyRequest(); @@ -348,6 +359,7 @@ namespace AICurlPrivate { // For debugging purposes bool is_finalized(void) const { return mRequestFinalized; } + void timeout_timings(void); // Return pointer to the ThreadSafe (wrapped) version of this object. ThreadSafeCurlEasyRequest* get_lockobj(void); diff --git a/indra/aistatemachine/aicurlthread.cpp b/indra/aistatemachine/aicurlthread.cpp index bea567c9e..ee345ac47 100644 --- a/indra/aistatemachine/aicurlthread.cpp +++ b/indra/aistatemachine/aicurlthread.cpp @@ -697,7 +697,7 @@ bool MergeIterator::next(curl_socket_t& fd_out, int& ev_bitmask_out) #if defined(CWDEBUG) || defined(DEBUG_CURLIO) #undef AI_CASE_RETURN -#define AI_CASE_RETURN(x) do { case x: return #x; } while(0) +#define AI_CASE_RETURN(x) case x: return #x; static char const* action_str(int action) { switch(action) @@ -710,6 +710,32 @@ static char const* action_str(int action) } return ""; } + +struct DebugFdSet { + int nfds; + fd_set* fdset; + DebugFdSet(int n, fd_set* p) : nfds(n), fdset(p) { } +}; + +std::ostream& operator<<(std::ostream& os, DebugFdSet const& s) +{ + if (!s.fdset) + return os << "NULL"; + bool first = true; + os << '{'; + for (int fd = 0; fd < s.nfds; ++fd) + { + if (FD_ISSET(fd, s.fdset)) + { + if (!first) + os << ", "; + os << fd; + first = false; + } + } + os << '}'; + return os; +} #endif // A class with info for each socket that is in use by curl. @@ -763,9 +789,17 @@ void CurlSocketInfo::set_action(int action) else { mMultiHandle.mWritePollSet->remove(mSocketFd); - // If CURL_POLL_OUT is removed, we have nothing more to send apparently. + + // The following is a bit of a hack, needed because of the lack of proper timeout callbacks in libcurl. + // The removal of CURL_POLL_OUT could be part of the SSL handshake, therefore check if we're already connected: AICurlEasyRequest_wat curl_easy_request_w(*mEasyRequest); - curl_easy_request_w->timeout_upload_finished(); // Update timeout administration. + double pretransfer_time; + curl_easy_request_w->getinfo(CURLINFO_PRETRANSFER_TIME, &pretransfer_time); + if (pretransfer_time > 0) + { + // If CURL_POLL_OUT is removed and CURLINFO_PRETRANSFER_TIME is already set, then we have nothing more to send apparently. + curl_easy_request_w->timeout_upload_finished(); // Update timeout administration. + } } } } @@ -1284,6 +1318,9 @@ void AICurlThread::run(void) timeout.tv_sec = timeout_ms / 1000; timeout.tv_usec = (timeout_ms % 1000) * 1000; #ifdef CWDEBUG +#ifdef DEBUG_CURLIO + Dout(dc::curl|flush_cf|continued_cf, "select(" << nfds << ", " << DebugFdSet(nfds, read_fd_set) << ", " << DebugFdSet(nfds, write_fd_set) << ", NULL, timeout = " << timeout_ms << " ms) = "); +#else static int last_nfds = -1; static long last_timeout_ms = -1; static int same_count = 0; @@ -1299,10 +1336,14 @@ void AICurlThread::run(void) { ++same_count; } +#endif #endif ready = select(nfds, read_fd_set, write_fd_set, NULL, &timeout); mWakeUpMutex.unlock(); #ifdef CWDEBUG +#ifdef DEBUG_CURLIO + Dout(dc::finish|cond_error_cf(ready == -1), ready); +#else static int last_ready = -2; static int last_errno = 0; if (!same) @@ -1319,6 +1360,7 @@ void AICurlThread::run(void) last_ready = ready; if (ready == -1) last_errno = errno; +#endif #endif // Select returns the total number of bits set in each of the fd_set's (upon return), // or -1 when an error occurred. A value of 0 means that a timeout occurred. @@ -1329,6 +1371,7 @@ void AICurlThread::run(void) } // Clock count used for timeouts. CurlEasyRequest::sTimeoutClockCount = get_clock_count(); + Dout(dc::curl, "CurlEasyRequest::sTimeoutClockCount = " << CurlEasyRequest::sTimeoutClockCount); if (ready == 0) { multi_handle_w->socket_action(CURL_SOCKET_TIMEOUT, 0); @@ -1607,7 +1650,7 @@ void MultiHandle::finish_easy_request(AICurlEasyRequest const& easy_request, CUR { AICurlEasyRequest_wat curl_easy_request_w(*easy_request); // Store the result in the easy handle. - curl_easy_request_w->store_result(result); + curl_easy_request_w->storeResult(result); #ifdef CWDEBUG char* eff_url; curl_easy_request_w->getinfo(CURLINFO_EFFECTIVE_URL, &eff_url); diff --git a/indra/aistatemachine/debug_libcurl.cpp b/indra/aistatemachine/debug_libcurl.cpp index 85833d4b1..512bc696c 100644 --- a/indra/aistatemachine/debug_libcurl.cpp +++ b/indra/aistatemachine/debug_libcurl.cpp @@ -249,7 +249,7 @@ std::ostream& operator<<(std::ostream& os, CURLoption option) CASEPRINT(CURLOPT_LOW_SPEED_TIME); CASEPRINT(CURLOPT_RESUME_FROM); CASEPRINT(CURLOPT_COOKIE); - CASEPRINT(CURLOPT_RTSPHEADER); + CASEPRINT(CURLOPT_HTTPHEADER); CASEPRINT(CURLOPT_HTTPPOST); CASEPRINT(CURLOPT_SSLCERT); CASEPRINT(CURLOPT_KEYPASSWD);