diff --git a/indra/llmessage/CMakeLists.txt b/indra/llmessage/CMakeLists.txt index 8bbb150c4..f3e68b53b 100644 --- a/indra/llmessage/CMakeLists.txt +++ b/indra/llmessage/CMakeLists.txt @@ -27,6 +27,7 @@ set(llmessage_SOURCE_FILES aicurlperhost.cpp aicurlthread.cpp aihttpheaders.cpp + aihttptimeout.cpp aihttptimeoutpolicy.cpp debug_libcurl.cpp llhttpclient.cpp @@ -114,6 +115,7 @@ set(llmessage_HEADER_FILES aicurlperhost.h aicurlthread.h aihttpheaders.h + aihttptimeout.h aihttptimeoutpolicy.h debug_libcurl.h llares.h diff --git a/indra/llmessage/aicurlprivate.h b/indra/llmessage/aicurlprivate.h index d9944ccb3..bb39bdddb 100644 --- a/indra/llmessage/aicurlprivate.h +++ b/indra/llmessage/aicurlprivate.h @@ -35,82 +35,15 @@ #include "llatomic.h" #include "llrefcount.h" #include "aicurlperhost.h" +#include "aihttptimeout.h" class AIHTTPHeaders; -class AIHTTPTimeoutPolicy; -class AICurlEasyRequest; class AICurlEasyRequestStateMachine; namespace AICurlPrivate { -class CurlEasyRequest; -class ThreadSafeBufferedCurlEasyRequest; - namespace curlthread { - class MultiHandle; - -// A class that keeps track of timeout administration per connection. -class HTTPTimeout : public LLRefCount { - private: - AIHTTPTimeoutPolicy const* mPolicy; // A pointer to the used timeout policy. - std::vector mBuckets; // An array with the number of bytes transfered in each second. - U16 mBucket; // The bucket corresponding to mLastSecond. - bool mNothingReceivedYet; // Set when created, reset when the HTML reply header from the server is received. - bool mLowSpeedOn; // Set while uploading or downloading data. - bool mUploadFinished; // Used to keep track of whether upload_finished was called yet. - S32 mLastSecond; // The time at which lowspeed() was last called, in seconds since mLowSpeedClock. - U32 mTotalBytes; // The sum of all bytes in mBuckets. - U64 mLowSpeedClock; // Clock count at which low speed detection (re)started. - U64 mStalled; // The clock count at which this transaction is considered to be stalling if nothing is transfered anymore. - public: - static F64 const sClockWidth; // Time between two clock ticks in seconds. - static U64 sClockCount; // Clock count used as 'now' during one loop of the main loop. -#if defined(CWDEBUG) || defined(DEBUG_CURLIO) - ThreadSafeBufferedCurlEasyRequest* mLockObj; -#endif - - public: - HTTPTimeout(AIHTTPTimeoutPolicy const* policy, ThreadSafeBufferedCurlEasyRequest* lock_obj) : - mPolicy(policy), mNothingReceivedYet(true), mLowSpeedOn(false), mUploadFinished(false), mStalled((U64)-1) -#if defined(CWDEBUG) || defined(DEBUG_CURLIO) - , mLockObj(lock_obj) -#endif - { } - - // Called after sending all headers, when body data is written the first time. - void connected(void); - - // Called when everything we had to send to the server has been sent. - void upload_finished(void); - - // Called when data is sent. Returns true if transfer timed out. - bool data_sent(size_t n); - - // Called when data is received. Returns true if transfer timed out. - bool data_received(size_t n/*,*/ ASSERT_ONLY_COMMA(bool upload_error_status = false)); - - // Called immediately before done() after curl finished, with code. - void done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode code); - - // Accessor. - bool has_stalled(void) const { return mStalled < sClockCount; } - - // Called from BufferedCurlEasyRequest::processOutput if a timeout occurred. - void print_diagnostics(CurlEasyRequest const* curl_easy_request, char const* eff_url); - -#if defined(CWDEBUG) || defined(DEBUG_CURLIO) - void* get_lockobj(void) const { return mLockObj; } -#endif - - private: - // (Re)start low speed transer rate detection. - void reset_lowspeed(void); - - // Common low speed detection, Called from data_sent or data_received. - bool lowspeed(size_t bytes); -}; - } // namespace curlthread void handle_multi_error(CURLMcode code); diff --git a/indra/llmessage/aicurlthread.cpp b/indra/llmessage/aicurlthread.cpp index ea83258a3..efb5ca46b 100644 --- a/indra/llmessage/aicurlthread.cpp +++ b/indra/llmessage/aicurlthread.cpp @@ -31,6 +31,7 @@ #include "linden_common.h" #include "aicurlthread.h" #include "aihttptimeoutpolicy.h" +#include "aihttptimeout.h" #include "aicurlperhost.h" #include "lltimer.h" // ms_sleep, get_clock_count #include "llhttpstatuscodes.h" @@ -1809,390 +1810,6 @@ void MultiHandle::finish_easy_request(AICurlEasyRequest const& easy_request, CUR curl_easy_request_w->done(curl_easy_request_w, result); } -//----------------------------------------------------------------------------- -// HTTPTimeout - -//static -F64 const HTTPTimeout::sClockWidth = 1.0 / calc_clock_frequency(); // Time between two clock ticks, in seconds. -U64 HTTPTimeout::sClockCount; // Clock count, set once per select() exit. - -// CURL-THREAD -// This is called when body data was sent to the server socket. -// <-----mLowSpeedOn------> -// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done -// ^ ^ ^ ^ ^ ^ -// | | | | | | -bool HTTPTimeout::data_sent(size_t n) -{ - // Generate events. - if (!mLowSpeedOn) - { - // If we can send data (for the first time) then that's our only way to know we connected. - reset_lowspeed(); - } - // Detect low speed. - return lowspeed(n); -} - -// CURL-THREAD -// This is called when the 'low speed' timer should be started. -// <-----mLowSpeedOn------> <-------mLowSpeedOn--------> -// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done -// ^ ^ -// | | -void HTTPTimeout::reset_lowspeed(void) -{ - mLowSpeedClock = sClockCount; - mLowSpeedOn = true; - mLastSecond = -1; // This causes lowspeed to initialize the rest. - mStalled = (U64)-1; // Stop reply delay timer. - DoutCurl("reset_lowspeed: mLowSpeedClock = " << mLowSpeedClock << "; mStalled = -1"); -} - -// CURL-THREAD -// This is called when everything we had to send to the server has been sent. -// <-----mLowSpeedOn------> -// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done -// ^ -// | -void HTTPTimeout::upload_finished(void) -{ - llassert(!mUploadFinished); // If we get here twice, then the 'upload finished' detection failed. - mUploadFinished = true; - // We finished uploading (if there was a body to upload at all), so not more transfer rate timeouts. - mLowSpeedOn = false; - // Timeout if the server doesn't reply quick enough. - mStalled = sClockCount + mPolicy->getReplyDelay() / sClockWidth; - DoutCurl("upload_finished: mStalled set to sClockCount (" << sClockCount << ") + " << (mStalled - sClockCount) << " (" << mPolicy->getReplyDelay() << " seconds)"); -} - -// CURL-THREAD -// This is called when data was received from the server. -// -// <--------------------------------mNothingReceivedYet------------------------------><-------mLowSpeedOn--------> -// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done -// ^ ^ ^ ^ ^ ^ ^ ^ -// | | | | | | | | -bool HTTPTimeout::data_received(size_t n/*,*/ -#ifdef CWDEBUG - ASSERT_ONLY_COMMA(bool upload_error_status) -#else - ASSERT_ONLY_COMMA(bool) -#endif - ) -{ - // The HTTP header of the reply is the first thing we receive. - if (mNothingReceivedYet && n > 0) - { - if (!mUploadFinished) - { - // mUploadFinished not being set this point should only happen for GET requests (in fact, then it is normal), - // because in that case it is impossible to detect the difference between connecting and waiting for a reply without - // using CURLOPT_DEBUGFUNCTION. Note that mDebugIsHeadOrGetMethod is only valid when the debug channel 'curlio' is on, - // because it is set in the debug callback function. - // This is also normal if we received a HTTP header with an error status, since that can interrupt our upload. - Debug(llassert(upload_error_status || AICurlEasyRequest_wat(*mLockObj)->mDebugIsHeadOrGetMethod || !dc::curlio.is_on())); - // 'Upload finished' detection failed, generate it now. - upload_finished(); - } - // Turn this flag off again now that we received data, so that if 'upload_finished()' is called again - // for a future upload on the same descriptor, then that won't trigger an assert. - // Note that because we also set mNothingReceivedYet here, we won't enter this code block anymore, - // so it's safe to do this. - mUploadFinished = false; - // Mark that something was received. - mNothingReceivedYet = false; - // We received something; switch to getLowSpeedLimit()/getLowSpeedTime(). - reset_lowspeed(); - } - return mLowSpeedOn ? lowspeed(n) : false; -} - -// CURL_THREAD -// bytes is the number of bytes we just sent or received (including headers). -// Returns true if the transfer should be aborted. -// -// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done -// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ -// | | | | | | | | | | | | | | -bool HTTPTimeout::lowspeed(size_t bytes) -{ - //DoutCurlEntering("HTTPTimeout::lowspeed(" << bytes << ")"); commented out... too spammy for normal use. - - // The algorithm to determine if we timed out if different from how libcurls CURLOPT_LOW_SPEED_TIME works. - // - // libcurl determines the transfer rate since the last call to an equivalent 'lowspeed' function, and then - // triggers a timeout if CURLOPT_LOW_SPEED_TIME long such a transfer value is less than CURLOPT_LOW_SPEED_LIMIT. - // That doesn't work right because once there IS data it can happen that this function is called a few - // times (with less than a milisecond in between) causing seemingly VERY high "transfer rate" spikes. - // The only correct way to determine the transfer rate is to actually average over CURLOPT_LOW_SPEED_TIME - // seconds. - // - // We do this as follows: we create low_speed_time (in seconds) buckets and fill them with the number - // of bytes received during that second. We also keep track of the sum of all bytes received between 'now' - // and 'now - llmax(starttime, low_speed_time)'. Then if that period reaches at least low_speed_time - // seconds, and the transfer rate (sum / low_speed_time) is less than low_speed_limit, we abort. - - // When are we? - S32 second = (sClockCount - mLowSpeedClock) * sClockWidth; - llassert(sClockWidth > 0.0); - // This REALLY should never happen, but due to another bug it did happened - // and caused something so evil and hard to find that... NEVER AGAIN! - llassert(second >= 0); - - // If this is the same second as last time, just add the number of bytes to the current bucket. - if (second == mLastSecond) - { - mTotalBytes += bytes; - mBuckets[mBucket] += bytes; - return false; - } - - // We arrived at a new second. - // The below is at most executed once per second, even though for - // every currently connected transfer, CPU is not a big issue. - - // Determine the number of buckets needed and increase the number of buckets if needed. - U16 const low_speed_time = mPolicy->getLowSpeedTime(); - if (low_speed_time > mBuckets.size()) - { - mBuckets.resize(low_speed_time, 0); - } - - S32 s = mLastSecond; - mLastSecond = second; - - // If this is the first time this function is called, we need to do some initialization. - if (s == -1) - { - mBucket = 0; // It doesn't really matter where we start. - mTotalBytes = bytes; - mBuckets[mBucket] = bytes; - return false; - } - - // Update all administration. - U16 bucket = mBucket; - while(1) // Run over all the seconds that were skipped. - { - if (++bucket == low_speed_time) - bucket = 0; - if (++s == second) - break; - mTotalBytes -= mBuckets[bucket]; - mBuckets[bucket] = 0; - } - mBucket = bucket; - mTotalBytes -= mBuckets[mBucket]; - mTotalBytes += bytes; - mBuckets[mBucket] = bytes; - - // Check if we timed out. - U32 const low_speed_limit = mPolicy->getLowSpeedLimit(); - U32 mintotalbytes = low_speed_limit * low_speed_time; - DoutCurl("Transfered " << mTotalBytes << " bytes in " << llmin(second, (S32)low_speed_time) << " seconds after " << second << " second" << ((second == 1) ? "" : "s") << "."); - if (second >= low_speed_time) - { - DoutCurl("Average transfer rate is " << (mTotalBytes / low_speed_time) << " bytes/s (low speed limit is " << low_speed_limit << " bytes/s)"); - if (mTotalBytes < mintotalbytes) - { - // The average transfer rate over the passed low_speed_time seconds is too low. Abort the transfer. - llwarns << -#ifdef CWDEBUG - (void*)get_lockobj() << ": " -#endif - "aborting slow connection (average transfer rate below " << low_speed_limit << - " for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") << ")." << llendl; - return true; - } - } - - // Calculate how long the data transfer may stall until we should timeout. - llassert_always(mintotalbytes > 0); - S32 max_stall_time = 0; - U32 dropped_bytes = 0; - while(1) - { - if (++bucket == low_speed_time) // The next second the next bucket will be emptied. - bucket = 0; - ++max_stall_time; - dropped_bytes += mBuckets[bucket]; - // Note how, when max_stall_time == low_speed_time, dropped_bytes has - // to be equal to mTotalBytes, the sum of all vector elements. - llassert_always(max_stall_time < low_speed_time || dropped_bytes == mTotalBytes); - // And thus the following will certainly abort. - if (second + max_stall_time >= low_speed_time && mTotalBytes - dropped_bytes < mintotalbytes) - break; - } - // If this function isn't called again within max_stall_time seconds, we stalled. - mStalled = sClockCount + max_stall_time / sClockWidth; - DoutCurl("mStalled set to sClockCount (" << sClockCount << ") + " << (mStalled - sClockCount) << " (" << max_stall_time << " seconds)"); - - return false; -} - -// CURL-THREAD -// This is called immediately before done() after curl finished, with code. -// <-------mLowSpeedOn--------> -// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done -// ^ -// | -void HTTPTimeout::done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode code) -{ - if (code == CURLE_OPERATION_TIMEDOUT || code == CURLE_COULDNT_RESOLVE_HOST) - { - bool dns_problem = false; - if (code == CURLE_COULDNT_RESOLVE_HOST) - { - // Note that CURLINFO_OS_ERRNO returns 0; we don't know any more than this. - llwarns << "Failed to resolve hostname " << curlEasyRequest_w->getLowercaseHostname() << llendl; - dns_problem = true; - } - else if (mNothingReceivedYet) - { - // Only consider this to possibly be related to a DNS lookup if we didn't - // resolved the host yet, which can be detected by asking for - // CURLINFO_NAMELOOKUP_TIME which is set when libcurl initiates the - // actual connect and thus knows the IP# (possibly from it's DNS cache). - double namelookup_time; - curlEasyRequest_w->getinfo(CURLINFO_NAMELOOKUP_TIME, &namelookup_time); - dns_problem = (namelookup_time == 0); - } - if (dns_problem) - { - // Inform policy object that there might be problems with resolving this host. - // This will increase the connect timeout the next time we try to connect to this host. - AIHTTPTimeoutPolicy::connect_timed_out(curlEasyRequest_w->getLowercaseHostname()); - // AIFIXME: use return value to change priority - } - } - // Make sure no timeout will happen anymore. - mLowSpeedOn = false; - mStalled = (U64)-1; - DoutCurl("done: mStalled set to -1"); -} - -// Libcurl uses GetTickCount on windows, with a resolution of 10 to 16 ms. -// As a result, we can not assume that namelookup_time == 0 has a special meaning. -#define LOWRESTIMER LL_WINDOWS - -void HTTPTimeout::print_diagnostics(CurlEasyRequest const* curl_easy_request, char const* eff_url) -{ - llwarns << "Request to \"" << curl_easy_request->getLowercaseHostname() << "\" timed out for " << curl_easy_request->getTimeoutPolicy()->name() << llendl; - llinfos << "Effective URL: \"" << eff_url << "\"." << llendl; - double namelookup_time, connect_time, appconnect_time, pretransfer_time, starttransfer_time; - curl_easy_request->getinfo(CURLINFO_NAMELOOKUP_TIME, &namelookup_time); - curl_easy_request->getinfo(CURLINFO_CONNECT_TIME, &connect_time); - curl_easy_request->getinfo(CURLINFO_APPCONNECT_TIME, &appconnect_time); - curl_easy_request->getinfo(CURLINFO_PRETRANSFER_TIME, &pretransfer_time); - curl_easy_request->getinfo(CURLINFO_STARTTRANSFER_TIME, &starttransfer_time); - if (namelookup_time == 0 -#if LOWRESTIMER - && connect_time == 0 -#endif - ) - { -#if LOWRESTIMER - llinfos << "Hostname seems to have been still in the DNS cache." << llendl; -#else - llwarns << "Curl returned CURLE_OPERATION_TIMEDOUT and DNS lookup did not occur according to timings. Apparently the resolve attempt timed out (bad network?)" << llendl; - llassert(connect_time == 0); - llassert(appconnect_time == 0); - llassert(pretransfer_time == 0); - llassert(starttransfer_time == 0); - return; -#endif - } - // If namelookup_time is less than 500 microseconds, then it's very likely just a DNS cache lookup. - else if (namelookup_time < 500e-6) - { -#if LOWRESTIMER - llinfos << "Hostname was most likely still in DNS cache (or lookup occured in under ~10ms)." << llendl; -#else - llinfos << "Hostname was still in DNS cache." << llendl; -#endif - } - else - { - llinfos << "DNS lookup of " << curl_easy_request->getLowercaseHostname() << " took " << namelookup_time << " seconds." << llendl; - } - if (connect_time == 0 -#if LOWRESTIMER - && namelookup_time > 0 // connect_time, when set, is namelookup_time + something. -#endif - ) - { - llwarns << "Curl returned CURLE_OPERATION_TIMEDOUT and connection did not occur according to timings: apparently the connect attempt timed out (bad network?)" << llendl; - llassert(appconnect_time == 0); - llassert(pretransfer_time == 0); - llassert(starttransfer_time == 0); - return; - } - // If connect_time is almost equal to namelookup_time, then it was just set because it was already connected. - if (connect_time - namelookup_time <= 1e-5) - { -#if LOWRESTIMER // Assuming 10ms resolution. - llinfos << "The socket was most likely already connected (or you connected to a proxy with a connect time of under ~10 ms)." << llendl; -#else - llinfos << "The socket was already connected (to remote or proxy)." << llendl; -#endif - // I'm assuming that the SSL/TLS handshake can be measured with a low res timer. - if (appconnect_time == 0) - { - llwarns << "The SSL/TLS handshake never occurred according to the timings!" << llendl; - return; - } - // If appconnect_time is almost equal to connect_time, then it was just set because this is a connection re-use. - if (appconnect_time - connect_time <= 1e-5) - { - llinfos << "Connection with HTTP server was already established; this was a re-used connection." << llendl; - } - else - { - llinfos << "SSL/TLS handshake with HTTP server took " << (appconnect_time - connect_time) << " seconds." << llendl; - } - } - else - { - llinfos << "Socket connected to remote host (or proxy) in " << (connect_time - namelookup_time) << " seconds." << llendl; - if (appconnect_time == 0) - { - llwarns << "The SSL/TLS handshake never occurred according to the timings!" << llendl; - return; - } - llinfos << "SSL/TLS handshake with HTTP server took " << (appconnect_time - connect_time) << " seconds." << llendl; - } - if (pretransfer_time == 0) - { - llwarns << "The transfer never happened because there was too much in the pipeline (apparently)." << llendl; - return; - } - else if (pretransfer_time - appconnect_time >= 1e-5) - { - llinfos << "Apparently there was a delay, due to waits in line for the pipeline, of " << (pretransfer_time - appconnect_time) << " seconds before the transfer began." << llendl; - } - if (starttransfer_time == 0) - { - llwarns << "No data was ever received from the server according to the timings." << llendl; - } - else - { - llinfos << "The time it took to send the request to the server plus the time it took before the server started to reply was " << (starttransfer_time - pretransfer_time) << " seconds." << llendl; - } - if (mNothingReceivedYet) - { - llinfos << "No data at all was actually received from the server." << llendl; - } - if (mUploadFinished) - { - llinfos << "The request upload finished successfully." << llendl; - } - if (mLastSecond > 0 && mLowSpeedOn) - { - llinfos << "The " << (mNothingReceivedYet ? "upload" : "download") << " did last " << mLastSecond << " second" << ((mLastSecond == 1) ? "" : "s") << ", before it timed out." << llendl; - } -} - } // namespace curlthread } // namespace AICurlPrivate diff --git a/indra/llmessage/aihttptimeout.cpp b/indra/llmessage/aihttptimeout.cpp new file mode 100644 index 000000000..fa96165f1 --- /dev/null +++ b/indra/llmessage/aihttptimeout.cpp @@ -0,0 +1,427 @@ +/** + * @file aihttptimeout.cpp + * @brief Implementation of HTTPTimeout + * + * Copyright (c) 2012, Aleric Inglewood. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution. + * + * CHANGELOG + * and additional copyright holders. + * + * 28/04/2012 + * Initial version, written by Aleric Inglewood @ SL + */ + +#include "aihttptimeout.h" +#include "aihttptimeoutpolicy.h" +#include "lltimer.h" // calc_clock_frequency() +#include "aicurl.h" // DoutCurl + +#undef AICurlPrivate + +namespace AICurlPrivate { +namespace curlthread { + +//----------------------------------------------------------------------------- +// HTTPTimeout + +//static +F64 const HTTPTimeout::sClockWidth = 1.0 / calc_clock_frequency(); // Time between two clock ticks, in seconds. +U64 HTTPTimeout::sClockCount; // Clock count, set once per select() exit. + +// CURL-THREAD +// This is called when body data was sent to the server socket. +// <-----mLowSpeedOn------> +// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done +// ^ ^ ^ ^ ^ ^ +// | | | | | | +bool HTTPTimeout::data_sent(size_t n) +{ + // Generate events. + if (!mLowSpeedOn) + { + // If we can send data (for the first time) then that's our only way to know we connected. + reset_lowspeed(); + } + // Detect low speed. + return lowspeed(n); +} + +// CURL-THREAD +// This is called when the 'low speed' timer should be started. +// <-----mLowSpeedOn------> <-------mLowSpeedOn--------> +// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done +// ^ ^ +// | | +void HTTPTimeout::reset_lowspeed(void) +{ + mLowSpeedClock = sClockCount; + mLowSpeedOn = true; + mLastSecond = -1; // This causes lowspeed to initialize the rest. + mStalled = (U64)-1; // Stop reply delay timer. + DoutCurl("reset_lowspeed: mLowSpeedClock = " << mLowSpeedClock << "; mStalled = -1"); +} + +// CURL-THREAD +// This is called when everything we had to send to the server has been sent. +// <-----mLowSpeedOn------> +// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done +// ^ +// | +void HTTPTimeout::upload_finished(void) +{ + llassert(!mUploadFinished); // If we get here twice, then the 'upload finished' detection failed. + mUploadFinished = true; + // We finished uploading (if there was a body to upload at all), so not more transfer rate timeouts. + mLowSpeedOn = false; + // Timeout if the server doesn't reply quick enough. + mStalled = sClockCount + mPolicy->getReplyDelay() / sClockWidth; + DoutCurl("upload_finished: mStalled set to sClockCount (" << sClockCount << ") + " << (mStalled - sClockCount) << " (" << mPolicy->getReplyDelay() << " seconds)"); +} + +// CURL-THREAD +// This is called when data was received from the server. +// +// <--------------------------------mNothingReceivedYet------------------------------><-------mLowSpeedOn--------> +// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done +// ^ ^ ^ ^ ^ ^ ^ ^ +// | | | | | | | | +bool HTTPTimeout::data_received(size_t n/*,*/ +#ifdef CWDEBUG + ASSERT_ONLY_COMMA(bool upload_error_status) +#else + ASSERT_ONLY_COMMA(bool) +#endif + ) +{ + // The HTTP header of the reply is the first thing we receive. + if (mNothingReceivedYet && n > 0) + { + if (!mUploadFinished) + { + // mUploadFinished not being set this point should only happen for GET requests (in fact, then it is normal), + // because in that case it is impossible to detect the difference between connecting and waiting for a reply without + // using CURLOPT_DEBUGFUNCTION. Note that mDebugIsHeadOrGetMethod is only valid when the debug channel 'curlio' is on, + // because it is set in the debug callback function. + // This is also normal if we received a HTTP header with an error status, since that can interrupt our upload. + Debug(llassert(upload_error_status || AICurlEasyRequest_wat(*mLockObj)->mDebugIsHeadOrGetMethod || !dc::curlio.is_on())); + // 'Upload finished' detection failed, generate it now. + upload_finished(); + } + // Turn this flag off again now that we received data, so that if 'upload_finished()' is called again + // for a future upload on the same descriptor, then that won't trigger an assert. + // Note that because we also set mNothingReceivedYet here, we won't enter this code block anymore, + // so it's safe to do this. + mUploadFinished = false; + // Mark that something was received. + mNothingReceivedYet = false; + // We received something; switch to getLowSpeedLimit()/getLowSpeedTime(). + reset_lowspeed(); + } + return mLowSpeedOn ? lowspeed(n) : false; +} + +// CURL_THREAD +// bytes is the number of bytes we just sent or received (including headers). +// Returns true if the transfer should be aborted. +// +// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done +// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ +// | | | | | | | | | | | | | | +bool HTTPTimeout::lowspeed(size_t bytes) +{ + //DoutCurlEntering("HTTPTimeout::lowspeed(" << bytes << ")"); commented out... too spammy for normal use. + + // The algorithm to determine if we timed out if different from how libcurls CURLOPT_LOW_SPEED_TIME works. + // + // libcurl determines the transfer rate since the last call to an equivalent 'lowspeed' function, and then + // triggers a timeout if CURLOPT_LOW_SPEED_TIME long such a transfer value is less than CURLOPT_LOW_SPEED_LIMIT. + // That doesn't work right because once there IS data it can happen that this function is called a few + // times (with less than a milisecond in between) causing seemingly VERY high "transfer rate" spikes. + // The only correct way to determine the transfer rate is to actually average over CURLOPT_LOW_SPEED_TIME + // seconds. + // + // We do this as follows: we create low_speed_time (in seconds) buckets and fill them with the number + // of bytes received during that second. We also keep track of the sum of all bytes received between 'now' + // and 'now - llmax(starttime, low_speed_time)'. Then if that period reaches at least low_speed_time + // seconds, and the transfer rate (sum / low_speed_time) is less than low_speed_limit, we abort. + + // When are we? + S32 second = (sClockCount - mLowSpeedClock) * sClockWidth; + llassert(sClockWidth > 0.0); + // This REALLY should never happen, but due to another bug it did happened + // and caused something so evil and hard to find that... NEVER AGAIN! + llassert(second >= 0); + + // If this is the same second as last time, just add the number of bytes to the current bucket. + if (second == mLastSecond) + { + mTotalBytes += bytes; + mBuckets[mBucket] += bytes; + return false; + } + + // We arrived at a new second. + // The below is at most executed once per second, even though for + // every currently connected transfer, CPU is not a big issue. + + // Determine the number of buckets needed and increase the number of buckets if needed. + U16 const low_speed_time = mPolicy->getLowSpeedTime(); + if (low_speed_time > mBuckets.size()) + { + mBuckets.resize(low_speed_time, 0); + } + + S32 s = mLastSecond; + mLastSecond = second; + + // If this is the first time this function is called, we need to do some initialization. + if (s == -1) + { + mBucket = 0; // It doesn't really matter where we start. + mTotalBytes = bytes; + mBuckets[mBucket] = bytes; + return false; + } + + // Update all administration. + U16 bucket = mBucket; + while(1) // Run over all the seconds that were skipped. + { + if (++bucket == low_speed_time) + bucket = 0; + if (++s == second) + break; + mTotalBytes -= mBuckets[bucket]; + mBuckets[bucket] = 0; + } + mBucket = bucket; + mTotalBytes -= mBuckets[mBucket]; + mTotalBytes += bytes; + mBuckets[mBucket] = bytes; + + // Check if we timed out. + U32 const low_speed_limit = mPolicy->getLowSpeedLimit(); + U32 mintotalbytes = low_speed_limit * low_speed_time; + DoutCurl("Transfered " << mTotalBytes << " bytes in " << llmin(second, (S32)low_speed_time) << " seconds after " << second << " second" << ((second == 1) ? "" : "s") << "."); + if (second >= low_speed_time) + { + DoutCurl("Average transfer rate is " << (mTotalBytes / low_speed_time) << " bytes/s (low speed limit is " << low_speed_limit << " bytes/s)"); + if (mTotalBytes < mintotalbytes) + { + // The average transfer rate over the passed low_speed_time seconds is too low. Abort the transfer. + llwarns << +#ifdef CWDEBUG + (void*)get_lockobj() << ": " +#endif + "aborting slow connection (average transfer rate below " << low_speed_limit << + " for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") << ")." << llendl; + return true; + } + } + + // Calculate how long the data transfer may stall until we should timeout. + llassert_always(mintotalbytes > 0); + S32 max_stall_time = 0; + U32 dropped_bytes = 0; + while(1) + { + if (++bucket == low_speed_time) // The next second the next bucket will be emptied. + bucket = 0; + ++max_stall_time; + dropped_bytes += mBuckets[bucket]; + // Note how, when max_stall_time == low_speed_time, dropped_bytes has + // to be equal to mTotalBytes, the sum of all vector elements. + llassert_always(max_stall_time < low_speed_time || dropped_bytes == mTotalBytes); + // And thus the following will certainly abort. + if (second + max_stall_time >= low_speed_time && mTotalBytes - dropped_bytes < mintotalbytes) + break; + } + // If this function isn't called again within max_stall_time seconds, we stalled. + mStalled = sClockCount + max_stall_time / sClockWidth; + DoutCurl("mStalled set to sClockCount (" << sClockCount << ") + " << (mStalled - sClockCount) << " (" << max_stall_time << " seconds)"); + + return false; +} + +// CURL-THREAD +// This is called immediately before done() after curl finished, with code. +// <-------mLowSpeedOn--------> +// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done +// ^ +// | +void HTTPTimeout::done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode code) +{ + if (code == CURLE_OPERATION_TIMEDOUT || code == CURLE_COULDNT_RESOLVE_HOST) + { + bool dns_problem = false; + if (code == CURLE_COULDNT_RESOLVE_HOST) + { + // Note that CURLINFO_OS_ERRNO returns 0; we don't know any more than this. + llwarns << "Failed to resolve hostname " << curlEasyRequest_w->getLowercaseHostname() << llendl; + dns_problem = true; + } + else if (mNothingReceivedYet) + { + // Only consider this to possibly be related to a DNS lookup if we didn't + // resolved the host yet, which can be detected by asking for + // CURLINFO_NAMELOOKUP_TIME which is set when libcurl initiates the + // actual connect and thus knows the IP# (possibly from it's DNS cache). + double namelookup_time; + curlEasyRequest_w->getinfo(CURLINFO_NAMELOOKUP_TIME, &namelookup_time); + dns_problem = (namelookup_time == 0); + } + if (dns_problem) + { + // Inform policy object that there might be problems with resolving this host. + // This will increase the connect timeout the next time we try to connect to this host. + AIHTTPTimeoutPolicy::connect_timed_out(curlEasyRequest_w->getLowercaseHostname()); + // AIFIXME: use return value to change priority + } + } + // Make sure no timeout will happen anymore. + mLowSpeedOn = false; + mStalled = (U64)-1; + DoutCurl("done: mStalled set to -1"); +} + +// Libcurl uses GetTickCount on windows, with a resolution of 10 to 16 ms. +// As a result, we can not assume that namelookup_time == 0 has a special meaning. +#define LOWRESTIMER LL_WINDOWS + +void HTTPTimeout::print_diagnostics(CurlEasyRequest const* curl_easy_request, char const* eff_url) +{ + llwarns << "Request to \"" << curl_easy_request->getLowercaseHostname() << "\" timed out for " << curl_easy_request->getTimeoutPolicy()->name() << llendl; + llinfos << "Effective URL: \"" << eff_url << "\"." << llendl; + double namelookup_time, connect_time, appconnect_time, pretransfer_time, starttransfer_time; + curl_easy_request->getinfo(CURLINFO_NAMELOOKUP_TIME, &namelookup_time); + curl_easy_request->getinfo(CURLINFO_CONNECT_TIME, &connect_time); + curl_easy_request->getinfo(CURLINFO_APPCONNECT_TIME, &appconnect_time); + curl_easy_request->getinfo(CURLINFO_PRETRANSFER_TIME, &pretransfer_time); + curl_easy_request->getinfo(CURLINFO_STARTTRANSFER_TIME, &starttransfer_time); + if (namelookup_time == 0 +#if LOWRESTIMER + && connect_time == 0 +#endif + ) + { +#if LOWRESTIMER + llinfos << "Hostname seems to have been still in the DNS cache." << llendl; +#else + llwarns << "Curl returned CURLE_OPERATION_TIMEDOUT and DNS lookup did not occur according to timings. Apparently the resolve attempt timed out (bad network?)" << llendl; + llassert(connect_time == 0); + llassert(appconnect_time == 0); + llassert(pretransfer_time == 0); + llassert(starttransfer_time == 0); + return; +#endif + } + // If namelookup_time is less than 500 microseconds, then it's very likely just a DNS cache lookup. + else if (namelookup_time < 500e-6) + { +#if LOWRESTIMER + llinfos << "Hostname was most likely still in DNS cache (or lookup occured in under ~10ms)." << llendl; +#else + llinfos << "Hostname was still in DNS cache." << llendl; +#endif + } + else + { + llinfos << "DNS lookup of " << curl_easy_request->getLowercaseHostname() << " took " << namelookup_time << " seconds." << llendl; + } + if (connect_time == 0 +#if LOWRESTIMER + && namelookup_time > 0 // connect_time, when set, is namelookup_time + something. +#endif + ) + { + llwarns << "Curl returned CURLE_OPERATION_TIMEDOUT and connection did not occur according to timings: apparently the connect attempt timed out (bad network?)" << llendl; + llassert(appconnect_time == 0); + llassert(pretransfer_time == 0); + llassert(starttransfer_time == 0); + return; + } + // If connect_time is almost equal to namelookup_time, then it was just set because it was already connected. + if (connect_time - namelookup_time <= 1e-5) + { +#if LOWRESTIMER // Assuming 10ms resolution. + llinfos << "The socket was most likely already connected (or you connected to a proxy with a connect time of under ~10 ms)." << llendl; +#else + llinfos << "The socket was already connected (to remote or proxy)." << llendl; +#endif + // I'm assuming that the SSL/TLS handshake can be measured with a low res timer. + if (appconnect_time == 0) + { + llwarns << "The SSL/TLS handshake never occurred according to the timings!" << llendl; + return; + } + // If appconnect_time is almost equal to connect_time, then it was just set because this is a connection re-use. + if (appconnect_time - connect_time <= 1e-5) + { + llinfos << "Connection with HTTP server was already established; this was a re-used connection." << llendl; + } + else + { + llinfos << "SSL/TLS handshake with HTTP server took " << (appconnect_time - connect_time) << " seconds." << llendl; + } + } + else + { + llinfos << "Socket connected to remote host (or proxy) in " << (connect_time - namelookup_time) << " seconds." << llendl; + if (appconnect_time == 0) + { + llwarns << "The SSL/TLS handshake never occurred according to the timings!" << llendl; + return; + } + llinfos << "SSL/TLS handshake with HTTP server took " << (appconnect_time - connect_time) << " seconds." << llendl; + } + if (pretransfer_time == 0) + { + llwarns << "The transfer never happened because there was too much in the pipeline (apparently)." << llendl; + return; + } + else if (pretransfer_time - appconnect_time >= 1e-5) + { + llinfos << "Apparently there was a delay, due to waits in line for the pipeline, of " << (pretransfer_time - appconnect_time) << " seconds before the transfer began." << llendl; + } + if (starttransfer_time == 0) + { + llwarns << "No data was ever received from the server according to the timings." << llendl; + } + else + { + llinfos << "The time it took to send the request to the server plus the time it took before the server started to reply was " << (starttransfer_time - pretransfer_time) << " seconds." << llendl; + } + if (mNothingReceivedYet) + { + llinfos << "No data at all was actually received from the server." << llendl; + } + if (mUploadFinished) + { + llinfos << "The request upload finished successfully." << llendl; + } + if (mLastSecond > 0 && mLowSpeedOn) + { + llinfos << "The " << (mNothingReceivedYet ? "upload" : "download") << " did last " << mLastSecond << " second" << ((mLastSecond == 1) ? "" : "s") << ", before it timed out." << llendl; + } +} + +} // namespace curlthread +} // namespace AICurlPrivate + diff --git a/indra/llmessage/aihttptimeout.h b/indra/llmessage/aihttptimeout.h new file mode 100644 index 000000000..1ae79ec84 --- /dev/null +++ b/indra/llmessage/aihttptimeout.h @@ -0,0 +1,127 @@ +/** + * @file aihttptimeout.h + * @brief HTTP timeout control class. + * + * Copyright (c) 2012, Aleric Inglewood. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution. + * + * CHANGELOG + * and additional copyright holders. + * + * 28/04/2012 + * Initial version, written by Aleric Inglewood @ SL + */ + +#ifndef AIHTTPTIMEOUT_H +#define AIHTTPTIMEOUT_H + +#include "linden_common.h" +#include "llrefcount.h" +#include "aithreadsafe.h" // AIAccess + +#include +#include // Needed for files that include this header (also for aicurlprivate.h). +#ifdef DEBUG_CURLIO +#include "debug_libcurl.h" +#endif + +//#undef AICurlPrivate + +class AIHTTPTimeoutPolicy; + +// Forward declaration (see aicurlprivate.h). +namespace AICurlPrivate { + class BufferedCurlEasyRequest; +} // namespace AICurlPrivate + +typedef AIAccess AICurlEasyRequest_wat; + +namespace AICurlPrivate { + +class CurlEasyRequest; +class ThreadSafeBufferedCurlEasyRequest; + +namespace curlthread { + +// A class that keeps track of timeout administration per connection. +class HTTPTimeout : public LLRefCount { + private: + AIHTTPTimeoutPolicy const* mPolicy; // A pointer to the used timeout policy. + std::vector mBuckets; // An array with the number of bytes transfered in each second. + U16 mBucket; // The bucket corresponding to mLastSecond. + bool mNothingReceivedYet; // Set when created, reset when the HTML reply header from the server is received. + bool mLowSpeedOn; // Set while uploading or downloading data. + bool mUploadFinished; // Used to keep track of whether upload_finished was called yet. + S32 mLastSecond; // The time at which lowspeed() was last called, in seconds since mLowSpeedClock. + U32 mTotalBytes; // The sum of all bytes in mBuckets. + U64 mLowSpeedClock; // Clock count at which low speed detection (re)started. + U64 mStalled; // The clock count at which this transaction is considered to be stalling if nothing is transfered anymore. + public: + static F64 const sClockWidth; // Time between two clock ticks in seconds. + static U64 sClockCount; // Clock count used as 'now' during one loop of the main loop. +#if defined(CWDEBUG) || defined(DEBUG_CURLIO) + ThreadSafeBufferedCurlEasyRequest* mLockObj; +#endif + + public: + HTTPTimeout(AIHTTPTimeoutPolicy const* policy, ThreadSafeBufferedCurlEasyRequest* lock_obj) : + mPolicy(policy), mNothingReceivedYet(true), mLowSpeedOn(false), mUploadFinished(false), mStalled((U64)-1) +#if defined(CWDEBUG) || defined(DEBUG_CURLIO) + , mLockObj(lock_obj) +#endif + { } + + // Called after sending all headers, when body data is written the first time. + void connected(void); + + // Called when everything we had to send to the server has been sent. + void upload_finished(void); + + // Called when data is sent. Returns true if transfer timed out. + bool data_sent(size_t n); + + // Called when data is received. Returns true if transfer timed out. + bool data_received(size_t n/*,*/ ASSERT_ONLY_COMMA(bool upload_error_status = false)); + + // Called immediately before done() after curl finished, with code. + void done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode code); + + // Accessor. + bool has_stalled(void) const { return mStalled < sClockCount; } + + // Called from BufferedCurlEasyRequest::processOutput if a timeout occurred. + void print_diagnostics(CurlEasyRequest const* curl_easy_request, char const* eff_url); + +#if defined(CWDEBUG) || defined(DEBUG_CURLIO) + void* get_lockobj(void) const { return mLockObj; } +#endif + + private: + // (Re)start low speed transer rate detection. + void reset_lowspeed(void); + + // Common low speed detection, Called from data_sent or data_received. + bool lowspeed(size_t bytes); +}; + +} // namespace curlthread +} // namespace AICurlPrivate + +#endif +