Files
SingularityViewer/indra/llmessage/aihttptimeout.cpp

575 lines
23 KiB
C++

/**
* @file aihttptimeout.cpp
* @brief Implementation of HTTPTimeout
*
* Copyright (c) 2012, Aleric Inglewood.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* There are special exceptions to the terms and conditions of the GPL as
* it is applied to this Source Code. View the full text of the exception
* in the file doc/FLOSS-exception.txt in this software distribution.
*
* CHANGELOG
* and additional copyright holders.
*
* 28/04/2012
* Initial version, written by Aleric Inglewood @ SL
*/
#ifndef HTTPTIMEOUT_TESTSUITE
#include "linden_common.h"
#include "aihttptimeoutpolicy.h"
#include "lltimer.h" // calc_clock_frequency()
#include "aicurl.h" // DoutCurl
#undef AICurlPrivate
#else
#include "../llcommon/stdtypes.h"
#include <iostream>
#include <cassert>
#define llassert assert
#define llassert_always assert
#define DoutCurl(...) do { std::cout << __VA_ARGS__ ; } while(0)
#define Debug(...) do { } while(0)
#define llmin std::min
#define llwarns std::cout
#define llendl std::endl
int const CURLE_OPERATION_TIMEDOUT = 1;
int const CURLE_COULDNT_RESOLVE_HOST = 2;
int const CURLINFO_NAMELOOKUP_TIME = 3;
F64 calc_clock_frequency(void) { return 1000000.0; }
template<typename T>
struct AIAccess {
T* mP;
AIAccess(T* p) : mP(p) { }
T* operator->() const { return mP; }
};
struct AIHTTPTimeoutPolicy {
U16 getReplyDelay(void) const { return 60; }
U16 getLowSpeedTime(void) const { return 30; }
U32 getLowSpeedLimit(void) const { return 7000; }
static bool connect_timed_out(std::string const&) { return false; }
};
namespace AICurlPrivate {
class BufferedCurlEasyRequest {
public:
char const* getLowercaseHostname(void) const { return "hostname.com"; }
char const* getLowercaseServicename(void) const { return "hostname.com:12047"; }
void getinfo(const int&, double* p) { *p = 0.1; }
};
}
#endif // HTTPTIMEOUT_TESTSUITE
#include "aihttptimeout.h"
// If this is set, treat dc::curlio as off in the assertion below.
#ifdef CWDEBUG
bool gCurlIo;
#endif
namespace AICurlPrivate {
namespace curlthread {
//-----------------------------------------------------------------------------
// HTTPTimeout
//static
F64 const HTTPTimeout::sClockWidth_10ms = 100.0 / calc_clock_frequency(); // Time between two clock ticks, in 10ms units.
F64 const HTTPTimeout::sClockWidth_40ms = HTTPTimeout::sClockWidth_10ms * 0.25; // Time between two clock ticks, in 40ms units.
U64 HTTPTimeout::sTime_10ms; // Time in 10ms units, set once per select() exit.
// CURL-THREAD
// This is called when body data was sent to the server socket.
// <-----mLowSpeedOn------>
// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
// ^ ^ ^ ^ ^ ^
// | | | | | |
bool HTTPTimeout::data_sent(size_t n, bool finished)
{
// Generate events.
if (!mLowSpeedOn)
{
// If we can send data (for the first time) then that's our only way to know we connected.
reset_lowspeed();
}
// Detect low speed.
return lowspeed(n, finished);
}
// CURL-THREAD
// This is called when the 'low speed' timer should be started.
// <-----mLowSpeedOn------> <-------mLowSpeedOn-------->
// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
// ^ ^
// | |
void HTTPTimeout::reset_lowspeed(void)
{
mLowSpeedClock = sTime_10ms;
mLowSpeedOn = true;
mLastBytesSent = false; // We're just starting!
mLastSecond = -1; // This causes lowspeed to initialize the rest.
mStalled = (U64)-1; // Stop reply delay timer.
DoutCurl("reset_lowspeed: mLowSpeedClock = " << mLowSpeedClock << "; mStalled = -1");
}
void HTTPTimeout::being_redirected(void)
{
mBeingRedirected = true;
}
void HTTPTimeout::upload_starting(void)
{
// We're not supposed start with an upload when it already finished, unless we're being redirected.
llassert(!mUploadFinished || mBeingRedirected);
mUploadFinished = false;
// Apparently there is something to upload. Start detecting low speed timeouts.
reset_lowspeed();
}
// CURL-THREAD
// This is called when everything we had to send to the server has been sent.
// <-----mLowSpeedOn------>
// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
// ^
// |
void HTTPTimeout::upload_finished(void)
{
// This function can be called more than once. Ignore the second call.
if (mUploadFinished)
{
return;
}
mUploadFinished = true;
// Only accept a call to upload_starting() if being_redirected() is called after this point.
mBeingRedirected = false;
// We finished uploading (if there was a body to upload at all), so no more transfer rate timeouts.
mLowSpeedOn = false;
// Timeout if the server doesn't reply quick enough.
mStalled = sTime_10ms + 100 * mPolicy->getReplyDelay();
DoutCurl("upload_finished: mStalled set to Time_10ms (" << sTime_10ms << ") + " << (mStalled - sTime_10ms) << " (" << mPolicy->getReplyDelay() << " seconds)");
}
// CURL-THREAD
// This is called when data was received from the server.
//
// <--------------------------------mNothingReceivedYet------------------------------><-------mLowSpeedOn-------->
// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
// ^ ^ ^ ^ ^ ^ ^ ^
// | | | | | | | |
bool HTTPTimeout::data_received(size_t n/*,*/
#ifdef CWDEBUG
ASSERT_ONLY_COMMA(bool upload_error_status)
#else
ASSERT_ONLY_COMMA(bool)
#endif
)
{
// The HTTP header of the reply is the first thing we receive.
if (mNothingReceivedYet && n > 0)
{
if (!mUploadFinished)
{
// mUploadFinished not being set this point should only happen for GET requests (in fact, then it is normal),
// because in that case it is impossible to detect the difference between connecting and waiting for a reply without
// using CURLOPT_DEBUGFUNCTION. Note that mDebugIsHeadOrGetMethod is only valid when the debug channel 'curlio' is on,
// because it is set in the debug callback function.
// This is also normal if we received a HTTP header with an error status, since that can interrupt our upload.
Debug(llassert(upload_error_status || AICurlEasyRequest_wat(*mLockObj)->mDebugIsHeadOrGetMethod || !dc::curlio.is_on() || gCurlIo));
// 'Upload finished' detection failed, generate it now.
upload_finished();
}
// Mark that something was received.
mNothingReceivedYet = false;
// We received something; switch to getLowSpeedLimit()/getLowSpeedTime().
reset_lowspeed();
}
return mLowSpeedOn ? lowspeed(n) : false;
}
// CURL_THREAD
// bytes is the number of bytes we just sent or received (including headers).
// Returns true if the transfer should be aborted.
//
// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
// | | | | | | | | | | | | | |
bool HTTPTimeout::lowspeed(size_t bytes, bool finished)
{
//DoutCurlEntering("HTTPTimeout::lowspeed(" << bytes << ", " << finished << ")"); commented out... too spammy for normal use.
// The algorithm to determine if we timed out if different from how libcurls CURLOPT_LOW_SPEED_TIME works.
//
// libcurl determines the transfer rate since the last call to an equivalent 'lowspeed' function, and then
// triggers a timeout if CURLOPT_LOW_SPEED_TIME long such a transfer value is less than CURLOPT_LOW_SPEED_LIMIT.
// That doesn't work right because once there IS data it can happen that this function is called a few
// times (with less than a milisecond in between) causing seemingly VERY high "transfer rate" spikes.
// The only correct way to determine the transfer rate is to actually average over CURLOPT_LOW_SPEED_TIME
// seconds.
//
// We do this as follows: we create low_speed_time (in seconds) buckets and fill them with the number
// of bytes received during that second. We also keep track of the sum of all bytes received between 'now'
// and 'llmax(starttime, now - low_speed_time)'. Then if that period reaches at least low_speed_time
// seconds (when now >= starttime + low_speed_time) and the transfer rate (sum / low_speed_time) is
// less than low_speed_limit, we abort.
// When are we?
S32 second = (sTime_10ms - mLowSpeedClock) / 100;
// This REALLY should never happen, but due to another bug it did happened
// and caused something so evil and hard to find that... NEVER AGAIN!
llassert(second >= 0);
// finished should be false until the very last call to this function.
mLastBytesSent = finished;
// If this is the same second as last time, just add the number of bytes to the current bucket.
if (second == mLastSecond)
{
mTotalBytes += bytes;
mBuckets[mBucket] += bytes;
return false;
}
// We arrived at a new second.
// The below is at most executed once per second, even though for
// every currently connected transfer, CPU is not a big issue.
// Determine the number of buckets needed and increase the number of buckets if needed.
U16 const low_speed_time = mPolicy->getLowSpeedTime();
if (low_speed_time > mBuckets.size())
{
mBuckets.resize(low_speed_time, 0);
}
S32 s = mLastSecond;
mLastSecond = second;
// If this is the first time this function is called, we need to do some initialization.
if (s == -1)
{
mBucket = 0; // It doesn't really matter where we start.
mOverwriteSecond = second + low_speed_time;
mTotalBytes = bytes;
mBuckets[mBucket] = bytes;
return false;
}
// Update all administration.
U16 bucket = mBucket;
while(1) // Run over all the seconds that were skipped.
{
if (++bucket == low_speed_time)
bucket = 0;
if (++s == second)
break;
if (s >= mOverwriteSecond)
{
mTotalBytes -= mBuckets[bucket];
}
mBuckets[bucket] = 0;
}
mBucket = bucket;
if (s >= mOverwriteSecond)
{
mTotalBytes -= mBuckets[mBucket];
}
mTotalBytes += bytes;
mBuckets[mBucket] = bytes;
// Check if we timed out.
U32 const low_speed_limit = mPolicy->getLowSpeedLimit(); // In bytes/s
U32 mintotalbytes = low_speed_limit * low_speed_time; // In bytes.
DoutCurl("Transfered " << mTotalBytes << " bytes in " << llmin(second, (S32)low_speed_time) << " seconds after " << second << " second" << ((second == 1) ? "" : "s") << ".");
if (second >= low_speed_time)
{
DoutCurl("Average transfer rate is " << (mTotalBytes / low_speed_time) << " bytes/s (low speed limit is " << low_speed_limit << " bytes/s)");
if (mTotalBytes < mintotalbytes)
{
if (finished)
{
LL_WARNS() <<
#ifdef CWDEBUG
(void*)get_lockobj() << ": "
#endif
"Transfer rate timeout (average transfer rate below " << low_speed_limit <<
" bytes/s for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") <<
") but we just sent the LAST bytes! Waiting an additional 4 seconds." << LL_ENDL;
// Lets hope these last bytes will make it and do not time out on transfer speed anymore.
// Just give these bytes 4 more seconds to be written to the socket (after which we'll
// assume that the 'upload finished' detection failed and we'll wait another ReplyDelay
// seconds before finally, actually timing out.
mStalled = sTime_10ms + 400; // 4 seconds into the future.
DoutCurl("mStalled set to sTime_10ms (" << sTime_10ms << ") + 400 (4 seconds)");
return false;
}
// The average transfer rate over the passed low_speed_time seconds is too low. Abort the transfer.
LL_WARNS() <<
#ifdef CWDEBUG
(void*)get_lockobj() << ": "
#endif
"aborting slow connection (average transfer rate below " << low_speed_limit <<
" bytes/s for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") << ")." << LL_ENDL;
// This causes curl to exit with CURLE_WRITE_ERROR.
return true;
}
}
// Calculate how long the data transfer may stall until we should timeout.
//
// Assume 6 buckets: 0 1 2 3 4 5 (low_speed_time == 6)
// Seconds since start of transfer: 4 5 6 7 8 9 (mOverwriteSecond == 10)
// Current second: ^
// Data in buckets: A B w x y z (mTotalBytes = A + B; w, x, y and z are undefined)
//
// Obviously, we need to stall at LEAST till second low_speed_time before we can "timeout".
// And possibly more if mTotalBytes is already >= mintotalbytes.
//
// The code below finds 'max_stall_time', so that when from now on the buckets
// are filled with 0, then at 'second + max_stall_time' we should time out,
// meaning that the resulting mTotalBytes after writing 0 at that second
// will be less than mintotalbytes and 'second + max_stall_time' >= low_speed_time.
//
llassert_always(mintotalbytes > 0);
S32 max_stall_time = low_speed_time - second; // Minimum value.
// Note that if max_stall_time <= 0 here, then second >= low_speed_time and
// thus mTotalBytes >= mintotalbytes because we didn't timeout already above.
if (mTotalBytes >= mintotalbytes)
{
// In this case max_stall_time has a minimum value equal to when we will reach mOverwriteSecond,
// because that is the first second at which mTotalBytes will decrease.
max_stall_time = mOverwriteSecond - second - 1;
U32 total_bytes = mTotalBytes;
int bucket = -1; // Must be one less as the start bucket, which corresponds with mOverwriteSecond (and thus with the current max_stall_time).
do
{
++max_stall_time; // Next second (mOverwriteSecond upon entry of the loop).
++bucket; // The next bucket (0 upon entry of the loop).
// Once we reach the end of the vector total_bytes MUST have reached 0 exactly and we should have left this loop.
llassert_always(bucket < low_speed_time);
total_bytes -= mBuckets[bucket]; // Empty this bucket.
}
while(total_bytes >= mintotalbytes);
}
// If this function isn't called again within max_stall_time seconds, we stalled.
mStalled = sTime_10ms + 100 * max_stall_time;
DoutCurl("mStalled set to sTime_10ms (" << sTime_10ms << ") + " << (mStalled - sTime_10ms) << " (" << max_stall_time << " seconds)");
return false;
}
// CURL-THREAD
// This is called immediately before done() after curl finished, with code.
// <-------mLowSpeedOn-------->
// queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
// ^
// |
void HTTPTimeout::done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode code)
{
if (code == CURLE_OPERATION_TIMEDOUT || code == CURLE_COULDNT_RESOLVE_HOST)
{
bool dns_problem = false;
if (code == CURLE_COULDNT_RESOLVE_HOST)
{
// Note that CURLINFO_OS_ERRNO returns 0; we don't know any more than this.
LL_WARNS() << "Failed to resolve hostname " << curlEasyRequest_w->getLowercaseHostname() << LL_ENDL;
dns_problem = true;
}
else if (mNothingReceivedYet)
{
// Only consider this to possibly be related to a DNS lookup if we didn't
// resolved the host yet, which can be detected by asking for
// CURLINFO_NAMELOOKUP_TIME which is set when libcurl initiates the
// actual connect and thus knows the IP# (possibly from it's DNS cache).
double namelookup_time;
curlEasyRequest_w->getinfo(CURLINFO_NAMELOOKUP_TIME, &namelookup_time);
dns_problem = (namelookup_time == 0);
}
if (dns_problem)
{
// Inform policy object that there might be problems with resolving this host.
// This will increase the connect timeout the next time we try to connect to this host.
AIHTTPTimeoutPolicy::connect_timed_out(curlEasyRequest_w->getLowercaseHostname());
// AIFIXME: use return value to change priority
}
}
// Make sure no timeout will happen anymore.
mLowSpeedOn = false;
mStalled = (U64)-1;
DoutCurl("done: mStalled set to -1");
}
bool HTTPTimeout::maybe_upload_finished(void)
{
if (!mUploadFinished && mLastBytesSent)
{
// Assume that 'upload finished' detection failed and the server is slow with a reply.
// Switch to waiting for a reply.
upload_finished();
return true;
}
// The upload certainly finished or certainly did not finish.
return false;
}
// Libcurl uses GetTickCount on windows, with a resolution of 10 to 16 ms.
// As a result, we can not assume that namelookup_time == 0 has a special meaning.
#define LOWRESTIMER LL_WINDOWS
void HTTPTimeout::print_diagnostics(CurlEasyRequest const* curl_easy_request, char const* eff_url)
{
#ifndef HTTPTIMEOUT_TESTSUITE
LL_WARNS() << "Request to \"" << curl_easy_request->getLowercaseServicename() << "\" timed out for " << curl_easy_request->getTimeoutPolicy()->name() << LL_ENDL;
LL_INFOS() << "Effective URL: \"" << eff_url << "\"." << LL_ENDL;
double namelookup_time, connect_time, appconnect_time, pretransfer_time, starttransfer_time;
curl_easy_request->getinfo(CURLINFO_NAMELOOKUP_TIME, &namelookup_time);
curl_easy_request->getinfo(CURLINFO_CONNECT_TIME, &connect_time);
curl_easy_request->getinfo(CURLINFO_APPCONNECT_TIME, &appconnect_time);
curl_easy_request->getinfo(CURLINFO_PRETRANSFER_TIME, &pretransfer_time);
curl_easy_request->getinfo(CURLINFO_STARTTRANSFER_TIME, &starttransfer_time);
if (namelookup_time == 0
#if LOWRESTIMER
&& connect_time == 0
#endif
)
{
#if LOWRESTIMER
LL_INFOS() << "Hostname seems to have been still in the DNS cache." << LL_ENDL;
#else
LL_WARNS() << "Curl returned CURLE_OPERATION_TIMEDOUT and DNS lookup did not occur according to timings. Apparently the resolve attempt timed out (bad network?)" << LL_ENDL;
llassert(connect_time == 0);
llassert(appconnect_time == 0);
llassert(pretransfer_time == 0);
llassert(starttransfer_time == 0);
return;
#endif
}
// If namelookup_time is less than 500 microseconds, then it's very likely just a DNS cache lookup.
else if (namelookup_time < 500e-6)
{
#if LOWRESTIMER
LL_INFOS() << "Hostname was most likely still in DNS cache (or lookup occured in under ~10ms)." << LL_ENDL;
#else
LL_INFOS() << "Hostname was still in DNS cache." << LL_ENDL;
#endif
}
else
{
LL_INFOS() << "DNS lookup of " << curl_easy_request->getLowercaseHostname() << " took " << namelookup_time << " seconds." << LL_ENDL;
}
if (connect_time == 0
#if LOWRESTIMER
&& namelookup_time > 0 // connect_time, when set, is namelookup_time + something.
#endif
)
{
LL_WARNS() << "Curl returned CURLE_OPERATION_TIMEDOUT and connection did not occur according to timings: apparently the connect attempt timed out (bad network?)" << LL_ENDL;
llassert(appconnect_time == 0);
llassert(pretransfer_time == 0);
llassert(starttransfer_time == 0);
return;
}
// If connect_time is almost equal to namelookup_time, then it was just set because it was already connected.
if (connect_time - namelookup_time <= 1e-5)
{
#if LOWRESTIMER // Assuming 10ms resolution.
LL_INFOS() << "The socket was most likely already connected (or you connected to a proxy with a connect time of under ~10 ms)." << LL_ENDL;
#else
LL_INFOS() << "The socket was already connected (to remote or proxy)." << LL_ENDL;
#endif
// I'm assuming that the SSL/TLS handshake can be measured with a low res timer.
if (appconnect_time == 0)
{
LL_WARNS() << "The SSL/TLS handshake never occurred according to the timings!" << LL_ENDL;
return;
}
// If appconnect_time is almost equal to connect_time, then it was just set because this is a connection re-use.
if (appconnect_time - connect_time <= 1e-5)
{
LL_INFOS() << "Connection with HTTP server was already established; this was a re-used connection." << LL_ENDL;
}
else
{
LL_INFOS() << "SSL/TLS handshake with HTTP server took " << (appconnect_time - connect_time) << " seconds." << LL_ENDL;
}
}
else
{
LL_INFOS() << "Socket connected to remote host (or proxy) in " << (connect_time - namelookup_time) << " seconds." << LL_ENDL;
if (appconnect_time == 0)
{
LL_WARNS() << "The SSL/TLS handshake never occurred according to the timings!" << LL_ENDL;
return;
}
LL_INFOS() << "SSL/TLS handshake with HTTP server took " << (appconnect_time - connect_time) << " seconds." << LL_ENDL;
}
if (pretransfer_time == 0)
{
LL_WARNS() << "The transfer never happened because there was too much in the pipeline (apparently)." << LL_ENDL;
return;
}
else if (pretransfer_time - appconnect_time >= 1e-5)
{
LL_INFOS() << "Apparently there was a delay, due to waits in line for the pipeline, of " << (pretransfer_time - appconnect_time) << " seconds before the transfer began." << LL_ENDL;
}
if (starttransfer_time == 0)
{
LL_WARNS() << "No data was ever received from the server according to the timings." << LL_ENDL;
}
else
{
LL_INFOS() << "The time it took to send the request to the server plus the time it took before the server started to reply was " << (starttransfer_time - pretransfer_time) << " seconds." << LL_ENDL;
}
if (mNothingReceivedYet)
{
LL_INFOS() << "No data at all was actually received from the server." << LL_ENDL;
}
if (mUploadFinished)
{
LL_INFOS() << "The request upload finished successfully." << LL_ENDL;
}
else if (mLastBytesSent)
{
LL_INFOS() << "All bytes where sent to libcurl for upload." << LL_ENDL;
}
if (mLastSecond > 0 && mLowSpeedOn)
{
LL_INFOS() << "The " << (mNothingReceivedYet ? "upload" : "download") << " did last " << mLastSecond << " second" << ((mLastSecond == 1) ? "" : "s") << ", before it timed out." << LL_ENDL;
}
#endif // HTTPTIMEOUT_TESTSUITE
}
} // namespace curlthread
} // namespace AICurlPrivate
#ifdef HTTPTIMEOUT_TESTSUITE
int main()
{
using namespace AICurlPrivate::curlthread;
AIHTTPTimeoutPolicy policy;
HTTPTimeout test(&policy, NULL);
}
#endif // HTTPTIMEOUT_TESTSUITE