diff --git a/indra/aistatemachine/aistatemachine.cpp b/indra/aistatemachine/aistatemachine.cpp
index 5352435d2..dedec3bd7 100644
--- a/indra/aistatemachine/aistatemachine.cpp
+++ b/indra/aistatemachine/aistatemachine.cpp
@@ -2,7 +2,7 @@
  * @file aistatemachine.cpp
  * @brief Implementation of AIStateMachine
  *
- * Copyright (c) 2010, Aleric Inglewood.
+ * Copyright (c) 2010 - 2013, Aleric Inglewood.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,68 +26,785 @@
  *
  *   01/03/2010
  *   Initial version, written by Aleric Inglewood @ SL
+ *
+ *   28/02/2013
+ *   Rewritten from scratch to fully support threading.
  */
 
 #include "linden_common.h"
-
-#include <algorithm>
-
-#include "llcontrol.h"
-#include "llfasttimer.h"
-#include "aithreadsafe.h"
 #include "aistatemachine.h"
+#include "lltimer.h"
 
-// Local variables.
-namespace {
-  struct QueueElementComp;
+//==================================================================
+// Overview
 
-  class QueueElement {
-	private:
-	  AIStateMachine* mStateMachine;
-	  U64 mRuntime;
+// An AIStateMachine is a base class that allows derived classes to
+// go through asynchronous states, while the code still appears to
+// be more or less sequential.
+//
+// These state machine objects can be reused to build more complex
+// objects.
+//
+// It is important to note that each state has a duality: the object
+// can have a state that will cause a corresponding function to be
+// called; and often that function will end with changing the state
+// again, to signal that it was handled. It is easy to confuse the
+// function of a state with the state at the end of the function.
+// For example, the state "initialize" could cause the member
+// function 'init()' to be called, and at the end one would be
+// inclined to set the state to "initialized". However, this is the
+// wrong approach: the correct use of state names does reflect the
+// functions that will be called, never the function that just was
+// called.
+//
+// Each (derived) class goes through a series of states as follows:
+//
+//   Creation
+//       |
+//       v
+//     (idle) <----.   		Idle until run() is called.
+//       |         |
+//   Initialize    |		Calls initialize_impl().
+//       |         |
+//       | (idle)  |		Idle until cont() or advance_state() is called.
+//       |  |  ^   |
+//       v  v  |   |
+//   .-----------. |
+//   | Multiplex | |		Call multiplex_impl() until idle(), abort() or finish() is called.
+//   '-----------' |
+//    |    |       |
+//    v    |       |
+//  Abort  |       |		Calls abort_impl().
+//    |    |       |
+//    v    v       |
+//    Finish       |		Calls finish_impl(), which may call run().
+//    |    |       |
+//    |    v       |
+//    | Callback   |        which may call kill() and/or run().
+//    |  |   |     |
+//    |  |   `-----'
+//    v  v
+//  Killed					Delete the statemachine (all statemachines must be allocated with new).
+//
+// Each state causes corresponding code to be called.
+// Finish cleans up whatever is done by Initialize.
+// Abort should clean up additional things done while running.
+//
+// The running state is entered by calling run().
+//
+// While the base class is in the bs_multiplex state, it is the derived class
+// that goes through different states. The state variable of the derived
+// class is only valid while the base class is in the bs_multiplex state.
+//
+// A derived class can exit the bs_multiplex state by calling one of two methods:
+// abort() in case of failure, or finish() in case of success.
+// Respectively these set the state to bs_abort and bs_finish.
+//
+// The methods of the derived class call set_state() to change their
+// own state within the bs_multiplex state, or leave that state by
+// calling either abort() or finish().
+//
+// Restarting a finished state machine can be done by calling run(),
+// which will cause a re-initialize. The default is to destruct the
+// state machine once the last LLPointer to it is deleted.
+//
 
-	public:
-	  QueueElement(AIStateMachine* statemachine) : mStateMachine(statemachine), mRuntime(0) { }
-	  friend bool operator==(QueueElement const& e1, QueueElement const& e2) { return e1.mStateMachine == e2.mStateMachine; }
-	  friend struct QueueElementComp;
 
-	  AIStateMachine& statemachine(void) const { return *mStateMachine; }
-	  void add(U64 count) { mRuntime += count; }
-  };
+//==================================================================
+// Declaration
 
-  struct QueueElementComp {
-	bool operator()(QueueElement const& e1, QueueElement const& e2) const { return e1.mRuntime < e2.mRuntime; }
-  };
+// Every state machine is (indirectly) derived from AIStateMachine.
+// For example:
 
-  typedef std::vector<QueueElement> active_statemachines_type;
-  active_statemachines_type active_statemachines;
+#ifdef EXAMPLE_CODE	// undefined
+
+class HelloWorld : public AIStateMachine {
+  protected:
+	// The base class of this state machine.
+	typedef AIStateMachine direct_base_type;
+
+	// The different states of the state machine.
+	enum hello_world_state_type {
+	  HelloWorld_start = direct_base_type::max_state,
+	  HelloWorld_done,
+	};
+  public:
+	static state_type const max_state = HelloWorld_done + 1;	// One beyond the largest state.
+
+  public:
+	// The derived class must have a default constructor.
+	HelloWorld();
+
+  protected:
+	// The destructor must be protected (presumably so that the object is only destroyed through its reference count -- confirm).
+	/*virtual*/ ~HelloWorld();
+
+  protected:
+	// The following virtual functions must be implemented:
+
+	// Handle initializing the object.
+	/*virtual*/ void initialize_impl(void);
+
+	// Handle run_state, the state of the derived class; called while the base class is in the bs_multiplex state.
+	/*virtual*/ void multiplex_impl(state_type run_state);
+
+	// Handle aborting from current bs_multiplex state (the default AIStateMachine::abort_impl() does nothing).
+	/*virtual*/ void abort_impl(void);
+
+	// Handle cleaning up from initialization (or post abort) state (the default AIStateMachine::finish_impl() does nothing).
+	/*virtual*/ void finish_impl(void);
+
+	// Return human readable string for run_state.
+	/*virtual*/ char const* state_str_impl(state_type run_state) const;
+};
+
+// In the .cpp file:
+
+char const* HelloWorld::state_str_impl(state_type run_state) const
+{
+  switch(run_state)
+  {
+	// A complete listing of hello_world_state_type.
+	AI_CASE_RETURN(HelloWorld_start);
+	AI_CASE_RETURN(HelloWorld_done);
+  }
+#if directly_derived_from_AIStateMachine	// Appears to be a placeholder condition: keep the branch that applies to your class.
+  llassert(false);
+  return "UNKNOWN STATE";
+#else
+  llassert(run_state < direct_base_type::max_state);	// Not one of our own states (those start at direct_base_type::max_state).
+  return direct_base_type::state_str_impl(run_state);	// Let the direct base class name its own states.
+#endif
+}
+
+#endif // EXAMPLE_CODE
+
+
+//==================================================================
+// Life cycle: creation, initialization, running and destruction
+
+// Any thread may create a state machine object, initialize it by calling
+// its initializing member function and call one of the 'run' methods,
+// which might or might not immediately start to execute the state machine.
+
+#ifdef EXAMPLE_CODE
+  HelloWorld* hello_world = new HelloWorld;
+  hello_world->init(...);					// A custom initialization function.
+  hello_world->run(...);					// One of the run() functions.
+  // hello_world might be destructed here.
+  // You can avoid possible destruction by using an LLPointer<HelloWorld>
+  // instead of HelloWorld*.
+#endif // EXAMPLE_CODE
+
+// The call to run() causes a call to initialize_impl(), which MUST call
+//   set_state() at least once (only the last call is used).
+// Upon return from initialize_impl(), multiplex_impl() will be called
+//   with that state.
+// multiplex_impl() may never be reentered (cause itself to be called).
+// multiplex_impl() should end by calling either one of:
+//   idle(current_state), yield*(), finish() [or abort()].
+// Leaving multiplex_impl() without calling any of those might result in an
+//   immediate reentry, which could lead to 100% CPU usage unless the state
+//   is changed with set_state().
+// If multiplex_impl() calls finish() then finish_impl() will be called [if it
+//   calls abort() then abort_impl() will be called, followed by finish_impl()].
+// Upon return from multiplex_impl(), and if finish() [or abort()] was called,
+//   the call back passed to run() will be called.
+// Upon return from the call back, the state machine object might be destructed
+//   (see below).
+// If idle(current_state) was called, and the state was (still) current_state,
+//   then multiplex_impl() will not be called again until the state is
+//   advanced, or cont() is called.
+//
+// If the call back function does not call run(), then the state machine is
+//   deleted when the last LLPointer<> reference is deleted.
+// If kill() is called after run() was called, then the call to run() is ignored.
+
+
+//==================================================================
+// Aborting
+
+// If abort() is called before initialize_impl() is entered, then the state
+//   machine is destructed after the last LLPointer<> reference to it is
+//   deleted (if any). Note that this is only possible when a child state
+//   machine is aborted before the parent even runs.
+//
+// If abort() is called inside its initialize_impl() that initialize_impl()
+//   should return immediately after.
+// If idle(), abort() or finish() are called inside its multiplex_impl() then
+//   that multiplex_impl() should return immediately after.
+//
+
+
+//==================================================================
+// Thread safety
+
+// Only one thread can "run" a state machine at a time; that is, only one thread at a time can call 'multiplex_impl'.
+//
+// Only from inside multiplex_impl (set_state also from initialize_impl), any of the
+// following functions can be called:
+//
+// - set_state(new_state)		--> Force the state to new_state. This voids any previous call to set_state() or idle().
+// - idle(current_state)		--> If the current state is still current_state (if there was no call to advance_state()
+// 									since the last call to set_state(current_state)) then go idle (do nothing until
+// 									cont() or advance_state() is called). If the current state is not current_state,
+// 									then multiplex_impl shall be reentered immediately upon return.
+// - finish()					--> Disables any scheduled runs.
+// 								--> finish_impl		--> [optional] kill()
+// 								--> call back
+// 								--> [optional] delete
+// 								--> [optional] reset, upon return from multiplex_impl, call initialize_impl and start again at the top of multiplex.
+// - yield([engine])			--> give CPU to other state machines before running again, run next from a state machine engine.
+// 									If no engine is passed, the state machine will run in its default engine (as set during construction).
+// - yield_frame()/yield_ms()	--> yield(&gMainThreadEngine)
+//
+// the following function may be called from multiplex_impl() of any state machine (and thus by any thread):
+//
+// - abort()					--> abort_impl
+// 								--> finish()
+//
+// while the following functions may be called from anywhere (and any thread):
+//
+// - cont()						--> schedules a run if there was no call to set_state() or advance_state() since the last call to idle().
+// - advance_state(new_state)	--> sets the state to new_state, if the new_state > current_state, and schedules a run (and voids the last call to idle()).
+//
+// In the above "scheduling a run" means calling multiplex_impl(), but the same holds for any *_impl()
+// and the call back: Whenever one of those have to be called, thread_safe_impl() is called to
+// determine if the current state machine allows that function to be called by the current thread,
+// and if not - by which thread it should be called then (either main thread, or a special state machine
+// thread). If thread switching is necessary, the call is literally scheduled in a queue of one
+// of those two, otherwise it is run immediately.
+//
+// However, since only one thread at a time may be calling any *_impl function (except thread_safe_impl())
+// or the call back function, it is possible that at the moment scheduling is necessary another thread
+// is already running one of those functions. In that case thread_safe_impl() does not consider the
+// current thread, but rather the running thread and does not do any scheduling if the running thread
+// is ok, rather marks the need to continue running which should be picked up upon return from
+// whatever the running thread is calling.
+
+void AIEngine::add(AIStateMachine* state_machine)
+{
+  // Append state_machine to this engine's list and call signal() when the engine state is flagged as waiting.
+  Dout(dc::statemachine, "Adding state machine [" << (void*)state_machine << "] to " << mName);
+  engine_state_type_wat locked_state(mEngineState);
+  locked_state->list.push_back(QueueElement(state_machine));
+  if (!locked_state->waiting)
+	return;
+  locked_state.signal();
+}
+
+extern void print_statemachine_diagnostics(U64 total_clocks, U64 max_delta, AIEngine::queued_type::const_reference slowest_state_machine);
+
+// MAIN-THREAD: runs the queued state machines in list order until the end of the list is reached or sMaxCount clocks were spent.
+void AIEngine::mainloop(void)
+{
+  queued_type::iterator queued_element, end;
+  {	// Hold the access object on mEngineState only while fetching the iterators.
+	engine_state_type_wat engine_state_w(mEngineState);
+	end = engine_state_w->list.end();
+	queued_element = engine_state_w->list.begin();
+  }
+  U64 total_clocks = 0;
+#ifndef LL_RELEASE_FOR_DOWNLOAD
+  U64 max_delta = 0;
+  queued_type::value_type slowest_element(NULL);
+#endif
+  while (queued_element != end)
+  {
+	AIStateMachine& state_machine(queued_element->statemachine());
+	U64 start = get_clock_count();		// Clock count before the run; also passed to sleep().
+	if (!state_machine.sleep(start))		// Only run the state machine when sleep() returns false.
+	{
+	  state_machine.multiplex(AIStateMachine::normal_run);
+	}
+	U64 delta = get_clock_count() - start;
+	state_machine.add(delta);		// Hand the used clocks to the state machine (presumably its accumulated runtime, used by the sort below -- confirm).
+	total_clocks += delta;
+#ifndef LL_RELEASE_FOR_DOWNLOAD
+	if (delta > max_delta)
+	{
+	  max_delta = delta;
+	  slowest_element = *queued_element;
+	}
+#endif
+	bool active = state_machine.active(this);		// This locks mState shortly, so it must be called before locking mEngineState because add() locks mEngineState while holding mState.
+	engine_state_type_wat engine_state_w(mEngineState);		// Stays in scope until the end of this iteration.
+	if (!active)
+	{
+	  Dout(dc::statemachine, "Erasing state machine [" << (void*)&state_machine << "] from " << mName);
+	  engine_state_w->list.erase(queued_element++);		// Post-increment: move on before the erased element goes away.
+	}
+	else
+	{
+	  ++queued_element;
+	}
+	if (total_clocks >= sMaxCount)		// The clock budget for this call is used up.
+	{
+#ifndef LL_RELEASE_FOR_DOWNLOAD
+	  print_statemachine_diagnostics(total_clocks, max_delta, slowest_element);
+#endif
+	  Dout(dc::statemachine, "Sorting " << engine_state_w->list.size() << " state machines.");
+	  engine_state_w->list.sort(QueueElementComp());
+	  break;		// State machines that were not reached are left for a later call.
+	}
+  }
+}
+
+void AIEngine::flush(void)
+{
+  DoutEntering(dc::statemachine, "AIEngine::flush [" << mName << "]");
+  // Empty this engine's list; the temporary access object on mEngineState lives for the duration of the call.
+  engine_state_type_wat(mEngineState)->list.clear();
 }
 
 // static
-U64 AIStateMachine::sMaxCount;
-AIThreadSafeDC<AIStateMachine::csme_type> AIStateMachine::sContinuedStateMachinesAndMainloopEnabled;
+U64 AIEngine::sMaxCount;
 
 // static
-void AIStateMachine::setMaxCount(F32 StateMachineMaxTime)
+void AIEngine::setMaxCount(F32 StateMachineMaxTime)	// Sets sMaxCount to clock frequency * StateMachineMaxTime / 1000 (so presumably StateMachineMaxTime is in ms -- confirm).
 {
-  llassert(is_main_thread());
+  llassert(AIThreadID::in_main_thread());	// May only be called from the main thread.
-  Dout(dc::statemachine, "(Re)calculating AIStateMachine::sMaxCount");
+  Dout(dc::statemachine, "(Re)calculating AIEngine::sMaxCount");
   sMaxCount = calc_clock_frequency() * StateMachineMaxTime / 1000;
 }
 
-//----------------------------------------------------------------------------
-//
-// Public methods
-//
-
-void AIStateMachine::run(AIStateMachine* parent, state_type new_parent_state, bool abort_parent, bool on_abort_signal_parent)
+#ifdef CWDEBUG	// event_str() is only compiled when CWDEBUG is defined.
+char const* AIStateMachine::event_str(event_type event)
 {
-  DoutEntering(dc::statemachine, "AIStateMachine::run(" << (void*)parent << ", " << (parent ? parent->state_str(new_parent_state) : "NA") << ", " << abort_parent << ") [" << (void*)this << "]");
-  // Must be the first time we're being run, or we must be called from a callback function.
-  llassert(!mParent || mState == bs_callback);
-  llassert(!mCallback || mState == bs_callback);
-  // Can only be run when in this state.
-  llassert(mState == bs_initialize || mState == bs_callback);
+  switch(event)	// Return the name of the event, for debug output.
+  {
+	AI_CASE_RETURN(initial_run);
+	AI_CASE_RETURN(schedule_run);
+	AI_CASE_RETURN(normal_run);
+	AI_CASE_RETURN(insert_abort);
+  }
+  llassert(false);	// Only reached when event is not one of the four values listed above.
+  return "UNKNOWN EVENT";
+}
+#endif
+
+// Run the state machine: call the *_impl() function (or the call back) that belongs to the current base state, derive the
+// next base state from mSubState, and then either loop again immediately or leave the next run to an engine.
+void AIStateMachine::multiplex(event_type event)
+{
+  // If this fails then you are using a pointer to a state machine instead of an LLPointer.
+  llassert(event == initial_run || getNumRefs() > 0);
+  DoutEntering(dc::statemachine, "AIStateMachine::multiplex(" << event_str(event) << ") [" << (void*)this << "]");
+
+  base_state_type state;
+  state_type run_state;
+
+  // Critical area of mState.
+  {
+	multiplex_state_type_rat state_r(mState);
+
+	// If another thread is already running multiplex() then it will pick up
+	// our need to run (by us having set need_run), so there is no need to run
+	// ourselves.
+	llassert(!mMultiplexMutex.isSelfLocked());		// We may never enter recursively!
+	if (!mMultiplexMutex.tryLock())
+	{
+	  Dout(dc::statemachine, "Leaving because it is already being run [" << (void*)this << "]");
+	  return;
+	}
+
+	//===========================================
+	// Start of critical area of mMultiplexMutex.
+
+	// If another thread already called begin_loop() since we needed a run,
+	// then we must not schedule a run because that could lead to running
+	// the same state twice. Note that if need_run was reset in the mean
+	// time and then set again, then it can't hurt to schedule a run since
+	// we should indeed run, again.
+	if (event == schedule_run && !sub_state_type_rat(mSubState)->need_run)
+	{
+	  Dout(dc::statemachine, "Leaving because it was already being run [" << (void*)this << "]");
+	  mMultiplexMutex.unlock();		// The tryLock() above succeeded: release the mutex again, or nobody can ever run us anymore.
+	  return;
+	}
+
+	// We're at the beginning of multiplex, about to actually run it.
+	// Make a copy of the states.
+	run_state = begin_loop((state = state_r->base_state));
+  }
+  // End of critical area of mState.
+
+  bool keep_looping;
+  bool destruct = false;
+  do
+  {
+	if (event == normal_run)
+	{
+#ifdef CWDEBUG
+	  if (state == bs_multiplex)
+		Dout(dc::statemachine, "Running state bs_multiplex / " << state_str_impl(run_state) << " [" << (void*)this << "]");
+	  else
+		Dout(dc::statemachine, "Running state " << state_str(state) << " [" << (void*)this << "]");
+#endif
+
+#ifdef SHOW_ASSERT
+	  // This debug code checks that each state machine steps precisely through each of its states correctly.
+	  if (state != bs_reset)
+	  {
+		switch(mDebugLastState)
+		{
+		  case bs_reset:
+			llassert(state == bs_initialize || state == bs_killed);
+			break;
+		  case bs_initialize:
+			llassert(state == bs_multiplex || state == bs_abort);
+			break;
+		  case bs_multiplex:
+			llassert(state == bs_multiplex || state == bs_finish || state == bs_abort);
+			break;
+		  case bs_abort:
+			llassert(state == bs_finish);
+			break;
+		  case bs_finish:
+			llassert(state == bs_callback);
+			break;
+		  case bs_callback:
+			llassert(state == bs_killed || state == bs_reset);
+			break;
+		  case bs_killed:
+			llassert(state == bs_killed);
+			break;
+		}
+	  }
+	  // More sanity checks.
+	  if (state == bs_multiplex)
+	  {
+		// set_state is only called from multiplex_impl and therefore synced with mMultiplexMutex.
+		mDebugShouldRun |= mDebugSetStatePending;
+		// Should we run at all?
+		llassert(mDebugShouldRun);
+	  }
+	  // Any previous reason to run is voided by actually running.
+	  mDebugShouldRun = false;
+#endif
+
+	  mRunMutex.lock();
+	  // Now we are actually running a single state.
+	  // If abort() was called at any moment before, we execute that state instead.
+	  bool const late_abort = (state == bs_multiplex || state == bs_initialize) && sub_state_type_rat(mSubState)->aborted;
+	  if (LL_UNLIKELY(late_abort))
+	  {
+		// abort() was called from a child state machine, from another thread, while we were already scheduled to run normally from an engine.
+		// What we want to do here is pretend we detected the abort at the end of the *previous* run.
+		// If the state is bs_multiplex then the previous state was either bs_initialize or bs_multiplex,
+		// both of which would have switched to bs_abort: we set the state to bs_abort instead and just
+		// continue this run.
+		// However, if the state is bs_initialize we can't switch to bs_killed because that state isn't
+		// handled in the switch below; it's only handled when exiting multiplex() directly after it is set.
+		// Therefore, in that case we have to set the state BACK to bs_reset and run it again. This duplicated
+		// run of bs_reset is not a problem because it happens to be a NoOp.
+		state = (state == bs_initialize) ? bs_reset : bs_abort;
+#ifdef CWDEBUG
+		Dout(dc::statemachine, "Late abort detected! Running state " << state_str(state) << " instead [" << (void*)this << "]");
+#endif
+	  }
+#ifdef SHOW_ASSERT
+	  mDebugLastState = state;
+	  // Make sure we only call ref() once and in balance with unref().
+	  if (state == bs_initialize)
+	  {
+		// This -- and call to ref() (and the test when we're about to call unref()) -- is all done in the critical area of mMultiplexMutex.
+		llassert(!mDebugRefCalled);
+		mDebugRefCalled = true;
+	  }
+#endif
+	  switch(state)
+	  {
+		case bs_reset:
+		  // We're just being kick started to get into the right thread
+		  // (possibly for the second time when a late abort was detected, but that's ok: we do nothing here).
+		  break;
+		case bs_initialize:
+		  ref();
+		  initialize_impl();
+		  break;
+		case bs_multiplex:
+		  llassert(!mDebugAborted);
+		  multiplex_impl(run_state);
+		  break;
+		case bs_abort:
+		  abort_impl();
+		  break;
+		case bs_finish:
+		  sub_state_type_wat(mSubState)->reset = false;		// By default, halt state machines when finished.
+		  finish_impl();									// Call run() from finish_impl() or the call back to restart from the beginning.
+		  break;
+		case bs_callback:
+		  callback();
+		  break;
+		case bs_killed:
+		  mRunMutex.unlock();
+		  // bs_killed is handled when it is set. So, this must be a re-entry.
+		  // We can only get here when being called by an engine that we were added to before we were killed.
+		  // This should already have been set to NULL to indicate that we want to be removed from that engine.
+		  llassert(!multiplex_state_type_rat(mState)->current_engine);
+		  // Do not call unref() twice. NOTE(review): mMultiplexMutex is not released on this path -- confirm that this is intended for a killed state machine.
+		  return;
+	  }
+	  mRunMutex.unlock();
+	}
+
+	{
+	  multiplex_state_type_wat state_w(mState);
+
+	  //=================================
+	  // Start of critical area of mState
+
+	  // Unless the state is bs_multiplex or bs_killed, the state machine needs to keep calling multiplex().
+	  bool need_new_run = true;
+	  if (event == normal_run || event == insert_abort)
+	  {
+		sub_state_type_rat sub_state_r(mSubState);
+
+		if (event == normal_run)
+		{
+		  // Switch base state as function of sub state.
+		  switch(state)
+		  {
+			case bs_reset:
+			  if (sub_state_r->aborted)
+			  {
+				// We have been aborted before we could even initialize, no de-initialization is possible.
+				state_w->base_state = bs_killed;
+				// Stop running.
+				need_new_run = false;
+			  }
+			  else
+			  {
+				// run() was called: call initialize_impl() next.
+				state_w->base_state = bs_initialize;
+			  }
+			  break;
+			case bs_initialize:
+			  if (sub_state_r->aborted)
+			  {
+				// initialize_impl() called abort.
+				state_w->base_state = bs_abort;
+			  }
+			  else
+			  {
+				// Start actually running.
+				state_w->base_state = bs_multiplex;
+				// If the state is bs_multiplex we only need to run again when need_run was set again in the meantime or when this state machine isn't idle.
+				need_new_run = sub_state_r->need_run || !sub_state_r->idle;
+			  }
+			  break;
+			case bs_multiplex:
+			  if (sub_state_r->aborted)
+			  {
+				// abort() was called.
+				state_w->base_state = bs_abort;
+			  }
+			  else if (sub_state_r->finished)
+			  {
+				// finish() was called.
+				state_w->base_state = bs_finish;
+			  }
+			  else
+			  {
+				// Continue in bs_multiplex.
+				// If the state is bs_multiplex we only need to run again when need_run was set again in the meantime or when this state machine isn't idle.
+				need_new_run = sub_state_r->need_run || !sub_state_r->idle;
+			  }
+			  break;
+			case bs_abort:
+			  // After calling abort_impl(), call finish_impl().
+			  state_w->base_state = bs_finish;
+			  break;
+			case bs_finish:
+			  // After finish_impl(), call the call back function.
+			  state_w->base_state = bs_callback;
+			  break;
+			case bs_callback:
+			  if (sub_state_r->reset)
+			  {
+				// run() was called (not followed by kill()).
+				state_w->base_state = bs_reset;
+			  }
+			  else
+			  {
+				// After the call back, we're done.
+				state_w->base_state = bs_killed;
+				// Call unref().
+				destruct = true;
+				// Stop running.
+				need_new_run = false;
+			  }
+			  break;
+			default: // bs_killed
+			  // We never get here.
+			  break;
+		  }
+		}
+		else // event == insert_abort
+		{
+		  // We have been aborted, but we're idle. If we'd just schedule a new run below, it would re-run
+		  // the last state before the abort is handled. What we really need is to pick up as if the abort
+		  // was handled directly after returning from the last run. If we're not running anymore, then
+		  // do nothing as the state machine already ran and things should be processed normally
+		  // (in that case this is just a normal schedule which can't harm because we can't accidentally
+		  // re-run an old run_state).
+		  if (state_w->base_state == bs_multiplex)		// Still running?
+		  {
+			// See the switch above for case bs_multiplex.
+			llassert(sub_state_r->aborted);
+			// abort() was called.
+			state_w->base_state = bs_abort;
+		  }
+		}
+
+#ifdef CWDEBUG
+		if (state != state_w->base_state)
+		  Dout(dc::statemachine, "Base state changed from " << state_str(state) << " to " << state_str(state_w->base_state) <<
+			  "; need_new_run = " << (need_new_run ? "true" : "false") << " [" << (void*)this << "]");
+#endif
+	  }
+
+	  // Figure out in which engine we should run.
+	  AIEngine* engine = mYieldEngine ? mYieldEngine : (state_w->current_engine ? state_w->current_engine : mDefaultEngine);
+	  // And the current engine we're running in.
+	  AIEngine* current_engine = (event == normal_run) ? state_w->current_engine : NULL;
+
+	  // Immediately run again if yield() wasn't called and it's OK to run in this thread.
+	  // Note that when it's OK to run in any engine (mDefaultEngine is NULL) then the last
+	  // compare is also true when current_engine == NULL.
+	  keep_looping = need_new_run && !mYieldEngine && engine == current_engine;
+	  mYieldEngine = NULL;
+
+	  if (keep_looping)
+	  {
+		// Start a new loop.
+		run_state = begin_loop((state = state_w->base_state));
+		event = normal_run;
+	  }
+	  else
+	  {
+		if (need_new_run)
+		{
+		  // Add us to an engine if necessary.
+		  if (engine != state_w->current_engine)
+		  {
+			// engine can't be NULL here: it can only be NULL if mDefaultEngine is NULL.
+			engine->add(this);
+			// Mark that we're added to this engine, and at the same time, that we're not added to the previous one.
+			state_w->current_engine = engine;
+		  }
+		}
+		else
+		{
+		  // Remove this state machine from any engine.
+		  // Cause the engine to remove us.
+		  state_w->current_engine = NULL;
+		}
+
+#ifdef SHOW_ASSERT
+		// Mark that we stop running the loop.
+		mThreadId.clear();
+
+		if (destruct)
+		{
+		  // We're about to call unref(). Make sure we call that in balance with ref()!
+		  llassert(mDebugRefCalled);
+		  mDebugRefCalled  = false;
+		}
+#endif
+
+		// End of critical area of mMultiplexMutex.
+		//=========================================
+
+		// Release the lock on mMultiplexMutex *first*, before releasing the lock on mState,
+		// to avoid to ever call the tryLock() and fail, while this thread isn't still
+		// BEFORE the critical area of mState!
+
+		mMultiplexMutex.unlock();
+	  }
+
+	  // Now it is safe to leave the critical area of mState as the tryLock won't fail anymore.
+	  // (Or, if we didn't release mMultiplexMutex because keep_looping is true, then this
+	  // end of the critical area of mState is equivalent to the first critical area in this
+	  // function.)
+
+	  // End of critical area of mState.
+	  //================================
+	}
+  }
+  while (keep_looping);
+
+  if (destruct)
+  {
+	unref();
+  }
+}
+
+AIStateMachine::state_type AIStateMachine::begin_loop(base_state_type base_state)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::begin_loop(" << state_str(base_state) << ") [" << (void*)this << "]");
+  // Prepares one pass of the multiplex() loop: clears skip_idle and need_run, applies a pending advance_state, and returns the run_state to run.
+  sub_state_type_wat sub_state_w(mSubState);
+  // Honor a subsequent call to idle() (only necessary in bs_multiplex, but it doesn't hurt to reset this flag in other states too).
+  sub_state_w->skip_idle = false;
+  // Mark that we're about to honor all previous run requests.
+  sub_state_w->need_run = false;
+  // Honor previous calls to advance_state() (once run_state is initialized).
+  if (base_state == bs_multiplex && sub_state_w->advance_state > sub_state_w->run_state)
+  {
+	Dout(dc::statemachine, "Copying advance_state to run_state, because it is larger [" << state_str_impl(sub_state_w->advance_state) << " > " << state_str_impl(sub_state_w->run_state) << "]");
+	sub_state_w->run_state = sub_state_w->advance_state;
+  }
+#ifdef SHOW_ASSERT
+  else	// Belongs to the 'if' above; this branch only exists when SHOW_ASSERT is defined.
+  {
+	// If advance_state wasn't honored then it isn't a reason to run.
+	// We're running anyway, but that should be because set_state() was called.
+	mDebugAdvanceStatePending = false;
+  }
+#endif
+  sub_state_w->advance_state = 0;	// The pending advance_state is cleared whether or not it was applied.
+
+#ifdef SHOW_ASSERT
+  // Mark that we're running the loop.
+  mThreadId.reset();
+  // This point marks handling cont().
+  mDebugShouldRun |= mDebugContPending;
+  mDebugContPending = false;
+  // This point also marks handling advance_state().
+  mDebugShouldRun |= mDebugAdvanceStatePending;
+  mDebugAdvanceStatePending = false;
+#endif
+
+  // Make a copy of the state that we're about to run.
+  return sub_state_w->run_state;
+}
+
+void AIStateMachine::run(LLPointer<AIStateMachine> parent, state_type new_parent_state, bool abort_parent, bool on_abort_signal_parent, AIEngine* default_engine)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::run(" <<
+	  (void*)parent.get() << ", " <<
+	  (parent ? parent->state_str_impl(new_parent_state) : "NA") <<
+	  ", abort_parent = " << (abort_parent ? "true" : "false") <<
+	  ", on_abort_signal_parent = " << (on_abort_signal_parent ? "true" : "false") <<
+	  ", default_engine = " << (default_engine ? default_engine->name() : "NULL") << ") [" << (void*)this << "]");
+
+#ifdef SHOW_ASSERT
+  {
+	multiplex_state_type_rat state_r(mState);
+	// Can only be run when in one of these states.
+	llassert(state_r->base_state == bs_reset || state_r->base_state == bs_finish || state_r->base_state == bs_callback);
+	// Must be the first time we're being run, or we must be called from finish_impl or a callback function.
+	llassert(!(state_r->base_state == bs_reset && (mParent || mCallback)));
+  }
+#endif
+
+  // Store the requested default engine.
+  mDefaultEngine = default_engine;
+
+  // Initialize sleep timer.
+  mSleep = 0;
 
   // Allow NULL to be passed as parent to signal that we want to reuse the old one.
   if (parent)
@@ -108,24 +825,31 @@ void AIStateMachine::run(AIStateMachine* parent, state_type new_parent_state, bo
   // If abort_parent is requested then a parent must be provided.
   llassert(!abort_parent || mParent);
   // If a parent is provided, it must be running.
-  llassert(!mParent || mParent->mState == bs_run);
+  llassert(!mParent || mParent->running());
 
-  // Mark that run() has been called, in case we're being called from a callback function.
-  mState = bs_initialize;
-
-  // Set mIdle to false and add statemachine to continued_statemachines.
-  mSetStateLock.lock();
-  locked_cont();
+  // Start from the beginning.
+  reset();
 }
 
-void AIStateMachine::run(callback_type::signal_type::slot_type const& slot)
+void AIStateMachine::run(callback_type::signal_type::slot_type const& slot, AIEngine* default_engine)
 {
-  DoutEntering(dc::statemachine, "AIStateMachine::run(<slot>) [" << (void*)this << "]");
-  // Must be the first time we're being run, or we must be called from a callback function.
-  llassert(!mParent || mState == bs_callback);
-  llassert(!mCallback || mState == bs_callback);
-  // Can only be run when in this state.
-  llassert(mState == bs_initialize || mState == bs_callback);
+  DoutEntering(dc::statemachine, "AIStateMachine::run(<slot>, default_engine = " << (default_engine ? default_engine->name() : "NULL") << ") [" << (void*)this << "]");
+
+#ifdef SHOW_ASSERT
+  {
+	multiplex_state_type_rat state_r(mState);
+	// Can only be run when in one of these states.
+	llassert(state_r->base_state == bs_reset || state_r->base_state == bs_finish || state_r->base_state == bs_callback);
+	// Must be the first time we're being run, or we must be called from finish_impl or a callback function.
+	llassert(!(state_r->base_state == bs_reset && (mParent || mCallback)));
+  }
+#endif
+
+  // Store the requested default engine (may be NULL; yield() falls back to another engine in that case).
+  mDefaultEngine = default_engine;
+
+  // Initialize sleep timer.
+  mSleep = 0;
 
   // Clean up any old callbacks.
   mParent = NULL;
@@ -135,229 +859,18 @@ void AIStateMachine::run(callback_type::signal_type::slot_type const& slot)
 	mCallback = NULL;
   }
 
+  // Create new call back.
   mCallback = new callback_type(slot);
 
-  // Mark that run() has been called, in case we're being called from a callback function.
-  mState = bs_initialize;
-
-  // Set mIdle to false and add statemachine to continued_statemachines.
-  mSetStateLock.lock();
-  locked_cont();
+  // Start from the beginning.
+  reset();
 }
 
-void AIStateMachine::idle(void)
+void AIStateMachine::callback(void)
 {
-  DoutEntering(dc::statemachine, "AIStateMachine::idle() [" << (void*)this << "]");
-  llassert(is_main_thread());
-  llassert(!mIdle);
-  mIdle = true;
-  mSleep = 0;
-#ifdef SHOW_ASSERT
-  mCalledThreadUnsafeIdle = true;
-#endif
-}
+  DoutEntering(dc::statemachine, "AIStateMachine::callback() [" << (void*)this << "]");
 
-void AIStateMachine::idle(state_type current_run_state)
-{
-  DoutEntering(dc::statemachine, "AIStateMachine::idle(" << state_str(current_run_state) << ") [" << (void*)this << "]");
-  llassert(is_main_thread());
-  llassert(!mIdle);
-  mSetStateLock.lock();
-  // Only go idle if the run state is (still) what we expect it to be.
-  // Otherwise assume that another thread called set_state() and continue running.
-  if (current_run_state == mRunState)
-  {
-	mIdle = true;
-	mSleep = 0;
-  }
-  mSetStateLock.unlock();
-}
-
-// About thread safeness:
-//
-// The main thread initializes a statemachine and calls run, so a statemachine
-// runs in the main thread. However, it is allowed that a state calls idle(current_state)
-// and then allows one or more other threads to call cont() upon some
-// event (only once, of course, as idle() has to be called before cont()
-// can be called again-- and a non-main thread is not allowed to call idle()).
-// Instead of cont() one may also call set_state().
-// Of course, this may give rise to a race condition; if that happens then
-// the thread that calls cont() (set_state()) first is serviced, and the other
-// thread(s) are ignored, as if they never called cont().
-void AIStateMachine::locked_cont(void)
-{
-  DoutEntering(dc::statemachine, "AIStateMachine::locked_cont() [" << (void*)this << "]");
-  llassert(mIdle);
-  // Atomic test mActive and change mIdle.
-  mIdleActive.lock();
-#ifdef SHOW_ASSERT
-  mContThread.reset();
-#endif
-  mIdle = false;
-  bool not_active = mActive == as_idle;
-  mIdleActive.unlock();
-  // mActive is only changed in AIStateMachine::mainloop, by the main-thread, and
-  // here, possibly by any thread. However, after setting mIdle to false above, it
-  // is impossible for any thread to come here, until after the main-thread called
-  // idle(). So, if this is the main thread then that certainly isn't going to
-  // happen until we left this function, while if this is another thread  and the
-  // state machine is already running in the main thread then not_active is false
-  // and we're already at the end of this function.
-  // If not_active is true then main-thread is not running this statemachine.
-  // It might call cont() (or set_state()) but never locked_cont(), and will never
-  // start actually running until we are done here and release the lock on
-  // sContinuedStateMachinesAndMainloopEnabled again. It is therefore safe
-  // to release mSetStateLock here, with as advantage that if we're not the main-
-  // thread and not_active is true, then the main-thread won't block when it has
-  // a timer running that times out and calls set_state().
-  mSetStateLock.unlock();
-  if (not_active)
-  {
-	AIWriteAccess<csme_type> csme_w(sContinuedStateMachinesAndMainloopEnabled);
-	// See above: it is not possible that mActive was changed since not_active
-	// was set to true above.
-	llassert_always(mActive == as_idle);
-	Dout(dc::statemachine, "Adding " << (void*)this << " to continued_statemachines");
-	csme_w->continued_statemachines.push_back(this);
-	if (!csme_w->mainloop_enabled)
-	{
-	  Dout(dc::statemachine, "Activating AIStateMachine::mainloop.");
-	  csme_w->mainloop_enabled = true;
-	}
-	mActive = as_queued;
-	llassert_always(!mIdle);	// It should never happen that the main thread calls idle(), while another thread calls cont() concurrently.
-  }
-}
-
-void AIStateMachine::set_state(state_type state)
-{
-  DoutEntering(dc::statemachine, "AIStateMachine::set_state(" << state_str(state) << ") [" << (void*)this << "]");
-
-  // Stop race condition of multiple threads calling cont() or set_state() here.
-  mSetStateLock.lock();
-
-  // Do not call set_state() unless running.
-  llassert(mState == bs_run || !is_main_thread());
-
-  // If this function is called from another thread than the main thread, then we have to ignore
-  // it if we're not idle and the state is less than the current state. The main thread must
-  // be able to change the state to anything (also smaller values). Note that that only can work
-  // if the main thread itself at all times cancels thread callbacks that call set_state()
-  // before calling idle() again!
-  //
-  // Thus: main thead calls idle(), and tells one or more threads to do callbacks on events,
-  // which (might) call set_state(). If the main thread calls set_state first (currently only
-  // possible as a result of the use of a timer) it will set mIdle to false (here) then cancel
-  // the call backs from the other threads and only then call idle() again.
-  // Thus if you want other threads get here while mIdle is false to be ignored then the
-  // main thread should use a large value for the new run state.
-  //
-  // If a non-main thread calls set_state first, then the state is changed but the main thread
-  // can still override it if it calls set_state before handling the new state; in the latter
-  // case it would still be as if the call from the non-main thread was ignored.
-  //
-  // Concurrent calls from non-main threads however, always result in the largest state
-  // to prevail.
-
-  // If the state machine is already running, and we are not the main-thread and the new
-  // state is less than the current state, ignore it.
-  // Also, if abort() or finish() was called, then we should just ignore it.
-  if (mState != bs_run ||
-	  (!mIdle && state <= mRunState && !AIThreadID::in_main_thread()))
-  {
-#ifdef SHOW_ASSERT
-	// It's a bit weird if the same thread does two calls on a row where the second call
-	// has a smaller value: warn about that.
-	if (mState == bs_run && mContThread.equals_current_thread())
-	{
-	  llwarns << "Ignoring call to set_state(" << state_str(state) <<
-		  ") by non-main thread before main-thread could react on previous call, "
-		  "because new state is smaller than old state (" << state_str(mRunState) << ")." << llendl;
-	}
-#endif
-	mSetStateLock.unlock();
-	return;		// Ignore.
-  }
-
-  // Do not call idle() when set_state is called from another thread; use idle(state_type) instead.
-  llassert(!mCalledThreadUnsafeIdle || is_main_thread());
-
-  // Change mRunState to the requested value.
-  if (mRunState != state)
-  {
-	mRunState = state;
-	Dout(dc::statemachine, "mRunState set to " << state_str(mRunState));
-  }
-
-  // Continue the state machine if appropriate.
-  if (mIdle)
-	locked_cont();				// This unlocks mSetStateLock.
-  else
-	mSetStateLock.unlock();
-
-  // If we get here then mIdle is false, so only mRunState can still be changed but we won't
-  // call locked_cont() anymore. When the main thread finally picks up on the state change,
-  // it will cancel any possible callbacks from other threads and process the largest state
-  // that this function was called with in the meantime.
-}
-
-void AIStateMachine::abort(void)
-{
-  DoutEntering(dc::statemachine, "AIStateMachine::abort() [" << (void*)this << "]");
-  // It's possible that abort() is called before calling AIStateMachine::multiplex.
-  // In that case the statemachine wasn't initialized yet and we should just kill() it.
-  if (LL_UNLIKELY(mState == bs_initialize))
-  {
-	// It's ok to use the thread-unsafe idle() here, because if the statemachine
-	// wasn't started yet, then other threads won't call set_state() on it.
-	if (!mIdle)
-	  idle();
-	// run() calls locked_cont() after which the top of the mainloop adds this
-	// state machine to active_statemachines. Therefore, if the following fails
-	// then either the same statemachine called run() immediately followed by abort(),
-	// which is not allowed; or there were two active statemachines running,
-	// the first created a new statemachine and called run() on it, and then
-	// the other (before reaching the top of the mainloop) called abort() on
-	// that freshly created statemachine. Obviously, this is highly unlikely,
-	// but if that is the case then here we bump the statemachine into
-	// continued_statemachines to prevent kill() to delete this statemachine:
-	// the caller of abort() does not expect that.
-	if (LL_UNLIKELY(mActive == as_idle))
-	{
-	  mSetStateLock.lock();
-	  locked_cont();
-	  idle();
-	}
-	kill();
-  }
-  else
-  {
-	llassert(mState == bs_run);
-	mSetStateLock.lock();
-	mState = bs_abort;		// Causes additional calls to set_state to be ignored.
-	mSetStateLock.unlock();
-	abort_impl();
-	mAborted = true;
-	finish();
-  }
-}
-
-void AIStateMachine::finish(void)
-{
-  DoutEntering(dc::statemachine, "AIStateMachine::finish() [" << (void*)this << "]");
-  mSetStateLock.lock();
-  llassert(mState == bs_run || mState == bs_abort);
-  // It is possible that mIdle is true when abort or finish was called from
-  // outside multiplex_impl. However, that only may be done by the main thread.
-  llassert(!mIdle || is_main_thread());
-  if (!mIdle)
-	idle();					// After calling this, we don't want other threads to call set_state() anymore.
-  mState = bs_finish;		// Causes additional calls to set_state to be ignored.
-  mSetStateLock.unlock();
-  finish_impl();
-  // Did finish_impl call kill()? Then that is only the default. Remember it.
-  bool default_delete = (mState == bs_killed);
-  mState = bs_finish;
+  bool aborted = sub_state_type_rat(mSubState)->aborted;
   if (mParent)
   {
 	// It is possible that the parent is not running when the parent is in fact aborting and called
@@ -365,26 +878,21 @@ void AIStateMachine::finish(void)
 	// call abort again (or change it's state).
 	if (mParent->running())
 	{
-	  if (mAborted && mAbortParent)
+	  if (aborted && mAbortParent)
 	  {
 		mParent->abort();
 		mParent = NULL;
 	  }
-	  else if (!mAborted || mOnAbortSignalParent)
+	  else if (!aborted || mOnAbortSignalParent)
 	  {
-		mParent->set_state(mNewParentState);
+		mParent->advance_state(mNewParentState);
 	  }
 	}
   }
-  // After this (bool)*this evaluates to true and we can call the callback, which then is allowed to call run().
-  mState = bs_callback;
   if (mCallback)
   {
-	// This can/may call kill() that sets mState to bs_kill and in which case the whole AIStateMachine
-	// will be deleted from the mainloop, or it may call run() that sets mState is set to bs_initialize
-	// and might change or reuse mCallback or mParent.
-	mCallback->callback(!mAborted);
-	if (mState != bs_initialize)
+	mCallback->callback(!aborted);
+	if (multiplex_state_type_rat(mState)->base_state != bs_reset)
 	{
 	  delete mCallback;
 	  mCallback = NULL;
@@ -396,233 +904,376 @@ void AIStateMachine::finish(void)
 	// Not restarted by callback. Allow run() to be called later on.
 	mParent = NULL;
   }
-  // Fix the final state.
-  if (mState == bs_callback)
-	mState = default_delete ? bs_killed : bs_initialize;
-  if (mState == bs_killed && mActive == as_idle)
-  {
-	// Bump the statemachine onto the active statemachine list, or else it won't be deleted.
-	mSetStateLock.lock();
-	locked_cont();
-	idle();
-  }
 }
 
 void AIStateMachine::kill(void)
 {
   DoutEntering(dc::statemachine, "AIStateMachine::kill() [" << (void*)this << "]");
-  // Should only be called from finish() (or when not running (bs_initialize)).
-  // However, also allow multiple calls to kill() on a row (bs_killed) (which effectively don't do anything).
-  llassert(mIdle && (mState == bs_callback || mState == bs_finish || mState == bs_initialize || mState == bs_killed));
-  base_state_type prev_state = mState;
-  mState = bs_killed;
-  if (prev_state == bs_initialize && mActive == as_idle)
+#ifdef SHOW_ASSERT
   {
-	// We're not running (ie being deleted by a parent statemachine), delete it immediately.
-	delete this;
+	multiplex_state_type_rat state_r(mState);
+	// kill() may only be called from the call back function.
+	llassert(state_r->base_state == bs_callback);
+	// May only be called by the thread that is holding mMultiplexMutex.
+	llassert(mThreadId.equals_current_thread());
+  }
+#endif
+  sub_state_type_wat sub_state_w(mSubState);
+  // Void last call to run() (ie from finish_impl()), if any.
+  sub_state_w->reset = false;
+}
+
+void AIStateMachine::reset()
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::reset() [" << (void*)this << "]");
+#ifdef SHOW_ASSERT
+  mDebugAborted = false;
+  mDebugContPending = false;
+  mDebugSetStatePending = false;
+  mDebugRefCalled = false;
+#endif
+  mRuntime = 0;
+  bool inside_multiplex;	// True when the base state is bs_finish or bs_callback rather than bs_reset (see the assert below).
+  {
+	multiplex_state_type_rat state_r(mState);
+	// reset() is only called from run(), which may only be called when just created, from finish_impl() or from the call back function.
+	llassert(state_r->base_state == bs_reset || state_r->base_state == bs_finish || state_r->base_state == bs_callback);
+	inside_multiplex = state_r->base_state != bs_reset;
+  }
+  {
+	sub_state_type_wat sub_state_w(mSubState);
+	// Forget the outcome (aborted / finished) of any previous run.
+	sub_state_w->aborted = sub_state_w->finished = false;
+	// Signal that we want to start running from the beginning.
+	sub_state_w->reset = true;
+	// Start running: we are not idle.
+	sub_state_w->idle = false;
+	// Keep running till we reach at least bs_multiplex.
+	sub_state_w->need_run = true;
+  }
+  if (!inside_multiplex)
+  {
+	// The base state is still bs_reset, so nothing is running us yet: kick start the state machine.
+	multiplex(initial_run);
   }
 }
 
-// Return stringified 'state'.
-char const* AIStateMachine::state_str(state_type state)
+void AIStateMachine::set_state(state_type new_state)
 {
-  if (state >= min_state && state < max_state)
+  DoutEntering(dc::statemachine, "AIStateMachine::set_state(" << state_str_impl(new_state) << ") [" << (void*)this << "]");
+#ifdef SHOW_ASSERT
   {
-	switch (state)
+	multiplex_state_type_rat state_r(mState);
+	// set_state() may only be called from initialize_impl() or multiplex_impl().
+	llassert(state_r->base_state == bs_initialize || state_r->base_state == bs_multiplex);
+	// May only be called by the thread that is holding mMultiplexMutex. If this fails, you probably called set_state() by accident instead of advance_state().
+	llassert(mThreadId.equals_current_thread());
+  }
+#endif
+  sub_state_type_wat sub_state_w(mSubState);
+  // Force current state to the requested state.
+  sub_state_w->run_state = new_state;
+  // Void last call to advance_state.
+  sub_state_w->advance_state = 0;
+  // Void last call to idle(), if any.
+  sub_state_w->idle = false;
+  // Honor a subsequent call to idle().
+  sub_state_w->skip_idle = false;
+#ifdef SHOW_ASSERT
+  // We should run. This can only be cancelled by a call to idle().
+  mDebugSetStatePending = true;
+#endif
+}
+
+void AIStateMachine::advance_state(state_type new_state)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::advance_state(" << state_str_impl(new_state) << ") [" << (void*)this << "]");
+  {
+	sub_state_type_wat sub_state_w(mSubState);
+	// Ignore call to advance_state when the currently queued state is already greater or equal to the requested state.
+	if (sub_state_w->advance_state >= new_state)
 	{
-	  AI_CASE_RETURN(bs_initialize);
-	  AI_CASE_RETURN(bs_run);
-	  AI_CASE_RETURN(bs_abort);
-	  AI_CASE_RETURN(bs_finish);
-	  AI_CASE_RETURN(bs_callback);
-	  AI_CASE_RETURN(bs_killed);
+	  Dout(dc::statemachine, "Ignored, because " << state_str_impl(sub_state_w->advance_state) << " >= " << state_str_impl(new_state) << ".");
+	  return;
+	}
+	// Increment state.
+	sub_state_w->advance_state = new_state;
+	// Void last call to idle(), if any.
+	sub_state_w->idle = false;
+	// Ignore a call to idle if it occurs before we leave multiplex_impl().
+	sub_state_w->skip_idle = true;
+	// Mark that a re-entry of multiplex() is necessary.
+	sub_state_w->need_run = true;
+#ifdef SHOW_ASSERT
+	// From this moment on.
+	mDebugAdvanceStatePending = true;
+#endif
+  }
+  if (!mMultiplexMutex.isSelfLocked())
+  {
+	multiplex(schedule_run);
+  }
+}
+
+void AIStateMachine::idle(void)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::idle() [" << (void*)this << "]");
+#ifdef SHOW_ASSERT
+  {
+	multiplex_state_type_rat state_r(mState);
+	// idle() may only be called from initialize_impl() or multiplex_impl().
+	llassert(state_r->base_state == bs_multiplex || state_r->base_state == bs_initialize);
+	// May only be called by the thread that is holding mMultiplexMutex.
+	llassert(mThreadId.equals_current_thread());
+  }
+  // A call to idle() after set_state() cancels the reason to run that set_state() established.
+  mDebugSetStatePending = false;
+#endif
+  sub_state_type_wat sub_state_w(mSubState);
+  // As idle() may only be called from within the state machine, it should never happen that the state machine is already idle.
+  llassert(!sub_state_w->idle);
+  // Ignore this call when advance_state() was called after the last set_state(): advance_state() sets skip_idle, set_state() clears it.
+  if (sub_state_w->skip_idle)
+  {
+	Dout(dc::statemachine, "Ignored, because skip_idle is true (advance_state() was called last).");
+	return;
+  }
+  // Mark that we are idle.
+  sub_state_w->idle = true;
+  // Cancel any sleep that was requested through yield_frame() or yield_ms().
+  mSleep = 0;
+}
+
+void AIStateMachine::cont(void)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::cont() [" << (void*)this << "]");
+  {
+	sub_state_type_wat sub_state_w(mSubState);
+	// Void last call to idle(), if any.
+	sub_state_w->idle = false;
+	// Mark that a re-entry of multiplex() is necessary.
+	sub_state_w->need_run = true;
+#ifdef SHOW_ASSERT
+	// A cont() is pending from this moment on.
+	mDebugContPending = true;
+#endif
+  }
+  if (!mMultiplexMutex.isSelfLocked())	// The write lock on mSubState was released at the end of the scope above.
+  {
+	multiplex(schedule_run);
+  }
+}
+
+void AIStateMachine::abort(void)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::abort() [" << (void*)this << "]");
+  bool is_waiting = false;	// Set below when the base state is bs_multiplex while the sub state is idle.
+  {
+	multiplex_state_type_rat state_r(mState);
+	sub_state_type_wat sub_state_w(mSubState);
+	// Mark that we are aborted, iff we didn't already finish.
+	sub_state_w->aborted = !sub_state_w->finished;
+	// Mark that a re-entry of multiplex() is necessary.
+	sub_state_w->need_run = true;
+	// Schedule a new run when this state machine is waiting.
+	is_waiting = state_r->base_state == bs_multiplex && sub_state_w->idle;
+  }
+  if (is_waiting && !mMultiplexMutex.isSelfLocked())
+  {
+	multiplex(insert_abort);
+  }
+  // Block until the current run has finished: first try without blocking so that we can warn before we really block.
+  if (!mRunMutex.tryLock())
+  {
+	llwarns << "AIStateMachine::abort() blocks because the statemachine is still executing code in another thread." << llendl;
+	mRunMutex.lock();
+  }
+  mRunMutex.unlock();	// The lock was only taken to wait; release it again immediately.
+#ifdef SHOW_ASSERT
+  // Once abort() has returned, the state machine may never run again.
+  mDebugAborted = true;
+#endif
+}
+
+void AIStateMachine::finish(void)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::finish() [" << (void*)this << "]");
+#ifdef SHOW_ASSERT
+  {
+	multiplex_state_type_rat state_r(mState);
+	// finish() may only be called from multiplex_impl().
+	llassert(state_r->base_state == bs_multiplex);
+	// May only be called by the thread that is holding mMultiplexMutex.
+	llassert(mThreadId.equals_current_thread());
+  }
+#endif
+  sub_state_type_wat sub_state_w(mSubState);
+  // finish() should not be called when idle.
+  llassert(!sub_state_w->idle);
+  // Only record that we are finished; nothing else is done in this function.
+  sub_state_w->finished = true;
+}
+
+void AIStateMachine::yield(void)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::yield() [" << (void*)this << "]");
+  multiplex_state_type_rat state_r(mState);
+  // yield() may only be called from multiplex_impl().
+  llassert(state_r->base_state == bs_multiplex);
+  // May only be called by the thread that is holding mMultiplexMutex.
+  llassert(mThreadId.equals_current_thread());
+  // Set mYieldEngine to the best non-NULL value: the current engine, else the default engine, else gStateMachineThreadEngine.
+  mYieldEngine = state_r->current_engine ? state_r->current_engine : (mDefaultEngine ? mDefaultEngine : &gStateMachineThreadEngine);
+}
+
+void AIStateMachine::yield(AIEngine* engine)
+{
+  llassert(engine);	// NULL is not accepted here; yield(void) is the overload that picks an engine itself.
+  DoutEntering(dc::statemachine, "AIStateMachine::yield(" << engine->name() << ") [" << (void*)this << "]");
+#ifdef SHOW_ASSERT
+  {
+	multiplex_state_type_rat state_r(mState);
+	// yield() may only be called from multiplex_impl().
+	llassert(state_r->base_state == bs_multiplex);
+	// May only be called by the thread that is holding mMultiplexMutex.
+	llassert(mThreadId.equals_current_thread());
+  }
+#endif
+  mYieldEngine = engine;	// Only record the requested engine; nothing else is done in this function.
+}
+
+void AIStateMachine::yield_frame(unsigned int frames)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::yield_frame(" << frames << ") [" << (void*)this << "]");
+  mSleep = -(S64)frames;	// A frame count is stored negated; yield_ms() stores a clock count instead.
+  // Sleeping is always done from the main thread.
+  yield(&gMainThreadEngine);
+}
+
+void AIStateMachine::yield_ms(unsigned int ms)
+{
+  DoutEntering(dc::statemachine, "AIStateMachine::yield_ms(" << ms << ") [" << (void*)this << "]");
+  mSleep = get_clock_count() + calc_clock_frequency() * ms / 1000;	// Clock count to sleep until (assumes calc_clock_frequency() is in counts per second -- TODO confirm).
+  // Sleeping is always done from the main thread.
+  yield(&gMainThreadEngine);
+}
+
+char const* AIStateMachine::state_str(base_state_type state)	// Return a human readable name for a base state.
+{
+  switch(state)
+  {
+	AI_CASE_RETURN(bs_reset);
+	AI_CASE_RETURN(bs_initialize);
+	AI_CASE_RETURN(bs_multiplex);
+	AI_CASE_RETURN(bs_abort);
+	AI_CASE_RETURN(bs_finish);
+	AI_CASE_RETURN(bs_callback);
+	AI_CASE_RETURN(bs_killed);
+  }
+  llassert(false);	// Presumably only reached when 'state' matched none of the cases above (assumes AI_CASE_RETURN returns -- TODO confirm).
+  return "UNKNOWN BASE STATE";
+}
+
+AIEngine gMainThreadEngine("gMainThreadEngine");
+AIEngine gStateMachineThreadEngine("gStateMachineThreadEngine");
+
+// State Machine Thread main loop.
+void AIEngine::threadloop(void)
+{
+  queued_type::iterator queued_element, end;
+  {
+	engine_state_type_wat engine_state_w(mEngineState);
+	end = engine_state_w->list.end();
+	queued_element = engine_state_w->list.begin();
+	if (queued_element == end)
+	{
+	  // Nothing to do. Wait till something is added to the queue again.
+	  engine_state_w->waiting = true;
+	  engine_state_w.wait();
+	  engine_state_w->waiting = false;
+	  return;
 	}
   }
-  return state_str_impl(state);
-}
-
-//----------------------------------------------------------------------------
-//
-// Private methods
-//
-
-void AIStateMachine::multiplex(U64 current_time)
-{
-  // Return immediately when this state machine is sleeping.
-  // A negative value of mSleep means we're counting frames,
-  // a positive value means we're waiting till a certain
-  // amount of time has passed.
-  if (mSleep != 0)
+  do
   {
-	if (mSleep < 0)
+	AIStateMachine& state_machine(queued_element->statemachine());
+	state_machine.multiplex(AIStateMachine::normal_run);
+	bool active = state_machine.active(this);		// This locks mState shortly, so it must be called before locking mEngineState because add() locks mEngineState while holding mState.
+	engine_state_type_wat engine_state_w(mEngineState);
+	if (!active)
 	{
-	  if (++mSleep)
-		return;
+	  Dout(dc::statemachine, "Erasing state machine [" << (void*)&state_machine << "] from " << mName);
+	  engine_state_w->list.erase(queued_element++);
 	}
 	else
 	{
-	  if (current_time < (U64)mSleep)
-		return;
-	  mSleep = 0;
+	  ++queued_element;
 	}
   }
-
-  DoutEntering(dc::statemachine, "AIStateMachine::multiplex() [" << (void*)this << "] [with state: " << state_str(mState == bs_run ? mRunState : mState) << "]");
-  llassert(mState == bs_initialize || mState == bs_run);
-
-  // Real state machine starts here.
-  if (mState == bs_initialize)
-  {
-	mAborted = false;
-	mState = bs_run;
-	initialize_impl();
-	if (mAborted || mState != bs_run)
-	  return;
-  }
-  multiplex_impl();
+  while (queued_element != end);
 }
 
+void AIEngine::wake_up(void)
+{
+  engine_state_type_wat engine_state_w(mEngineState);
+  if (engine_state_w->waiting)	// Set by threadloop() around its call to wait(), which it makes when the queue is empty.
+  {
+	engine_state_w.signal();
+  }
+}
+
+//-----------------------------------------------------------------------------
+// AIEngineThread
+
+class AIEngineThread : public LLThread
+{
+  public:
+	static AIEngineThread* sInstance;	// The instance created by startEngineThread().
+	bool volatile mRunning;	// Initialized to true; stopEngineThread() sets it to false to make run() leave its loop.
+
+  public:
+    // MAIN-THREAD
+    AIEngineThread(void);
+    virtual ~AIEngineThread();
+
+  protected:
+	virtual void run(void);	// Keeps calling gStateMachineThreadEngine.threadloop() for as long as mRunning is true.
+};
+
 //static
-void AIStateMachine::add_continued_statemachines(AIReadAccess<csme_type>& csme_r)
+AIEngineThread* AIEngineThread::sInstance;
+
+AIEngineThread::AIEngineThread(void) : LLThread("AIEngineThread"), mRunning(true)
 {
-  bool nonempty = false;
-  for (continued_statemachines_type::const_iterator iter = csme_r->continued_statemachines.begin(); iter != csme_r->continued_statemachines.end(); ++iter)
-  {
-	nonempty = true;
-	active_statemachines.push_back(QueueElement(*iter));
-	Dout(dc::statemachine, "Adding " << (void*)*iter << " to active_statemachines");
-	(*iter)->mActive = as_active;
-  }
-  if (nonempty)
-	AIWriteAccess<csme_type>(csme_r)->continued_statemachines.clear();
 }
 
-// static
-void AIStateMachine::dowork(void)
+AIEngineThread::~AIEngineThread(void)
 {
-  llassert(!active_statemachines.empty());
-  // Run one or more state machines.
-  U64 total_clocks = 0;
-  for (active_statemachines_type::iterator iter = active_statemachines.begin(); iter != active_statemachines.end(); ++iter)
+}
+
+void AIEngineThread::run(void)
+{
+  while(mRunning)
   {
-	AIStateMachine& statemachine(iter->statemachine());
-	if (!statemachine.mIdle)
-	{
-	  U64 start = get_clock_count();
-	  // This might call idle() and then pass the statemachine to another thread who then may call cont().
-	  // Hence, after this isn't not sure what mIdle is, and it can change from true to false at any moment,
-	  // if it is true after this function returns.
-	  statemachine.multiplex(start);
-	  U64 delta = get_clock_count() - start;
-	  iter->add(delta);
-	  total_clocks += delta;
-	  if (total_clocks >= sMaxCount)
-	  {
-#ifndef LL_RELEASE_FOR_DOWNLOAD
-		llwarns << "AIStateMachine::mainloop did run for " << (total_clocks * 1000 / calc_clock_frequency()) << " ms." << llendl;
-#endif
-		std::sort(active_statemachines.begin(), active_statemachines.end(), QueueElementComp());
-		break;
-	  }
-	}
-  }
-  // Remove idle state machines from the loop.
-  active_statemachines_type::iterator iter = active_statemachines.begin();
-  while (iter != active_statemachines.end())
-  {
-	AIStateMachine& statemachine(iter->statemachine());
-	// Atomic test mIdle and change mActive.
-	bool locked = statemachine.mIdleActive.tryLock();
-	// If the lock failed, then another thread is in the middle of calling cont(),
-	// thus mIdle will end up false. So, there is no reason to block here; just
-	// treat mIdle as false already.
-	if (locked && statemachine.mIdle)
-	{
-	  // Without the lock, it would be possible that another thread called cont() right here,
-	  // changing mIdle to false again but NOT adding the statemachine to continued_statemachines,
-	  // thinking it is in active_statemachines (and it is), while immediately below it is
-	  // erased from active_statemachines.
-	  statemachine.mActive = as_idle;
-	  // Now, calling cont() is ok -- as that will cause the statemachine to be added to
-	  // continued_statemachines, so it's fine in that case-- even necessary-- to remove it from
-	  // active_statemachines regardless, and we can release the lock here.
-	  statemachine.mIdleActive.unlock();
-	  Dout(dc::statemachine, "Erasing " << (void*)&statemachine << " from active_statemachines");
-	  iter = active_statemachines.erase(iter);
-	  if (statemachine.mState == bs_killed)
-	  {
-	  	Dout(dc::statemachine, "Deleting " << (void*)&statemachine);
-		delete &statemachine;
-	  }
-	}
-	else
-	{
-	  if (locked)
-	  {
-		statemachine.mIdleActive.unlock();
-	  }
-	  llassert(statemachine.mActive == as_active);	// It should not be possible that another thread called cont() and changed this when we are we are not idle.
-	  llassert(statemachine.mState == bs_run || statemachine.mState == bs_initialize);
-	  ++iter;
-	}
-  }
-  if (active_statemachines.empty())
-  {
-	// If this was the last state machine, remove mainloop from the IdleCallbacks.
-	AIReadAccess<csme_type> csme_r(sContinuedStateMachinesAndMainloopEnabled, true);
-	if (csme_r->continued_statemachines.empty() && csme_r->mainloop_enabled)
-	{
-	  Dout(dc::statemachine, "Deactivating AIStateMachine::mainloop: no active state machines left.");
-	  AIWriteAccess<csme_type>(csme_r)->mainloop_enabled = false;
-	}
+	gStateMachineThreadEngine.threadloop();
   }
 }
 
-// static
-void AIStateMachine::flush(void)
+void startEngineThread(void)
 {
-  DoutEntering(dc::curl, "AIStateMachine::flush(void)");
-  {
-	AIReadAccess<csme_type> csme_r(sContinuedStateMachinesAndMainloopEnabled);
-	add_continued_statemachines(csme_r);
-  }
-  // Abort all state machines.
-  for (active_statemachines_type::iterator iter = active_statemachines.begin(); iter != active_statemachines.end(); ++iter)
-  {
-	AIStateMachine& statemachine(iter->statemachine());
-	if (statemachine.abortable())
-	{
-	  // We can't safely call abort() here for non-running (run() was called, but they weren't initialized yet) statemachines,
-	  // because that might call kill() which in some cases is undesirable (ie, when it is owned by a partent that will
-	  // also call abort() on it when it is aborted itself).
-	  if (statemachine.running())
-		statemachine.abort();
-	  else
-		statemachine.idle();		// Stop the statemachine from starting, in the next loop with batch == 0.
-	}
-  }
-  for (int batch = 0;; ++batch)
-  {
-	// Run mainloop until all state machines are idle (batch == 0) or deleted (batch == 1).
-	for(;;)
-	{
-	  {
-		AIReadAccess<csme_type> csme_r(sContinuedStateMachinesAndMainloopEnabled);
-		if (!csme_r->mainloop_enabled)
-		  break;
-	  }
-	  mainloop();
-	}
-	if (batch == 1)
-	  break;
-	{
-	  AIReadAccess<csme_type> csme_r(sContinuedStateMachinesAndMainloopEnabled);
-	  add_continued_statemachines(csme_r);
-	}
-  }
-  // At this point all statemachines should be idle.
-  AIReadAccess<csme_type> csme_r(sContinuedStateMachinesAndMainloopEnabled);
-  llinfos << "Current number of continued statemachines: " << csme_r->continued_statemachines.size() << llendl;
-  llinfos << "Current number of active statemachines: " << active_statemachines.size() << llendl;
-  llassert(csme_r->continued_statemachines.empty() && active_statemachines.empty());
+  AIEngineThread::sInstance = new AIEngineThread;
+  AIEngineThread::sInstance->start();
 }
+
+void stopEngineThread(void)
+{
+  // Nothing to stop when startEngineThread() was never called: sInstance is still NULL then.
+  if (!AIEngineThread::sInstance) return;
+  AIEngineThread::sInstance->mRunning = false;	// Makes AIEngineThread::run() leave its loop.
+  gStateMachineThreadEngine.wake_up();	// Unblock threadloop() in case it is waiting on an empty queue.
+  int count = 401;	// Poll isStopped() at most 400 times, sleeping in between.
+  while(--count && !AIEngineThread::sInstance->isStopped())
+	ms_sleep(10);
+  llinfos << "State machine thread" << (!AIEngineThread::sInstance->isStopped() ? " not" : "") << " stopped after " << ((400 - count) * 10) << "ms." << llendl;
+}
+
diff --git a/indra/aistatemachine/aistatemachine.h b/indra/aistatemachine/aistatemachine.h
index 9048260ba..dffd93c7d 100644
--- a/indra/aistatemachine/aistatemachine.h
+++ b/indra/aistatemachine/aistatemachine.h
@@ -2,7 +2,7 @@
  * @file aistatemachine.h
  * @brief State machine base class
  *
- * Copyright (c) 2010, Aleric Inglewood.
+ * Copyright (c) 2010 - 2013, Aleric Inglewood.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,397 +26,290 @@
  *
  *   01/03/2010
  *   Initial version, written by Aleric Inglewood @ SL
+ *
+ *   28/02/2013
+ *   Rewritten from scratch to fully support threading.
  */
 
 #ifndef AISTATEMACHINE_H
 #define AISTATEMACHINE_H
 
 #include "aithreadsafe.h"
-#include "lltimer.h"
+#include <llpointer.h>
+#include <list>
 #include <boost/signals2.hpp>
 
-//!
-// A AIStateMachine is a base class that allows derived classes to
-// go through asynchronous states, while the code still appears to
-// be more or less sequential.
-//
-// These state machine objects can be reused to build more complex
-// objects.
-//
-// It is important to note that each state has a duality: the object
-// can have a state that will cause a corresponding function to be
-// called; and often that function will end with changing the state
-// again, to signal that it was handled. It is easy to confuse the
-// function of a state with the state at the end of the function.
-// For example, the state "initialize" could cause the member
-// function 'init()' to be called, and at the end one would be
-// inclined to set the state to "initialized". However, this is the
-// wrong approach: the correct use of state names does reflect the
-// functions that will be called, never the function that just was
-// called.
-//
-// Each (derived) class goes through a series of states as follows:
-//
-//   Creation
-//       |
-//       v
-//     (idle) <----.   		Idle until run() is called.
-//       |         |
-//   Initialize    |		Calls initialize_impl().
-//       |         |
-//       | (idle)  |		Idle until cont() or set_state() is called.
-//       |  |  ^   |
-//       v  v  |   |
-//   .-------. |   |
-//   |  Run  |_,   |		Call multiplex_impl() until idle(), abort() or finish() is called.
-//   '-------'     |
-//    |    |       |
-//    v    |       |
-//  Abort  |       |		Calls abort_impl().
-//    |    |       |
-//    v    v       |
-//    Finish       |		Calls finish_impl() (which may call kill()) or
-//    |    |       |		the callback function passed to run(), if any,
-//    |    v       |
-//    | Callback   |        which may call kill() and/or run().
-//    |  |   |     |
-//    |  |   `-----'
-//    v  v
-//  Killed					Delete the statemachine (all statemachines must be allocated with new).
-//
-// Each state causes corresponding code to be called.
-// Finish cleans up whatever is done by Initialize.
-// Abort should clean up additional things done by Run.
-//
-// The Run state is entered by calling run().
-//
-// While the base class is in the Run state, it is the derived class
-// that goes through different states. The state variable of the derived
-// class is only valid while the base class is in the state Run.
-//
-// A derived class can exit the Run state by calling one of two methods:
-// abort() in case of failure, or finish() in case of success.
-// Respectively these set the state to Abort and Finish.
-//
-// finish_impl may call kill() for a (default) destruction upon finish.
-// Even in that case the callback (passed to run()) may call run() again,
-// which overrides the request for a default kill. Or, if finish_impl
-// doesn't call kill() the callback may call kill() to request the
-// destruction of the state machine object.
-//
-// State machines are run from the "idle" part of the viewer main loop.
-// Often a state machine has nothing to do however. In that case it can
-// call the method idle(). This will stop the state machine until
-// external code changes it's state (by calling set_state()), or calls
-// cont() to continue with the last state.
-//
-// The methods of the derived class call set_state() to change their
-// own state within the bs_run state, or by calling either abort()
-// or finish().
-//
-// Restarting a finished state machine can also be done by calling run(),
-// which will cause a re-initialize.
-//
-// Derived classes should implement the following constants:
-//
-//   static state_type const min_state = first_state;
-//   static state_type const max_state = last_state + 1;
-//
-// Where first_state should be equal to BaseClass::max_state.
-// These should represent the minimum and (one past) the maximum
-// values of mRunState.
-//
-//   virtual void initialize_impl(void)
-//
-// Initializes the derived class.
-//
-//   virtual void multiplex_impl(void);
-//
-// This method should handle mRunState in a switch.
-// For example:
-//
-//   switch(mRunState)
-//   {
-//     case foo:
-//       handle_foo();
-//       break;
-//     case wait_state:
-//       if (still_waiting())
-//       {
-//         idle();
-//         break;
-//       }
-//       set_state(working);
-//       /*fall-through*/
-//     case working:
-//       do_work();
-//       if (failure())
-//         abort();
-//       break;
-//     case etc:
-//       etc();
-//       finish();
-//       break;
-//   }
-//
-// virtual void abort_impl(void);
-//
-//   A call to this method should bring the object to a state
-//   where finish_impl() can be called.
-//
-// virtual void finish_impl(void);
-//
-//   Should cleanup whatever init_impl() did, or any of the
-//   states of the object where multiplex_impl() calls finish().
-//   Call kill() from here to make that the default behavior
-//   (state machine is deleted unless the callback calls run()).
-//
-// virtual char const* state_str_impl(state_type run_state);
-//
-//   Should return a stringified value of run_state.
-//
-class AIStateMachine {
-	//! The type of mState
+class AIStateMachine;
+
+class AIEngine	// A named engine that holds a list of queued state machines (engine_state_type::list).
+{
+  private:
+	struct QueueElementComp;
+	class QueueElement {	// One list entry: wraps an LLPointer to the queued state machine.
+	  private:
+		LLPointer<AIStateMachine> mStateMachine;
+
+	  public:
+		QueueElement(AIStateMachine* statemachine) : mStateMachine(statemachine) { }
+		friend bool operator==(QueueElement const& e1, QueueElement const& e2) { return e1.mStateMachine == e2.mStateMachine; }	// Equal when both entries point to the same state machine.
+		friend bool operator!=(QueueElement const& e1, QueueElement const& e2) { return e1.mStateMachine != e2.mStateMachine; }
+		friend struct QueueElementComp;
+
+		AIStateMachine const& statemachine(void) const { return *mStateMachine; }
+		AIStateMachine& statemachine(void) { return *mStateMachine; }
+	};
+	struct QueueElementComp {
+	  inline bool operator()(QueueElement const& e1, QueueElement const& e2) const;	// Defined at the end of this header, after AIStateMachine is complete.
+	};
+
+  public:
+	typedef std::list<QueueElement> queued_type;
+	struct engine_state_type {
+	  queued_type list;	// The queued state machines.
+	  bool waiting;		// Starts out false.
+	  engine_state_type(void) : waiting(false) { }
+	};
+
+  private:
+	AIThreadSafeSimpleDC<engine_state_type, LLCondition>	mEngineState;
+	typedef AIAccessConst<engine_state_type, LLCondition>	engine_state_type_crat;
+	typedef AIAccess<engine_state_type, LLCondition>		engine_state_type_rat;	// Same type as engine_state_type_wat.
+	typedef AIAccess<engine_state_type, LLCondition>		engine_state_type_wat;
+	char const* mName;	// The pointer passed to the constructor; returned by name(). The string is not copied.
+
+	static U64 sMaxCount;
+
+  public:
+	AIEngine(char const* name) : mName(name) { }
+
+	void add(AIStateMachine* state_machine);
+
+	void mainloop(void);
+	void threadloop(void);
+	void wake_up(void);
+	void flush(void);
+
+	char const* name(void) const { return mName; }
+
+	static void setMaxCount(F32 StateMachineMaxTime);
+};
+
+extern AIEngine gMainThreadEngine;
+extern AIEngine gStateMachineThreadEngine;
+
+class AIStateMachine : public LLThreadSafeRefCount
+{
+  public:
+	typedef U32 state_type;		//!< The type of run_state
+
+  protected:
+	// The type of event that causes multiplex() to be called.
+	enum event_type {
+	  initial_run,
+	  schedule_run,
+	  normal_run,
+	  insert_abort
+	};
+	// The type of mState
 	enum base_state_type {
-	  bs_initialize,
-	  bs_run,
+	  bs_reset,				// Idle state before run() is called. Reference count is zero (except for a possible external LLPointer).
+	  bs_initialize,		// State after run() and before/during initialize_impl().
+	  bs_multiplex,			// State after initialize_impl() before finish() or abort().
 	  bs_abort,
 	  bs_finish,
 	  bs_callback,
 	  bs_killed
 	};
-	//! The type of mActive
-	enum active_type {
-	  as_idle,					// State machine is on neither list.
-	  as_queued,				// State machine is on continued_statemachines list.
-	  as_active					// State machine is on active_statemachines list.
-	};
-
-	//! Type of continued_statemachines.
-	typedef std::vector<AIStateMachine*> continued_statemachines_type;
-	//! Type of sContinuedStateMachinesAndMainloopEnabled.
-	struct csme_type
-	{
-	  continued_statemachines_type continued_statemachines;
-	  bool mainloop_enabled;
-	};
-
   public:
-	typedef U32 state_type;		//!< The type of mRunState
-
-	//! Integral value equal to the state with the lowest value.
-	static state_type const min_state = bs_initialize;
-	//! Integral value one more than the state with the highest value.
 	static state_type const max_state = bs_killed + 1;
 
+  protected:
+	struct multiplex_state_type {
+	  base_state_type	base_state;
+	  AIEngine*			current_engine;			// Current engine.
+	  multiplex_state_type(void) : base_state(bs_reset), current_engine(NULL) { }
+	};
+	struct sub_state_type {
+	  state_type		run_state;
+	  state_type		advance_state;
+	  bool				reset;
+	  bool				need_run;
+	  bool				idle;
+	  bool				skip_idle;
+	  bool				aborted;
+	  bool				finished;
+	};
+
   private:
-	base_state_type mState;						//!< State of the base class.
-	bool mIdle;									//!< True if this state machine is not running.
-	bool mAborted;								//!< True after calling abort() and before calling run().
-	active_type mActive;						//!< Whether statemachine is idle, queued to be added to the active list, or already on the active list.
-	S64 mSleep;									//!< Non-zero while the state machine is sleeping.
-	LLMutex mIdleActive;						//!< Used for atomic operations on the pair mIdle / mActive.
-#ifdef SHOW_ASSERT
-	AIThreadID mContThread;						//!< Thread that last called locked_cont().
-	bool mCalledThreadUnsafeIdle;				//!< Set to true when idle() is called.
-#endif
+	// Base state.
+	AIThreadSafeSimpleDC<multiplex_state_type>	mState;
+	typedef AIAccessConst<multiplex_state_type>	multiplex_state_type_crat;
+	typedef AIAccess<multiplex_state_type>		multiplex_state_type_rat;
+	typedef AIAccess<multiplex_state_type>		multiplex_state_type_wat;
+	// Sub state.
+	AIThreadSafeSimpleDC<sub_state_type>	mSubState;
+	typedef AIAccessConst<sub_state_type>	sub_state_type_crat;
+	typedef AIAccess<sub_state_type>		sub_state_type_rat;
+	typedef AIAccess<sub_state_type>		sub_state_type_wat;
+
+	// Mutex protecting everything below and making sure only one thread runs the state machine at a time.
+	LLMutex mMultiplexMutex;
+	// Mutex that is locked while calling *_impl() functions and the call back.
+	LLMutex mRunMutex;
+
+	S64 mSleep;                                 //!< Non-zero while the state machine is sleeping.
 
 	// Callback facilities.
 	// From within an other state machine:
-	AIStateMachine* mParent;					//!< The parent object that started this state machine, or NULL if there isn't any.
-	state_type mNewParentState;					//!< The state at which the parent should continue upon a successful finish.
-	bool mAbortParent;							//!< If true, abort parent on abort(). Otherwise continue as normal.
-	bool mOnAbortSignalParent;					//!< If true and mAbortParent is false, change state of parent even on abort.
+	LLPointer<AIStateMachine> mParent;			// The parent object that started this state machine, or NULL if there isn't any.
+	state_type mNewParentState;					// The state at which the parent should continue upon a successful finish.
+	bool mAbortParent;							// If true, abort parent on abort(). Otherwise continue as normal.
+	bool mOnAbortSignalParent;					// If true and mAbortParent is false, change state of parent even on abort.
 	// From outside a state machine:
 	struct callback_type {
-	  typedef boost::signals2::signal<void (bool)> signal_type;
-	  callback_type(signal_type::slot_type const& slot) { connection = signal.connect(slot); }
-	  ~callback_type() { connection.disconnect(); }
-	  void callback(bool success) const { signal(success); }
-	private:
-	  boost::signals2::connection connection;
-	  signal_type signal;
+		typedef boost::signals2::signal<void (bool)> signal_type;
+		callback_type(signal_type::slot_type const& slot) { connection = signal.connect(slot); }
+		~callback_type() { connection.disconnect(); }
+		void callback(bool success) const { signal(success); }
+	  private:
+		boost::signals2::connection connection;
+		signal_type signal;
 	};
-	callback_type* mCallback;					//!< Pointer to signal/connection, or NULL when not connected.
+	callback_type* mCallback;					// Pointer to signal/connection, or NULL when not connected.
 
-	static U64 sMaxCount;						//!< Number of cpu clocks below which we start a new state machine within the same frame.
-	static AIThreadSafeDC<csme_type> sContinuedStateMachinesAndMainloopEnabled;	//!< Read/write locked variable pair.
+	// Engine stuff.
+	AIEngine* mDefaultEngine;					// Default engine.
+	AIEngine* mYieldEngine;						// Requested engine.
 
-  protected:
-	LLMutex mSetStateLock;						//!< For critical areas in set_state() and locked_cont().
-
-	//! State of the derived class. Only valid if mState == bs_run. Call set_state to change.
-	volatile state_type mRunState;
-
-  public:
-	//! Create a non-running state machine.
-	AIStateMachine(void) : mState(bs_initialize), mIdle(true), mAborted(true), mActive(as_idle), mSleep(0), mParent(NULL), mCallback(NULL)
 #ifdef SHOW_ASSERT
-		, mContThread(AIThreadID::none), mCalledThreadUnsafeIdle(false)
+	// Debug stuff.
+	AIThreadID mThreadId;						// The thread currently running multiplex().
+	base_state_type mDebugLastState;			// The previous state that multiplex() had a normal run with.
+	bool mDebugShouldRun;						// Set if we found evidence that we should indeed call multiplex_impl().
+	bool mDebugAborted;							// True when abort() was called.
+	bool mDebugContPending;						// True while cont() was called but not handled yet.
+	bool mDebugSetStatePending;					// True while set_state() was called but not handled yet.
+	bool mDebugAdvanceStatePending;				// True while advance_state() was called but not handled yet.
+	bool mDebugRefCalled;						// True when ref() is called (or will be called within the critical area of mMultiplexMutex).
 #endif
-		{ }
+	U64 mRuntime;								// Total time spent running in the main thread (in clocks).
+
+  public:
+	AIStateMachine(void) : mCallback(NULL), mDefaultEngine(NULL), mYieldEngine(NULL),
+#ifdef SHOW_ASSERT
+		mThreadId(AIThreadID::none), mDebugLastState(bs_killed), mDebugShouldRun(false), mDebugAborted(false), mDebugContPending(false),
+		mDebugSetStatePending(false), mDebugAdvanceStatePending(false), mDebugRefCalled(false),
+#endif
+		mRuntime(0)
+	{ }
 
   protected:
-	//! The user should call 'kill()', not delete a AIStateMachine (derived) directly.
-	virtual ~AIStateMachine() { llassert((mState == bs_killed && mActive == as_idle) || mState == bs_initialize); }
-
+	// The user should call finish() (or abort(), or kill() from the call back when finish_impl() calls run()), not delete a class derived from AIStateMachine directly.
+	virtual ~AIStateMachine() { llassert(multiplex_state_type_rat(mState)->base_state == bs_killed); }
+ 
   public:
-	//! Halt the state machine until cont() is called (not thread-safe).
-	void idle(void);
+	// These functions may be called directly after creation, or from within finish_impl(), or from the call back function.
+	void run(LLPointer<AIStateMachine> parent, state_type new_parent_state, bool abort_parent = true, bool on_abort_signal_parent = true, AIEngine* default_engine = &gMainThreadEngine);
+	void run(callback_type::signal_type::slot_type const& slot, AIEngine* default_engine = &gMainThreadEngine);
+	void run(void) { run(NULL, 0, false, true, mDefaultEngine); }
 
-	//! Halt the state machine until cont() is called, provided it is still in 'current_run_state'.
-	void idle(state_type current_run_state);
-
-	//! Temporarily halt the state machine.
-	void yield_frame(unsigned int frames) { mSleep = -(S64)frames; }
-
-	//! Temporarily halt the state machine.
-	void yield_ms(unsigned int ms) { mSleep = get_clock_count() + calc_clock_frequency() * ms / 1000; }
-
-	//! Continue running after calling idle.
-	void cont(void)
-	{
-		mSetStateLock.lock();
-		// Ignore calls to cont() if the statemachine isn't idle. See comments in set_state().
-		// Calling cont() twice or after calling set_state(), without first calling idle(), is an error.
-		if (mState != bs_run || !mIdle) { llassert(mState != bs_run || !mContThread.equals_current_thread()); mSetStateLock.unlock(); return; }
-		locked_cont();
-	}
-  private:
-	void locked_cont(void);
-
-  public:
-	//---------------------------------------
-	// Changing the state.
-
-	//! Change state to <code>bs_run</code>. May only be called after creation or after returning from finish().
-	// If <code>parent</code> is non-NULL, change the parent state machine's state to <code>new_parent_state</code>
-	// upon finish, or in the case of an abort and when <code>abort_parent</code> is true, call parent->abort() instead.
-	void run(AIStateMachine* parent, state_type new_parent_state, bool abort_parent = true, bool on_abort_signal_parent = true);
-
-	//! Change state to 'bs_run'. May only be called after creation or after returning from finish().
-	// Does not cause a callback.
-	void run(void) { run(NULL, 0, false); }
-
-	//! The same as above, but pass the result of a boost::bind with _1.
-	//
-	// Here _1, if present, will be replaced with a bool indicating success.
-	//
-	// For example:
-	//
-	// <code>
-	// struct Foo { void callback(AIStateMachineDerived* ptr, bool success); };
-	// ...
-	//   AIStateMachineDerived* magic = new AIStateMachineDerived; // Deleted by callback
-	//   // Call foo_ptr->callback(magic, _1) on finish.
-	//   state_machine->run(boost::bind(&Foo::callback, foo_ptr, magic, _1));
-	// </code>
-	//
-	// or
-	//
-	// <code>
-	// struct Foo { void callback(bool success, AIStateMachineDerived const& magic); };
-	// ...
-	//   AIStateMachineDerived magic;
-	//   // Call foo_ptr->callback(_1, magic) on finish.
-	//   magic.run(boost::bind(&Foo::callback, foo_ptr, _1, magic));
-	// </code>
-	//
-	// or
-	//
-	// <code>
-	// static void callback(void* userdata);
-	// ...
-	//   AIStateMachineDerived magic;
-	//   // Call callback(userdata) on finish.
-	//   magic.run(boost::bind(&callback, userdata));
-	// </code>
-	void run(callback_type::signal_type::slot_type const& slot);
-
-	//! Change state to 'bs_abort'. May only be called while in the bs_run state.
-	void abort(void);
-
-	//! Change state to 'bs_finish'. May only be called while in the bs_run state.
-	void finish(void);
-
-	//! Refine state while in the bs_run state. May only be called while in the bs_run state.
-	void set_state(state_type run_state);
-
-	//! Change state to 'bs_killed'. May only be called while in the bs_finish state.
+	// This function may only be called from the call back function (and cancels a call to run() from finish_impl()).
 	void kill(void);
 
-	//---------------------------------------
-	// Other.
-
-	//! Called whenever the StateMachineMaxTime setting is changed.
-	static void setMaxCount(F32 StateMachineMaxTime);
-
-	//---------------------------------------
-	// Accessors.
-
-	//! Return true if state machine was aborted (can be used in finish_impl).
-	bool aborted(void) const { return mAborted; }
-
-	//! Return true if the derived class is running (also when we are idle).
-	bool running(void) const { return mState == bs_run; }
-
-	//! Return true if it's safe to call abort.
-	bool abortable(void) const { return mState == bs_run || mState == bs_initialize; }
-
-	//! Return true if the derived class is running but idle.
-	bool waiting(void) const { return mState == bs_run && mIdle; }
-
-	// Use some safebool idiom (http://www.artima.com/cppsource/safebool.html) rather than operator bool.
-	typedef volatile state_type AIStateMachine::* const bool_type;
-	//! Return true if state machine successfully finished.
-	operator bool_type() const { return ((mState == bs_initialize || mState == bs_callback) && !mAborted) ? &AIStateMachine::mRunState : 0; }
-
-	//! Return a stringified state, for debugging purposes.
-	char const* state_str(state_type state);
-
-  private:
-	static void add_continued_statemachines(AIReadAccess<csme_type>& csme_r);
-	static void dowork(void);
-	void multiplex(U64 current_time);
+  protected:
+	// This function can be called from initialize_impl() and multiplex_impl() (both called from within multiplex()).
+	void set_state(state_type new_state);									// Run this state the NEXT loop.
+	// These functions can only be called from within multiplex_impl().
+	void idle(void);														// Go idle unless cont() or advance_state() were called since the start of the current loop, or until they are called.
+	void finish(void);														// Mark that the state machine finished and schedule the call back.
+	void yield(void);														// Yield to give CPU to other state machines, but do not go idle.
+	void yield(AIEngine* engine);											// Yield to give CPU to other state machines, but do not go idle. Continue running from engine 'engine'.
+	void yield_frame(unsigned int frames);									// Run from the main-thread engine after at least 'frames' frames have passed.
+	void yield_ms(unsigned int ms);											// Run from the main-thread engine after roughly 'ms' milliseconds have passed.
 
   public:
-	//! Call this once per frame to give the statemachines CPU cycles.
-	static void mainloop(void)
+	// This function can be called from multiplex_impl(), but also by a child state machine and
+	// therefore by any thread. The child state machine should use an LLPointer<AIStateMachine>
+	// to access this state machine.
+	void abort(void);														// Abort the state machine (unsuccessful finish).
+
+	// These are the only two functions that can be called by any thread at any moment.
+	// Those threads should use an LLPointer<AIStateMachine> to access this state machine.
+	void cont(void);														// Guarantee at least one full run of multiplex() after this function is called. Cancels the last call to idle().
+	void advance_state(state_type new_state);								// Guarantee at least one full run of multiplex() after this function is called
+																			// iff new_state is larger than the last state that was processed.
+
+  public:
+	// Accessors.
+
+	// Return true if the derived class is running (also when we are idle).
+	bool running(void) const { return multiplex_state_type_crat(mState)->base_state == bs_multiplex; }
+	// Return true if the derived class is running and idle.
+	bool waiting(void) const
 	{
-	  {
-		AIReadAccess<csme_type> csme_r(sContinuedStateMachinesAndMainloopEnabled, true);
-		if (!csme_r->mainloop_enabled)
-		  return;
-		if (!csme_r->continued_statemachines.empty())
-		  add_continued_statemachines(csme_r);
-	  }
-	  dowork();
+	  multiplex_state_type_crat state_r(mState);
+	  return state_r->base_state == bs_multiplex && sub_state_type_crat(mSubState)->idle;
+	}
+	// Return true if the derived class is running and idle or already being aborted.
+	bool waiting_or_aborting(void) const
+	{
+	  multiplex_state_type_crat state_r(mState);
+	  return state_r->base_state == bs_abort || ( state_r->base_state == bs_multiplex && sub_state_type_crat(mSubState)->idle);
+	}
+	// Return true if 'engine' is the engine we are currently added to (our current_engine).
+	bool active(AIEngine const* engine) const { return multiplex_state_type_crat(mState)->current_engine == engine; }
+	bool aborted(void) const { return sub_state_type_crat(mSubState)->aborted; }
+
+	// Use some safebool idiom (http://www.artima.com/cppsource/safebool.html) rather than operator bool.
+	typedef state_type AIStateMachine::* const bool_type;
+	// Return true if state machine successfully finished.
+	operator bool_type() const
+	{
+	  sub_state_type_crat sub_state_r(mSubState);
+	  return (sub_state_r->finished && !sub_state_r->aborted) ? &AIStateMachine::mNewParentState : 0;
 	}
 
-	//! Abort all running state machines and then run mainloop until all state machines are idle (called when application is exiting).
-	static void flush(void);
+	// Return stringified state, for debugging purposes.
+	char const* state_str(base_state_type state);
+#ifdef CWDEBUG
+	char const* event_str(event_type event);
+#endif
+
+	void add(U64 count) { mRuntime += count; }
+	U64 getRuntime(void) const { return mRuntime; }
 
   protected:
-	//---------------------------------------
-	// Derived class implementations.
-
-	// Handle initializing the object.
 	virtual void initialize_impl(void) = 0;
-
-	// Handle mRunState.
-	virtual void multiplex_impl(void) = 0;
-
-	// Handle aborting from current bs_run state.
-	virtual void abort_impl(void) = 0;
-
-	// Handle cleaning up from initialization (or post abort) state.
-	virtual void finish_impl(void) = 0;
-
-	// Implemenation of state_str for run states.
+	virtual void multiplex_impl(state_type run_state) = 0;
+	virtual void abort_impl(void) { }
+	virtual void finish_impl(void) { }
 	virtual char const* state_str_impl(state_type run_state) const = 0;
+
+  private:
+	void reset(void);														// Called from run() to (re)initialize a (re)start.
+	void multiplex(event_type event);										// Called from AIEngine to step through the states (and from reset() to kick start the state machine).
+	state_type begin_loop(base_state_type base_state);						// Called from multiplex() at the start of a loop.
+	void callback(void);													// Called when the state machine finished.
+	bool sleep(U64 current_time)											// Count frames if necessary and return true when the state machine is still sleeping.
+	{
+	  if (mSleep == 0)														// Zero: not sleeping.
+		return false;
+	  else if (mSleep < 0)													// Negative: counting frames; move one step towards zero per call.
+		++mSleep;
+	  else if ((U64)mSleep <= current_time)									// Positive: wake once current_time has reached mSleep. NOTE(review): assumes yield_ms() stores an absolute wake-up clock count - confirm.
+		mSleep = 0;
+	  return mSleep != 0;
+	}
+
+	friend class AIEngine;						// Calls multiplex().
 };
 
-// This case be used in state_str_impl.
+bool AIEngine::QueueElementComp::operator()(QueueElement const& e1, QueueElement const& e2) const	// Declared inline inside AIEngine::QueueElementComp.
+{
+  return e1.mStateMachine->getRuntime() < e2.mStateMachine->getRuntime();	// True when e1's state machine has accumulated strictly less runtime than e2's.
+}
+
+// This can be used in state_str_impl.
 #define AI_CASE_RETURN(x) do { case x: return #x; } while(0)
 
 #endif
diff --git a/indra/aistatemachine/aistatemachinethread.cpp b/indra/aistatemachine/aistatemachinethread.cpp
index 97391eb81..030bd0a5a 100644
--- a/indra/aistatemachine/aistatemachinethread.cpp
+++ b/indra/aistatemachine/aistatemachinethread.cpp
@@ -88,15 +88,15 @@ void AIStateMachineThreadBase::initialize_impl(void)
   set_state(start_thread);
 }
 
-void AIStateMachineThreadBase::multiplex_impl(void)
+void AIStateMachineThreadBase::multiplex_impl(state_type run_state)
 {
-  switch(mRunState)
+  switch(run_state)
   {
 	case start_thread:
 	  mThread = Thread::allocate(mImpl);
 	  // Set next state.
 	  set_state(wait_stopped);
-	  idle(wait_stopped);		// Wait till the thread returns.
+	  idle();					// Wait till the thread returns.
 	  mThread->start();
 	  break;
 	case wait_stopped:
@@ -179,12 +179,8 @@ bool AIThreadImpl::thread_done(bool result)
   {
 	// If state_machine_thread is non-NULL then AIThreadImpl::abort_impl wasn't called,
 	// which means the state machine still exists. In fact, it should be in the waiting() state.
-	// It can also happen that the state machine is being aborted right now (but it will still exist).
-	// (Note that waiting() and running() aren't strictly thread-safe (we should really lock
-	// mSetStateLock here) but by first calling waiting() and then running(), and assuming that
-	// changing an int from the value 1 to the value 2 is atomic, this will work since the
-	// only possible transition is from waiting to not running).
-	llassert(state_machine_thread->waiting() || !state_machine_thread->running());
+	// It can also happen that the state machine is being aborted right now.
+	llassert(state_machine_thread->waiting_or_aborting());
 	state_machine_thread->schedule_abort(!result);
 	// Note that if the state machine is not running (being aborted, ie - hanging in abort_impl
 	// waiting for the lock on mStateMachineThread) then this is simply ignored.
diff --git a/indra/aistatemachine/aistatemachinethread.h b/indra/aistatemachine/aistatemachinethread.h
index 9eefd4dca..85b2ff108 100644
--- a/indra/aistatemachine/aistatemachinethread.h
+++ b/indra/aistatemachine/aistatemachinethread.h
@@ -89,19 +89,8 @@ class HelloWorld : public AIStateMachine {
 	// Handle initializing the object.
 	/*virtual*/ void initialize_impl(void);
 
-	// Handle mRunState.
-	/*virtual*/ void multiplex_impl(void);
-
-	// Handle aborting from current bs_run state.
-	/*virtual*/ void abort_impl(void) { }
-
-	// Handle cleaning up from initialization (or post abort) state.
-	/*virtual*/ void finish_impl(void)
-	{
-	  // Kill object by default.
-	  // This can be overridden by calling run() from the callback function.
-	  kill();
-	}
+	// Handle run_state.
+	/*virtual*/ void multiplex_impl(state_type run_state);
 
 	// Implemenation of state_str for run states.
 	/*virtual*/ char const* state_str_impl(state_type run_state) const
@@ -123,9 +112,9 @@ void HelloWorld::initialize_impl(void)
   set_state(HelloWorld_start);
 }
 
-void HelloWorld::multiplex_impl(void)
+void HelloWorld::multiplex_impl(state_type run_state)
 {
-  switch (mRunState)
+  switch (run_state)
   {
 	case HelloWorld_start:
 	{
@@ -177,28 +166,30 @@ class AIStateMachineThreadBase : public AIStateMachine {
 	// The actual thread (derived from LLThread).
 	class Thread;
 
+  protected:
+	typedef AIStateMachine direct_base_type;
+
 	// The states of this state machine.
 	enum thread_state_type {
-	  start_thread = AIStateMachine::max_state,		// Start the thread (if necessary create it first).
+	  start_thread = direct_base_type::max_state,	// Start the thread (if necessary create it first).
 	  wait_stopped									// Wait till the thread is stopped.
 	};
+  public:
+	static state_type const max_state = wait_stopped + 1;
 
   protected:
-	AIStateMachineThreadBase(AIThreadImpl* impl) : mImpl(impl) { }
+	AIStateMachineThreadBase(AIThreadImpl* impl) : mImpl(impl) { ref(); /* Never call delete */ }
 
   private:
 	// Handle initializing the object.
 	/*virtual*/ void initialize_impl(void);
 
 	// Handle mRunState.
-	/*virtual*/ void multiplex_impl(void);
+	/*virtual*/ void multiplex_impl(state_type run_state);
 
 	// Handle aborting from current bs_run state.
 	/*virtual*/ void abort_impl(void);
 
-	// Handle cleaning up from initialization (or post abort) state.
-	/*virtual*/ void finish_impl(void) { }
-
 	// Implemenation of state_str for run states.
 	/*virtual*/ char const* state_str_impl(state_type run_state) const;
 
diff --git a/indra/aistatemachine/aitimer.cpp b/indra/aistatemachine/aitimer.cpp
index 5a37a6991..1c51b6178 100644
--- a/indra/aistatemachine/aitimer.cpp
+++ b/indra/aistatemachine/aitimer.cpp
@@ -31,11 +31,6 @@
 #include "linden_common.h"
 #include "aitimer.h"
 
-enum timer_state_type {
-  AITimer_start = AIStateMachine::max_state,
-  AITimer_expired
-};
-
 char const* AITimer::state_str_impl(state_type run_state) const
 {
   switch(run_state)
@@ -43,6 +38,7 @@ char const* AITimer::state_str_impl(state_type run_state) const
 	AI_CASE_RETURN(AITimer_start);
 	AI_CASE_RETURN(AITimer_expired);
   }
+  llassert(false);
   return "UNKNOWN STATE";
 }
 
@@ -54,12 +50,12 @@ void AITimer::initialize_impl(void)
 
 void AITimer::expired(void)
 {
-  set_state(AITimer_expired);
+  advance_state(AITimer_expired);
 }
 
-void AITimer::multiplex_impl(void)
+void AITimer::multiplex_impl(state_type run_state)
 {
-  switch (mRunState)
+  switch (run_state)
   {
 	case AITimer_start:
 	{
@@ -79,18 +75,3 @@ void AITimer::abort_impl(void)
 {
   mFrameTimer.cancel();
 }
-
-void AITimer::finish_impl(void)
-{
-  // Kill object by default.
-  // This can be overridden by calling run() from the callback function.
-  kill();
-}
-
-void AIPersistentTimer::finish_impl(void)
-{
-  // Don't kill object by default.
-  if (aborted())
-	kill();
-  // Callback function should always call kill() or run().
-}
diff --git a/indra/aistatemachine/aitimer.h b/indra/aistatemachine/aitimer.h
index c10559429..5b028c6e0 100644
--- a/indra/aistatemachine/aitimer.h
+++ b/indra/aistatemachine/aitimer.h
@@ -59,6 +59,18 @@
 // just reuse the old ones (call the same callback).
 //
 class AITimer : public AIStateMachine {
+  protected:
+	// The base class of this state machine.
+	typedef AIStateMachine direct_base_type;
+
+	// The different states of the state machine.
+	enum timer_state_type {
+	  AITimer_start = direct_base_type::max_state,
+	  AITimer_expired
+	};
+  public:
+	static state_type const max_state = AITimer_expired + 1;
+
   private:
 	AIFrameTimer mFrameTimer;		//!< The actual timer that this object wraps.
 	F64 mInterval;					//!< Input variable: interval after which the event will be generated, in seconds.
@@ -90,14 +102,11 @@ class AITimer : public AIStateMachine {
 	/*virtual*/ void initialize_impl(void);
 
 	// Handle mRunState.
-	/*virtual*/ void multiplex_impl(void);
+	/*virtual*/ void multiplex_impl(state_type run_state);
 
 	// Handle aborting from current bs_run state.
 	/*virtual*/ void abort_impl(void);
 
-	// Handle cleaning up from initialization (or post abort) state.
-	/*virtual*/ void finish_impl(void);
-
 	// Implemenation of state_str for run states.
 	/*virtual*/ char const* state_str_impl(state_type run_state) const;
 
@@ -106,12 +115,4 @@ class AITimer : public AIStateMachine {
 	void expired(void);
 };
 
-// Same as above but does not delete itself automatically by default after use.
-// Call kill() on it yourself (from the callback function) when you're done with it!
-class AIPersistentTimer : public AITimer {
-  protected:
-	// Handle cleaning up from initialization (or post abort) state.
-	/*virtual*/ void finish_impl(void);
-};
-
 #endif
diff --git a/indra/llcommon/aithreadid.cpp b/indra/llcommon/aithreadid.cpp
index e8dd17097..5e8b58bf6 100644
--- a/indra/llcommon/aithreadid.cpp
+++ b/indra/llcommon/aithreadid.cpp
@@ -51,6 +51,11 @@ void AIThreadID::set_current_thread_id(void)
 }
 
 #ifndef LL_DARWIN
+void AIThreadID::clear(void)
+{
+	mID = undefinedID;
+}
+
 void AIThreadID::reset(void)
 {
 	mID = lCurrentThread;
diff --git a/indra/llcommon/aithreadid.h b/indra/llcommon/aithreadid.h
index 3b3b1500c..cc7197b21 100644
--- a/indra/llcommon/aithreadid.h
+++ b/indra/llcommon/aithreadid.h
@@ -63,6 +63,7 @@ public:
 	static void set_main_thread_id(void);					// Called once to set sMainThreadID.
 	static void set_current_thread_id(void);				// Called once for every thread to set lCurrentThread.
 #ifndef LL_DARWIN
+	LL_COMMON_API void clear(void);
 	LL_COMMON_API void reset(void);
 	LL_COMMON_API bool equals_current_thread(void) const;
 	LL_COMMON_API static bool in_main_thread(void);
@@ -74,6 +75,7 @@ public:
 	static apr_os_thread_t getCurrentThread_inline(void) { return lCurrentThread; }
 #else
 	// Both variants are inline on OS X.
+	void clear(void) { mID = undefinedID; }
 	void reset(void) { mID = apr_os_thread_current(); }
 	void reset_inline(void) { mID = apr_os_thread_current(); }
 	bool equals_current_thread(void) const { return apr_os_thread_equal(mID, apr_os_thread_current()); }
diff --git a/indra/llcommon/aithreadsafe.h b/indra/llcommon/aithreadsafe.h
index 461bb4cc5..bb0ac7e05 100644
--- a/indra/llcommon/aithreadsafe.h
+++ b/indra/llcommon/aithreadsafe.h
@@ -479,9 +479,6 @@ public:
 	// Only for use by AITHREADSAFESIMPLE, see below.
 	AIThreadSafeSimple(T* object) { llassert(object == AIThreadSafeBits<T>::ptr()); }
 
-	// If MUTEX is a LLCondition then this can be used to wake up the waiting thread.
-	void signal() { mMutex.signal(); }
-
 #if LL_DEBUG
 	// Can only be locked when there still exists an AIAccess object that
 	// references this object and will access it upon destruction.
@@ -622,6 +619,8 @@ struct AIAccessConst
 
 	// If MUTEX is an LLCondition, then this can be used to wait for a signal.
 	void wait() { this->mWrapper.mMutex.wait(); }
+	// If MUTEX is a LLCondition then this can be used to wake up the waiting thread.
+	void signal() { this->mWrapper.mMutex.signal(); }
 
 protected:
 	AIThreadSafeSimple<T, MUTEX>& mWrapper;		//!< Reference to the object that we provide access to.
diff --git a/indra/llcommon/llhttpstatuscodes.h b/indra/llcommon/llhttpstatuscodes.h
index 83dde17d9..31beb9bb4 100644
--- a/indra/llcommon/llhttpstatuscodes.h
+++ b/indra/llcommon/llhttpstatuscodes.h
@@ -90,6 +90,16 @@ const S32 HTTP_VERSION_NOT_SUPPORTED = 505;
 // These status codes should not be sent over the wire
 //   and indicate something went wrong internally.
 // If you get these they are not normal.
-const S32 HTTP_INTERNAL_ERROR = 499;
+// Note that these are only related to curl, not to webkit.
+const S32 HTTP_INTERNAL_ERROR_LOW_SPEED = 494;				// The transfer (receiving data) stalled or was too slow.
+const S32 HTTP_INTERNAL_ERROR_CURL_LOCKUP = 495;			// Curl never returned at all for 10 minutes!?!
+const S32 HTTP_INTERNAL_ERROR_CURL_BADSOCKET = 496;			// Curl was aborted because the socket went bad!?!
+const S32 HTTP_INTERNAL_ERROR_CURL_TIMEOUT = 497;			// Curl returned a timeout error.
+const S32 HTTP_INTERNAL_ERROR_CURL_OTHER = 498;				// Any other curl error.
+const S32 HTTP_INTERNAL_ERROR_OTHER = 499;					// Every other internal error.
+
+// Return true if status is an internal error (not received from a server but generated internally).
+bool inline is_internal_http_error(S32 status) { return status >= HTTP_INTERNAL_ERROR_LOW_SPEED && status <= HTTP_INTERNAL_ERROR_OTHER; }
+bool inline is_internal_http_error_that_warrants_a_retry(S32 status) { return status >= HTTP_INTERNAL_ERROR_LOW_SPEED && status <= HTTP_INTERNAL_ERROR_CURL_OTHER; }
 
 #endif
diff --git a/indra/llcommon/llthread.h b/indra/llcommon/llthread.h
index f64200c95..1f6765935 100644
--- a/indra/llcommon/llthread.h
+++ b/indra/llcommon/llthread.h
@@ -470,6 +470,7 @@ public:
 
 	void unref()
 	{
+		llassert(mRef > 0);
 		if (!--mRef) delete this;
 	}
 	S32 getNumRefs() const
diff --git a/indra/llcrashlogger/llcrashlogger.cpp b/indra/llcrashlogger/llcrashlogger.cpp
index 7880271c3..c09361144 100644
--- a/indra/llcrashlogger/llcrashlogger.cpp
+++ b/indra/llcrashlogger/llcrashlogger.cpp
@@ -49,6 +49,7 @@
 #include "llhttpclient.h"
 #include "llsdserialize.h"
 #include "llcurl.h"
+#include "aistatemachine.h"
 
 LLPumpIO* gServicePump;
 BOOL gBreak = false;
@@ -56,6 +57,7 @@ BOOL gSent = false;
 
 class AIHTTPTimeoutPolicy;
 extern AIHTTPTimeoutPolicy crashLoggerResponder_timeout;
+extern void startEngineThread(void);
 
 class LLCrashLoggerResponder : public LLHTTPClient::ResponderWithResult
 {
@@ -374,7 +376,7 @@ void LLCrashLogger::updateApplication(const std::string& message)
 {
 	gServicePump->pump();
     gServicePump->callback();
-	//FIXME: AIStateMachine::mainloop(); needs CPU cycles. Can't call it from here though, because it uses gSavedSettings which is part of newview.
+	gMainThreadEngine.mainloop();
 }
 
 bool LLCrashLogger::init()
@@ -382,6 +384,17 @@ bool LLCrashLogger::init()
 	// Initialize curl
 	AICurlInterface::initCurl();
 
+	// Initialize state machine engines.
+	AIEngine::setMaxCount(100);				// StateMachineMaxTime
+
+	// Start state machine thread.
+	startEngineThread();
+
+	// Start curl thread.
+	AICurlInterface::startCurlThread(64,	// CurlMaxTotalConcurrentConnections
+									 8,		// CurlConcurrentConnectionsPerHost
+									 true);	// NoVerifySSLCert
+
 	// We assume that all the logs we're looking for reside on the current drive
 	gDirUtilp->initAppDirs("SecondLife");
 
diff --git a/indra/llmessage/aicurl.cpp b/indra/llmessage/aicurl.cpp
index 2bb76b5ab..6ea7528bc 100644
--- a/indra/llmessage/aicurl.cpp
+++ b/indra/llmessage/aicurl.cpp
@@ -418,7 +418,8 @@ void cleanupCurl(void)
   stopCurlThread();
   if (CurlMultiHandle::getTotalMultiHandles() != 0)
 	llwarns << "Not all CurlMultiHandle objects were destroyed!" << llendl;
-  AIStateMachine::flush();
+  gMainThreadEngine.flush();			// Not really related to curl, but why not.
+  gStateMachineThreadEngine.flush();
   clearCommandQueue();
   Stats::print();
   ssl_cleanup();
@@ -770,6 +771,22 @@ void CurlEasyRequest::setoptString(CURLoption option, std::string const& value)
   setopt(option, value.c_str());
 }
 
+void CurlEasyRequest::setPut(U32 size, bool keepalive)
+{
+  DoutCurl("PUT size is " << size << " bytes.");
+  mContentLength = size;
+
+  // The server never replies with 100-continue, so suppress the "Expect: 100-continue" header that libcurl adds by default.
+  addHeader("Expect:");
+  if (size > 0 && keepalive)
+  {
+	addHeader("Connection: keep-alive");
+	addHeader("Keep-alive: 300");
+  }
+  setopt(CURLOPT_UPLOAD, 1);
+  setopt(CURLOPT_INFILESIZE, size);
+}
+
 void CurlEasyRequest::setPost(AIPostFieldPtr const& postdata, U32 size, bool keepalive)
 {
   llassert_always(postdata->data());
@@ -787,6 +804,7 @@ void CurlEasyRequest::setPost_raw(U32 size, char const* data, bool keepalive)
 	// data == NULL when we're going to read the data using CURLOPT_READFUNCTION.
 	DoutCurl("POST size is " << size << " bytes.");
   }
+  mContentLength = size;
 
   // The server never replies with 100-continue, so suppress the "Expect: 100-continue" header that libcurl adds by default.
   addHeader("Expect:");
@@ -872,13 +890,13 @@ void CurlEasyRequest::setSSLCtxCallback(curl_ssl_ctx_callback callback, void* us
 static size_t noHeaderCallback(char* ptr, size_t size, size_t nmemb, void* userdata)
 {
   llmaybewarns << "Calling noHeaderCallback(); curl session aborted." << llendl;
-  return 0;							// Cause a CURL_WRITE_ERROR
+  return 0;							// Cause a CURLE_WRITE_ERROR
 }
 
 static size_t noWriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata)
 {
   llmaybewarns << "Calling noWriteCallback(); curl session aborted." << llendl;
-  return 0;							// Cause a CURL_WRITE_ERROR
+  return 0;							// Cause a CURLE_WRITE_ERROR
 }
 
 static size_t noReadCallback(char* ptr, size_t size, size_t nmemb, void* userdata)
@@ -1276,7 +1294,7 @@ static int const HTTP_REDIRECTS_DEFAULT = 10;
 
 LLChannelDescriptors const BufferedCurlEasyRequest::sChannels;
 
-BufferedCurlEasyRequest::BufferedCurlEasyRequest() : mRequestTransferedBytes(0), mResponseTransferedBytes(0), mBufferEventsTarget(NULL), mStatus(HTTP_INTERNAL_ERROR)
+BufferedCurlEasyRequest::BufferedCurlEasyRequest() : mRequestTransferedBytes(0), mResponseTransferedBytes(0), mBufferEventsTarget(NULL), mStatus(HTTP_INTERNAL_ERROR_OTHER)
 {
   AICurlInterface::Stats::BufferedCurlEasyRequest_count++;
 }
@@ -1311,7 +1329,7 @@ BufferedCurlEasyRequest::~BufferedCurlEasyRequest()
 
 void BufferedCurlEasyRequest::timed_out(void)
 {
-  mResponder->finished(CURLE_OK, HTTP_INTERNAL_ERROR, "Request timeout, aborted.", sChannels, mOutput);
+  mResponder->finished(CURLE_OK, HTTP_INTERNAL_ERROR_CURL_LOCKUP, "Request timeout, aborted.", sChannels, mOutput);
   if (mResponder->needsHeaders())
   {
 	send_buffer_events_to(NULL);	// Revoke buffer events: we send them to the responder.
@@ -1321,7 +1339,7 @@ void BufferedCurlEasyRequest::timed_out(void)
 
 void BufferedCurlEasyRequest::bad_socket(void)
 {
-  mResponder->finished(CURLE_OK, HTTP_INTERNAL_ERROR, "File descriptor went bad! Aborted.", sChannels, mOutput);
+  mResponder->finished(CURLE_OK, HTTP_INTERNAL_ERROR_CURL_BADSOCKET, "File descriptor went bad! Aborted.", sChannels, mOutput);
   if (mResponder->needsHeaders())
   {
 	send_buffer_events_to(NULL);	// Revoke buffer events: we send them to the responder.
@@ -1342,7 +1360,7 @@ void BufferedCurlEasyRequest::resetState(void)
   mRequestTransferedBytes = 0;
   mResponseTransferedBytes = 0;
   mBufferEventsTarget = NULL;
-  mStatus = HTTP_INTERNAL_ERROR;
+  mStatus = HTTP_INTERNAL_ERROR_OTHER;
 }
 
 void BufferedCurlEasyRequest::print_diagnostics(CURLcode code)
diff --git a/indra/llmessage/aicurl.h b/indra/llmessage/aicurl.h
index 16d1b14fd..3113e7e18 100644
--- a/indra/llmessage/aicurl.h
+++ b/indra/llmessage/aicurl.h
@@ -52,8 +52,6 @@
 #include "stdtypes.h"		// U16, S32, U32, F64
 #include "llatomic.h"		// LLAtomicU32
 #include "aithreadsafe.h"
-#include "llhttpstatuscodes.h"
-#include "llhttpclient.h"
 
 // Debug Settings.
 extern bool gNoVerifySSLCert;
diff --git a/indra/llmessage/aicurleasyrequeststatemachine.cpp b/indra/llmessage/aicurleasyrequeststatemachine.cpp
index b9f486d67..ed2bf820d 100644
--- a/indra/llmessage/aicurleasyrequeststatemachine.cpp
+++ b/indra/llmessage/aicurleasyrequeststatemachine.cpp
@@ -36,10 +36,8 @@
 enum curleasyrequeststatemachine_state_type {
   AICurlEasyRequestStateMachine_addRequest = AIStateMachine::max_state,
   AICurlEasyRequestStateMachine_waitAdded,
-  AICurlEasyRequestStateMachine_added,
-  AICurlEasyRequestStateMachine_timedOut,	// This must be smaller than the rest, so they always overrule.
-  AICurlEasyRequestStateMachine_finished,
-  AICurlEasyRequestStateMachine_removed,	// The removed states must be largest two, so they are never ignored.
+  AICurlEasyRequestStateMachine_timedOut,      // This must be smaller than the rest, so they always overrule.
+  AICurlEasyRequestStateMachine_removed,       // The removed states must be largest two, so they are never ignored.
   AICurlEasyRequestStateMachine_removed_after_finished,
   AICurlEasyRequestStateMachine_bad_file_descriptor
 };
@@ -50,9 +48,7 @@ char const* AICurlEasyRequestStateMachine::state_str_impl(state_type run_state)
   {
 	AI_CASE_RETURN(AICurlEasyRequestStateMachine_addRequest);
 	AI_CASE_RETURN(AICurlEasyRequestStateMachine_waitAdded);
-	AI_CASE_RETURN(AICurlEasyRequestStateMachine_added);
 	AI_CASE_RETURN(AICurlEasyRequestStateMachine_timedOut);
-	AI_CASE_RETURN(AICurlEasyRequestStateMachine_finished);
 	AI_CASE_RETURN(AICurlEasyRequestStateMachine_removed);
 	AI_CASE_RETURN(AICurlEasyRequestStateMachine_removed_after_finished);
 	AI_CASE_RETURN(AICurlEasyRequestStateMachine_bad_file_descriptor);
@@ -77,14 +73,12 @@ void AICurlEasyRequestStateMachine::initialize_impl(void)
 // CURL-THREAD
 void AICurlEasyRequestStateMachine::added_to_multi_handle(AICurlEasyRequest_wat&)
 {
-  set_state(AICurlEasyRequestStateMachine_added);
 }
 
 // CURL-THREAD
 void AICurlEasyRequestStateMachine::finished(AICurlEasyRequest_wat&)
 {
   mFinished = true;
-  set_state(AICurlEasyRequestStateMachine_finished);
 }
 
 // CURL-THREAD
@@ -93,7 +87,7 @@ void AICurlEasyRequestStateMachine::removed_from_multi_handle(AICurlEasyRequest_
   llassert(mFinished || mTimedOut);		// If we neither finished nor timed out, then why is this being removed?
   										// Note that allowing this would cause an assertion later on for removing
 										// a BufferedCurlEasyRequest with a still active Responder.
-  set_state(mFinished ? AICurlEasyRequestStateMachine_removed_after_finished : AICurlEasyRequestStateMachine_removed);
+  advance_state(mFinished ? AICurlEasyRequestStateMachine_removed_after_finished : AICurlEasyRequestStateMachine_removed);
 }
 
 // CURL-THREAD
@@ -102,7 +96,7 @@ void AICurlEasyRequestStateMachine::bad_file_descriptor(AICurlEasyRequest_wat&)
   if (!mFinished)
   {
 	mFinished = true;
-	set_state(AICurlEasyRequestStateMachine_bad_file_descriptor);
+	advance_state(AICurlEasyRequestStateMachine_bad_file_descriptor);
   }
 }
 
@@ -114,59 +108,46 @@ void AICurlEasyRequestStateMachine::queued_for_removal(AICurlEasyRequest_wat&)
 }
 #endif
 
-void AICurlEasyRequestStateMachine::multiplex_impl(void)
+void AICurlEasyRequestStateMachine::multiplex_impl(state_type run_state)
 {
-  mSetStateLock.lock();
-  state_type current_state = mRunState;
-  mSetStateLock.unlock();
-  switch (current_state)
+  switch (run_state)
   {
 	case AICurlEasyRequestStateMachine_addRequest:
 	{
 	  set_state(AICurlEasyRequestStateMachine_waitAdded);
-	  idle(AICurlEasyRequestStateMachine_waitAdded);	// Wait till AICurlEasyRequestStateMachine::added_to_multi_handle() is called.
+	  idle();							// Wait till AICurlEasyRequestStateMachine::added_to_multi_handle() is called.
 	  // Only AFTER going idle, add request to curl thread; this is needed because calls to set_state() are
 	  // ignored when the statemachine is not idle, and theoretically the callbacks could be called
 	  // immediately after this call.
 	  mAdded = true;
 	  mCurlEasyRequest.addRequest();	// This causes the state to be changed, now or later, to
-	  									//   AICurlEasyRequestStateMachine_added, then
-										//   AICurlEasyRequestStateMachine_finished and then
 										//   AICurlEasyRequestStateMachine_removed_after_finished.
 
-	  // The first two states might be skipped thus, and the state at this point is one of
+	  // The state at this point is thus one of
 	  // 1) AICurlEasyRequestStateMachine_waitAdded (idle)
-	  // 2) AICurlEasyRequestStateMachine_added (running)
-	  // 3) AICurlEasyRequestStateMachine_finished (running)
-	  // 4) AICurlEasyRequestStateMachine_removed_after_finished (running)
+	  // 2) AICurlEasyRequestStateMachine_removed_after_finished (running)
 
 	  if (mTotalDelayTimeout > 0.f)
 	  {
 		// Set an inactivity timer.
 		// This shouldn't really be necessary, except in the case of a bug
 		// in libcurl; but lets be sure and set a timer for inactivity.
-		mTimer = new AIPersistentTimer;			// Do not delete timer upon expiration.
+		mTimer = new AITimer;
 		mTimer->setInterval(mTotalDelayTimeout);
 		mTimer->run(this, AICurlEasyRequestStateMachine_timedOut, false, false);
 	  }
 	  break;
 	}
-	case AICurlEasyRequestStateMachine_added:
+	case AICurlEasyRequestStateMachine_waitAdded:
 	{
-	  // The request was added to the multi handle. This is a no-op, which is good cause
-	  // this state might be skipped anyway ;).
-	  idle(current_state);				// Wait for the next event.
-
-	  // The state at this point is one of
-	  // 1) AICurlEasyRequestStateMachine_added (idle)
-	  // 2) AICurlEasyRequestStateMachine_finished (running)
-	  // 3) AICurlEasyRequestStateMachine_removed_after_finished (running)
+	  // Nothing to do.
+	  idle();
 	  break;
 	}
 	case AICurlEasyRequestStateMachine_timedOut:
 	{
 	  // It is possible that exactly at this point the state changes into
-	  // AICurlEasyRequestStateMachine_finished, with as result that mTimedOut
+	  // AICurlEasyRequestStateMachine_removed_after_finished, with the result that mTimedOut
 	  // is set while we will continue with that state. Hence that mTimedOut
 	  // is explicitly reset in that state.
 
@@ -176,10 +157,9 @@ void AICurlEasyRequestStateMachine::multiplex_impl(void)
 	  llassert(mAdded);
 	  mAdded = false;
 	  mCurlEasyRequest.removeRequest();
-	  idle(current_state);				// Wait till AICurlEasyRequestStateMachine::removed_from_multi_handle() is called.
+	  idle();							// Wait till AICurlEasyRequestStateMachine::removed_from_multi_handle() is called.
 	  break;
 	}
-	case AICurlEasyRequestStateMachine_finished:
 	case AICurlEasyRequestStateMachine_removed_after_finished:
 	{
 	  if (!mHandled)
@@ -199,12 +179,6 @@ void AICurlEasyRequestStateMachine::multiplex_impl(void)
 		easy_request_w->processOutput();
 	  }
 
-	  if (current_state == AICurlEasyRequestStateMachine_finished)
-	  {
-	    idle(current_state);				// Wait till AICurlEasyRequestStateMachine::removed_from_multi_handle() is called.
-	    break;
-	  }
-
 	  // See above.
 	  mTimedOut = false;
 	  /* Fall-Through */
@@ -261,17 +235,14 @@ void AICurlEasyRequestStateMachine::finish_impl(void)
   }
   if (mTimer)
   {
-	// Note that even if the timer expired, it wasn't deleted because we used AIPersistentTimer; so mTimer is still valid.
 	// Stop the timer, if it's still running.
 	if (!mHandled)
 	  mTimer->abort();
   }
-  // Auto clean up ourselves.
-  kill();
 }
 
 AICurlEasyRequestStateMachine::AICurlEasyRequestStateMachine(void) :
-    mTimer(NULL), mTotalDelayTimeout(AIHTTPTimeoutPolicy::getDebugSettingsCurlTimeout().getTotalDelay())
+    mTotalDelayTimeout(AIHTTPTimeoutPolicy::getDebugSettingsCurlTimeout().getTotalDelay())
 {
   Dout(dc::statemachine, "Calling AICurlEasyRequestStateMachine(void) [" << (void*)this << "] [" << (void*)mCurlEasyRequest.get() << "]");
   AICurlInterface::Stats::AICurlEasyRequestStateMachine_count++;
diff --git a/indra/llmessage/aicurleasyrequeststatemachine.h b/indra/llmessage/aicurleasyrequeststatemachine.h
index 8a6441874..662efbe20 100644
--- a/indra/llmessage/aicurleasyrequeststatemachine.h
+++ b/indra/llmessage/aicurleasyrequeststatemachine.h
@@ -62,7 +62,7 @@ class AICurlEasyRequestStateMachine : public AIStateMachine, public AICurlEasyHa
 	bool mTimedOut;						// Set if the expiration timer timed out.
 	bool mFinished;						// Set by the curl thread to signal it finished.
 	bool mHandled;						// Set when we processed the received data.
-	AITimer* mTimer;					// Expiration timer.
+	LLPointer<AITimer> mTimer;			// Expiration timer.
 	F32 mTotalDelayTimeout;				// The time out value for mTimer.
 
   public:
@@ -99,7 +99,7 @@ class AICurlEasyRequestStateMachine : public AIStateMachine, public AICurlEasyHa
 	/*virtual*/ void initialize_impl(void);
 
 	// Handle mRunState.
-	/*virtual*/ void multiplex_impl(void);
+	/*virtual*/ void multiplex_impl(state_type run_state);
 
 	// Handle aborting from current bs_run state.
 	/*virtual*/ void abort_impl(void);
diff --git a/indra/llmessage/aicurlprivate.h b/indra/llmessage/aicurlprivate.h
index bb39bdddb..3b2777b40 100644
--- a/indra/llmessage/aicurlprivate.h
+++ b/indra/llmessage/aicurlprivate.h
@@ -36,9 +36,11 @@
 #include "llrefcount.h"
 #include "aicurlperhost.h"
 #include "aihttptimeout.h"
+#include "llhttpclient.h"
 
 class AIHTTPHeaders;
 class AICurlEasyRequestStateMachine;
+class AITransferInfo;
 
 namespace AICurlPrivate {
 
@@ -212,6 +214,7 @@ class CurlEasyRequest : public CurlEasyHandle {
   private:
 	void setPost_raw(U32 size, char const* data, bool keepalive);
   public:
+	void setPut(U32 size, bool keepalive = true);
 	void setPost(U32 size, bool keepalive = true) { setPost_raw(size, NULL, keepalive); }
 	void setPost(AIPostFieldPtr const& postdata, U32 size, bool keepalive = true);
 	void setPost(char const* data, U32 size, bool keepalive = true) { setPost(new AIPostField(data), size, keepalive); }
@@ -297,6 +300,7 @@ class CurlEasyRequest : public CurlEasyHandle {
   protected:
 	curl_slist* mHeaders;
 	AICurlEasyHandleEvents* mHandleEventsTarget;
+	U32 mContentLength;		// Non-zero if known (only set for PUT and POST).
 	CURLcode mResult;		//AIFIXME: this does not belong in the request object, but belongs in the response object.
 
 	AIHTTPTimeoutPolicy const* mTimeoutPolicy;
@@ -319,12 +323,12 @@ class CurlEasyRequest : public CurlEasyHandle {
 	// Accessor for mTimeout with optional creation of orphaned object (if lockobj != NULL).
 	LLPointer<curlthread::HTTPTimeout>& httptimeout(void) { if (!mTimeout) { create_timeout_object(); mTimeoutIsOrphan = true; } return mTimeout; }
 	// Return true if no data has been received on the latest socket (if any) for too long.
-	bool has_stalled(void) const { return mTimeout && mTimeout->has_stalled(); }
+	bool has_stalled(void) { return mTimeout && mTimeout->has_stalled(); }
 
   protected:
 	// This class may only be created as base class of BufferedCurlEasyRequest.
 	// Throws AICurlNoEasyHandle.
-	CurlEasyRequest(void) : mHeaders(NULL), mHandleEventsTarget(NULL), mResult(CURLE_FAILED_INIT), mTimeoutPolicy(NULL), mTimeoutIsOrphan(false)
+	CurlEasyRequest(void) : mHeaders(NULL), mHandleEventsTarget(NULL), mContentLength(0), mResult(CURLE_FAILED_INIT), mTimeoutPolicy(NULL), mTimeoutIsOrphan(false)
 #if defined(CWDEBUG) || defined(DEBUG_CURLIO)
 		, mDebugIsHeadOrGetMethod(false)
 #endif
@@ -433,7 +437,7 @@ class BufferedCurlEasyRequest : public CurlEasyRequest {
 	ThreadSafeBufferedCurlEasyRequest const* get_lockobj(void) const;
 	// Return true when an error code was received that can occur before the upload finished.
 	// So far the only such error I've seen is HTTP_BAD_REQUEST.
-	bool upload_error_status(void) const { return mStatus == HTTP_BAD_REQUEST /*&& mStatus != HTTP_INTERNAL_ERROR*/; }
+	bool upload_error_status(void) const { return mStatus == HTTP_BAD_REQUEST; }
 
 	// Return true when prepRequest was already called and the object has not been
 	// invalidated as a result of calling timed_out().
diff --git a/indra/llmessage/aicurlthread.cpp b/indra/llmessage/aicurlthread.cpp
index c732c9226..85be547b2 100644
--- a/indra/llmessage/aicurlthread.cpp
+++ b/indra/llmessage/aicurlthread.cpp
@@ -832,8 +832,9 @@ class AICurlThread : public LLThread
 {
   public:
 	static AICurlThread* sInstance;
-	LLMutex mWakeUpMutex;
-	bool mWakeUpFlag;			// Protected by mWakeUpMutex.
+	LLMutex mWakeUpMutex;		// Set while a thread is waking up the curl thread.
+	LLMutex mWakeUpFlagMutex;	// Set when the curl thread is sleeping (in or about to enter select()).
+	bool mWakeUpFlag;			// Protected by mWakeUpFlagMutex.
 
   public:
 	// MAIN-THREAD
@@ -1067,11 +1068,10 @@ void AICurlThread::cleanup_wakeup_fds(void)
 #endif
 }
 
-// MAIN-THREAD
+// OTHER THREADS
 void AICurlThread::wakeup_thread(bool stop_thread)
 {
   DoutEntering(dc::curl, "AICurlThread::wakeup_thread");
-  llassert(is_main_thread());
 
   // If we are already exiting the viewer then return immediately.
   if (!mRunning)
@@ -1079,12 +1079,29 @@ void AICurlThread::wakeup_thread(bool stop_thread)
 
   // Last time we are run?
   if (stop_thread)
-	mRunning = false;
+	mRunning = false;			// Thread-safe because all other threads were already stopped.
+
+  // Note, we do not want this function to be blocking the calling thread; therefore we only use tryLock()s.
+
+  // Prevent two threads from running the following code concurrently.
+  if (!mWakeUpMutex.tryLock())
+  {
+	// If we failed to obtain mWakeUpMutex then another thread is (or was) in AICurlThread::wakeup_thread,
+	// or curl was holding the lock for a microsecond at the start of process_commands.
+	// In the first case, curl might or might not yet have been woken up because of that, but if it was
+	// then it could not have started processing the commands yet, because it needs to obtain mWakeUpMutex
+	// between being woken up and processing the commands.
+	// Either way, the command that this thread called this function for was already in the queue (it's
+	// added before this function is called) but the command(s) that another thread called this function
+	// for were not processed yet. Hence, it's safe to exit here as our command(s) will be processed too.
+	return;
+  }
 
   // Try if curl thread is still awake and if so, pass the new commands directly.
-  if (mWakeUpMutex.tryLock())
+  if (mWakeUpFlagMutex.tryLock())
   {
 	mWakeUpFlag = true;
+	mWakeUpFlagMutex.unlock();
 	mWakeUpMutex.unlock();
 	return;
   }
@@ -1111,7 +1128,10 @@ void AICurlThread::wakeup_thread(bool stop_thread)
   {
     len = write(mWakeUpFd_in, "!", 1);
     if (len == -1 && errno == EAGAIN)
+	{
+	  mWakeUpMutex.unlock();
 	  return;		// Unread characters are still in the pipe, so no need to add more.
+	}
   }
   while(len == -1 && errno == EINTR);
   if (len == -1)
@@ -1120,6 +1140,12 @@ void AICurlThread::wakeup_thread(bool stop_thread)
   }
   llassert_always(len == 1);
 #endif
+
+  // Release the lock here and not sooner, for the sole purpose of making sure
+  // that no two threads execute the above code concurrently. If the above code
+  // is thread-safe (maybe it is?) then we could release this lock arbitrarily
+  // sooner indeed - or even not lock it at all.
+  mWakeUpMutex.unlock();
 }
 
 apr_status_t AICurlThread::join_thread(void)
@@ -1239,6 +1265,16 @@ void AICurlThread::process_commands(AICurlMultiHandle_wat const& multi_handle_w)
 {
   DoutEntering(dc::curl, "AICurlThread::process_commands(void)");
 
+  // Block here until the thread that woke us up has released mWakeUpMutex.
+  // This is necessary to make sure that if a third thread added commands
+  // too, then either it will signal us later, or we process those commands
+  // now, too.
+  mWakeUpMutex.lock();
+  // Note that if at THIS point another thread tries to obtain mWakeUpMutex in AICurlThread::wakeup_thread
+  // and fails, it is ok that it leaves that function without waking us up too: we're awake and
+  // about to process any commands!
+  mWakeUpMutex.unlock();
+
   // If we get here then the main thread called wakeup_thread() recently.
   for(;;)
   {
@@ -1247,9 +1283,9 @@ void AICurlThread::process_commands(AICurlMultiHandle_wat const& multi_handle_w)
 	  command_queue_wat command_queue_w(command_queue);
 	  if (command_queue_w->empty())
 	  {
-		mWakeUpMutex.lock();
+		mWakeUpFlagMutex.lock();
 		mWakeUpFlag = false;
-		mWakeUpMutex.unlock();
+		mWakeUpFlagMutex.unlock();
 		break;
 	  }
 	  // Move the next command from the queue into command_being_processed.
@@ -1312,10 +1348,10 @@ void AICurlThread::run(void)
 	  // Process every command in command_queue before filling the fd_set passed to select().
 	  for(;;)
 	  {
-		mWakeUpMutex.lock();
+		mWakeUpFlagMutex.lock();
 		if (mWakeUpFlag)
 		{
-		  mWakeUpMutex.unlock();
+		  mWakeUpFlagMutex.unlock();
 		  process_commands(multi_handle_w);
 		  continue;
 		}
@@ -1324,7 +1360,7 @@ void AICurlThread::run(void)
 	  // wakeup_thread() is also called after setting mRunning to false.
 	  if (!mRunning)
 	  {
-		mWakeUpMutex.unlock();
+		mWakeUpFlagMutex.unlock();
 		break;
 	  }
 
@@ -1400,7 +1436,7 @@ void AICurlThread::run(void)
 #endif
 #endif
 	  ready = select(nfds, read_fd_set, write_fd_set, NULL, &timeout);
-	  mWakeUpMutex.unlock();
+	  mWakeUpFlagMutex.unlock();
 #ifdef CWDEBUG
 #ifdef DEBUG_CURLIO
 	  Dout(dc::finish|cond_error_cf(ready == -1), ready);
@@ -1901,7 +1937,7 @@ void BufferedCurlEasyRequest::setStatusAndReason(U32 status, std::string const&
   // Sanity check. If the server replies with a redirect status then we better have that option turned on!
   if ((status >= 300 && status < 400) && mResponder && !mResponder->redirect_status_ok())
   {
-	llerrs << "Received " << status << " (" << reason << ") for responder \"" << mTimeoutPolicy->name() << "\" which has no followRedir()!" << llendl;
+	llerrs << "Received " << status << " (" << reason << ") for responder \"" << mResponder->getName() << "\" which has no followRedir()!" << llendl;
   }
 }
 
@@ -1913,7 +1949,7 @@ void BufferedCurlEasyRequest::processOutput(void)
   CURLcode code;
   AITransferInfo info;
   getResult(&code, &info);
-  if (code == CURLE_OK && mStatus != HTTP_INTERNAL_ERROR)
+  if (code == CURLE_OK && !is_internal_http_error(mStatus))
   {
 	getinfo(CURLINFO_RESPONSE_CODE, &responseCode);
 	// If getResult code is CURLE_OK then we should have decoded the first header line ourselves.
@@ -1925,8 +1961,30 @@ void BufferedCurlEasyRequest::processOutput(void)
   }
   else
   {
-	responseCode = HTTP_INTERNAL_ERROR;
 	responseReason = (code == CURLE_OK) ? mReason : std::string(curl_easy_strerror(code));
+	switch (code)
+	{
+	  case CURLE_FAILED_INIT:
+		responseCode = HTTP_INTERNAL_ERROR_OTHER;
+		break;
+	  case CURLE_OPERATION_TIMEDOUT:
+		responseCode = HTTP_INTERNAL_ERROR_CURL_TIMEOUT;
+		break;
+	  case CURLE_WRITE_ERROR:
+		responseCode = HTTP_INTERNAL_ERROR_LOW_SPEED;
+		break;
+	  default:
+		responseCode = HTTP_INTERNAL_ERROR_CURL_OTHER;
+		break;
+	}
+	if (responseCode == HTTP_INTERNAL_ERROR_LOW_SPEED)
+	{
+		// Rewrite error to something understandable.
+		responseReason = llformat("Connection to \"%s\" stalled: download speed dropped below %u bytes/s for %u seconds (up till that point, %s received a total of %u bytes). "
+			"To change these values, go to Advanced --> Debug Settings and change CurlTimeoutLowSpeedLimit and CurlTimeoutLowSpeedTime respectively.",
+			mResponder->getURL().c_str(), mResponder->getHTTPTimeoutPolicy().getLowSpeedLimit(), mResponder->getHTTPTimeoutPolicy().getLowSpeedTime(),
+			mResponder->getName(), mResponseTransferedBytes);
+	}
 	setopt(CURLOPT_FRESH_CONNECT, TRUE);
   }
 
@@ -2000,8 +2058,9 @@ size_t BufferedCurlEasyRequest::curlReadCallback(char* data, size_t size, size_t
   S32 bytes = size * nmemb;		// The maximum amount to read.
   self_w->mLastRead = self_w->getInput()->readAfter(sChannels.out(), self_w->mLastRead, (U8*)data, bytes);
   self_w->mRequestTransferedBytes += bytes;		// Accumulate data sent to the server.
+  llassert(self_w->mRequestTransferedBytes <= self_w->mContentLength);	// Content-Length should always be known, and we should never be sending more.
   // Timeout administration.
-  if (self_w->httptimeout()->data_sent(bytes))
+  if (self_w->httptimeout()->data_sent(bytes, self_w->mRequestTransferedBytes >= self_w->mContentLength))
   {
 	// Transfer timed out. Return CURL_READFUNC_ABORT which will abort with error CURLE_ABORTED_BY_CALLBACK.
 	return CURL_READFUNC_ABORT;
@@ -2063,7 +2122,7 @@ size_t BufferedCurlEasyRequest::curlHeaderCallback(char* data, size_t size, size
 	  }
 	  // Either way, this status value is not understood (or taken into account).
 	  // Set it to internal error so that the rest of code treats it as an error.
-	  status = HTTP_INTERNAL_ERROR;
+	  status = HTTP_INTERNAL_ERROR_OTHER;
 	}
 	self_w->received_HTTP_header();
 	self_w->setStatusAndReason(status, reason);
diff --git a/indra/llmessage/aihttptimeout.cpp b/indra/llmessage/aihttptimeout.cpp
index 499921a2e..352862ca1 100644
--- a/indra/llmessage/aihttptimeout.cpp
+++ b/indra/llmessage/aihttptimeout.cpp
@@ -66,7 +66,7 @@ struct AIAccess {
 struct AIHTTPTimeoutPolicy {
   U16 getReplyDelay(void) const { return 60; }
   U16 getLowSpeedTime(void) const { return 30; }
-  U32 getLowSpeedLimit(void) const { return 56000; }
+  U32 getLowSpeedLimit(void) const { return 7000; }
   static bool connect_timed_out(std::string const&) { return false; }
 };
 
@@ -84,6 +84,11 @@ public:
 
 #include "aihttptimeout.h"
 
+// If this is set, treat dc::curlio as off in the assertion below.
+#if defined(CWDEBUG) || defined(DEBUG_CURLIO)
+bool gCurlIo;
+#endif
+
 namespace AICurlPrivate {
 namespace curlthread {
 
@@ -100,7 +105,7 @@ U64 HTTPTimeout::sClockCount;										// Clock count, set once per select() exi
 // queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
 //                                                    ^ ^ ^       ^   ^      ^
 //                                                    | | |       |   |      |
-bool HTTPTimeout::data_sent(size_t n)
+bool HTTPTimeout::data_sent(size_t n, bool finished)
 {
   // Generate events.
   if (!mLowSpeedOn)
@@ -109,7 +114,7 @@ bool HTTPTimeout::data_sent(size_t n)
 	reset_lowspeed();
   }
   // Detect low speed.
-  return lowspeed(n);
+  return lowspeed(n, finished);
 }
 
 // CURL-THREAD
@@ -122,6 +127,7 @@ void HTTPTimeout::reset_lowspeed(void)
 {
   mLowSpeedClock = sClockCount;
   mLowSpeedOn = true;
+  mLastBytesSent = false;	// We're just starting!
   mLastSecond = -1;			// This causes lowspeed to initialize the rest.
   mStalled = (U64)-1;		// Stop reply delay timer.
   DoutCurl("reset_lowspeed: mLowSpeedClock = " << mLowSpeedClock << "; mStalled = -1");
@@ -169,7 +175,7 @@ bool HTTPTimeout::data_received(size_t n/*,*/
 	  // using CURLOPT_DEBUGFUNCTION. Note that mDebugIsHeadOrGetMethod is only valid when the debug channel 'curlio' is on,
 	  // because it is set in the debug callback function.
 	  // This is also normal if we received a HTTP header with an error status, since that can interrupt our upload.
-	  Debug(llassert(upload_error_status || AICurlEasyRequest_wat(*mLockObj)->mDebugIsHeadOrGetMethod || !dc::curlio.is_on()));
+	  Debug(llassert(upload_error_status || AICurlEasyRequest_wat(*mLockObj)->mDebugIsHeadOrGetMethod || !dc::curlio.is_on() || gCurlIo));
 	  // 'Upload finished' detection failed, generate it now.
 	  upload_finished();
 	}
@@ -193,9 +199,9 @@ bool HTTPTimeout::data_received(size_t n/*,*/
 // queued--><--DNS lookup + connect + send headers-->[<--send body (if any)-->]<--replydelay--><--receive headers + body--><--done
 //                                                    ^ ^ ^       ^   ^      ^                 ^  ^   ^     ^    ^  ^ ^   ^
 //                                                    | | |       |   |      |                 |  |   |     |    |  | |   |
-bool HTTPTimeout::lowspeed(size_t bytes)
+bool HTTPTimeout::lowspeed(size_t bytes, bool finished)
 {
-  //DoutCurlEntering("HTTPTimeout::lowspeed(" << bytes << ")");		commented out... too spammy for normal use.
+  //DoutCurlEntering("HTTPTimeout::lowspeed(" << bytes << ", " << finished << ")");		commented out... too spammy for normal use.
 
   // The algorithm to determine if we timed out if different from how libcurls CURLOPT_LOW_SPEED_TIME works.
   //
@@ -219,6 +225,9 @@ bool HTTPTimeout::lowspeed(size_t bytes)
   // and caused something so evil and hard to find that... NEVER AGAIN!
   llassert(second >= 0);
 
+  // finished should be false until the very last call to this function.
+  mLastBytesSent = finished;
+
   // If this is the same second as last time, just add the number of bytes to the current bucket.
   if (second == mLastSecond)
   {
@@ -274,21 +283,39 @@ bool HTTPTimeout::lowspeed(size_t bytes)
   mBuckets[mBucket] = bytes;
 
   // Check if we timed out.
-  U32 const low_speed_limit = mPolicy->getLowSpeedLimit();
-  U32 mintotalbytes = low_speed_limit * low_speed_time;
+  U32 const low_speed_limit = mPolicy->getLowSpeedLimit();	// In bytes/s
+  U32 mintotalbytes = low_speed_limit * low_speed_time;		// In bytes.
   DoutCurl("Transfered " << mTotalBytes << " bytes in " << llmin(second, (S32)low_speed_time) << " seconds after " << second << " second" << ((second == 1) ? "" : "s") << ".");
   if (second >= low_speed_time)
   {
 	DoutCurl("Average transfer rate is " << (mTotalBytes / low_speed_time) << " bytes/s (low speed limit is " << low_speed_limit << " bytes/s)");
 	if (mTotalBytes < mintotalbytes)
 	{
+	  if (finished)
+	  {
+		llwarns <<
+#ifdef CWDEBUG
+		(void*)get_lockobj() << ": "
+#endif
+		"Transfer rate timeout (average transfer rate below " << low_speed_limit <<
+		" bytes/s for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") <<
+		") but we just sent the LAST bytes! Waiting an additional 4 seconds." << llendl;
+		// Let's hope these last bytes will make it and do not time out on transfer speed anymore.
+		// Just give these bytes 4 more seconds to be written to the socket (after which we'll
+		// assume that the 'upload finished' detection failed and we'll wait another ReplyDelay
+		// seconds before finally, actually timing out).
+		mStalled = sClockCount + 4 / sClockWidth;
+		DoutCurl("mStalled set to sClockCount (" << sClockCount << ") + " << (mStalled - sClockCount) << " (4 seconds)");
+		return false;
+	  }
 	  // The average transfer rate over the passed low_speed_time seconds is too low. Abort the transfer.
 	  llwarns <<
 #ifdef CWDEBUG
 		(void*)get_lockobj() << ": "
 #endif
 		"aborting slow connection (average transfer rate below " << low_speed_limit <<
-		" for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") << ")." << llendl;
+		" bytes/s for more than " << low_speed_time << " second" << ((low_speed_time == 1) ? "" : "s") << ")." << llendl;
+	  // This causes curl to exit with CURLE_WRITE_ERROR.
 	  return true;
 	}
   }
@@ -327,7 +354,7 @@ bool HTTPTimeout::lowspeed(size_t bytes)
 	  llassert_always(bucket < low_speed_time);
 	  total_bytes -= mBuckets[bucket];	// Empty this bucket.
 	}
-	while(total_bytes >= 1);	// Use 1 here instead of mintotalbytes, to test that total_bytes indeed always reaches zero.
+	while(total_bytes >= mintotalbytes);
   }
   // If this function isn't called again within max_stall_time seconds, we stalled.
   mStalled = sClockCount + max_stall_time / sClockWidth;
@@ -377,6 +404,19 @@ void HTTPTimeout::done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode
   DoutCurl("done: mStalled set to -1");
 }
 
+bool HTTPTimeout::maybe_upload_finished(void)	// Called from has_stalled() once mStalled has expired.
+{
+  if (!mUploadFinished && mLastBytesSent)
+  {
+	// The last bytes were sent to libcurl but upload_finished() was never called: assume that
+	// 'upload finished' detection failed and the server is slow with a reply. Switch to waiting for a reply.
+	upload_finished();
+	return true;				// The caller (has_stalled) does not report a stall in this case.
+  }
+  // Either upload_finished() was already called, or the last bytes were not sent to libcurl yet.
+  return false;					// Nothing was changed; the caller treats the expired mStalled as a real timeout.
+}
+
 // Libcurl uses GetTickCount on windows, with a resolution of 10 to 16 ms.
 // As a result, we can not assume that namelookup_time == 0 has a special meaning.
 #define LOWRESTIMER LL_WINDOWS
@@ -493,6 +533,10 @@ void HTTPTimeout::print_diagnostics(CurlEasyRequest const* curl_easy_request, ch
   {
 	llinfos << "The request upload finished successfully." << llendl;
   }
+  else if (mLastBytesSent)
+  {
+	llinfos << "All bytes where sent to libcurl for upload." << llendl;
+  }
   if (mLastSecond > 0 && mLowSpeedOn)
   {
 	llinfos << "The " << (mNothingReceivedYet ? "upload" : "download") << " did last " << mLastSecond << " second" << ((mLastSecond == 1) ? "" : "s") << ", before it timed out." << llendl;
diff --git a/indra/llmessage/aihttptimeout.h b/indra/llmessage/aihttptimeout.h
index ab7895a3b..aa5e6feef 100644
--- a/indra/llmessage/aihttptimeout.h
+++ b/indra/llmessage/aihttptimeout.h
@@ -79,6 +79,7 @@ class HTTPTimeout : public LLRefCount {
 	U16 mBucket;								// The bucket corresponding to mLastSecond.
 	bool mNothingReceivedYet;					// Set when created, reset when the HTML reply header from the server is received.
 	bool mLowSpeedOn;							// Set while uploading or downloading data.
+	bool mLastBytesSent;						// Set when the last bytes were sent to libcurl to be uploaded.
 	bool mUploadFinished;						// Used to keep track of whether upload_finished was called yet.
 	S32 mLastSecond;							// The time at which lowspeed() was last called, in seconds since mLowSpeedClock.
 	S32 mOverwriteSecond;						// The second at which the first bucket of this transfer will be overwritten.
@@ -94,7 +95,7 @@ class HTTPTimeout : public LLRefCount {
 
   public:
 	HTTPTimeout(AIHTTPTimeoutPolicy const* policy, ThreadSafeBufferedCurlEasyRequest* lock_obj) :
-		mPolicy(policy), mNothingReceivedYet(true), mLowSpeedOn(false), mUploadFinished(false), mStalled((U64)-1)
+		mPolicy(policy), mNothingReceivedYet(true), mLowSpeedOn(false), mLastBytesSent(false), mUploadFinished(false), mStalled((U64)-1)
 #if defined(CWDEBUG) || defined(DEBUG_CURLIO)
 		, mLockObj(lock_obj)
 #endif
@@ -104,7 +105,7 @@ class HTTPTimeout : public LLRefCount {
 	void upload_finished(void);
 
 	// Called when data is sent. Returns true if transfer timed out.
-	bool data_sent(size_t n);
+	bool data_sent(size_t n, bool finished);
 
 	// Called when data is received. Returns true if transfer timed out.
 	bool data_received(size_t n/*,*/ ASSERT_ONLY_COMMA(bool upload_error_status = false));
@@ -112,8 +113,8 @@ class HTTPTimeout : public LLRefCount {
 	// Called immediately before done() after curl finished, with code.
 	void done(AICurlEasyRequest_wat const& curlEasyRequest_w, CURLcode code);
 
-	// Accessor.
-	bool has_stalled(void) const { return mStalled < sClockCount;  }
+	// Returns true when we REALLY timed out. Might call upload_finished heuristically.
+	bool has_stalled(void) { return mStalled < sClockCount && !maybe_upload_finished(); }
 
 	// Called from BufferedCurlEasyRequest::processOutput if a timeout occurred.
 	void print_diagnostics(CurlEasyRequest const* curl_easy_request, char const* eff_url);
@@ -127,11 +128,19 @@ class HTTPTimeout : public LLRefCount {
 	void reset_lowspeed(void);
 
 	// Common low speed detection, Called from data_sent or data_received.
-	bool lowspeed(size_t bytes);
+	bool lowspeed(size_t bytes, bool finished = false);
+
+	// Returns false when we timed out on reply delay, or didn't send all bytes yet.
+	// Otherwise calls upload_finished() and returns true.
+	bool maybe_upload_finished(void);
 };
 
 } // namespace curlthread
 } // namespace AICurlPrivate
 
+#if defined(CWDEBUG) || defined(DEBUG_CURLIO)
+extern bool gCurlIo;
+#endif
+
 #endif
 
diff --git a/indra/llmessage/aihttptimeoutpolicy.cpp b/indra/llmessage/aihttptimeoutpolicy.cpp
index 1c4c1af81..d5e0dd3b6 100644
--- a/indra/llmessage/aihttptimeoutpolicy.cpp
+++ b/indra/llmessage/aihttptimeoutpolicy.cpp
@@ -88,7 +88,7 @@ U16 const AITP_default_DNS_lookup_grace = 60;			// Allow for 60 seconds long DNS
 U16 const AITP_default_maximum_connect_time = 10;		// Allow the SSL/TLS connection through a proxy, including handshakes, to take up to 10 seconds.
 U16 const AITP_default_maximum_reply_delay = 60;		// Allow the server 60 seconds to do whatever it has to do before starting to send data.
 U16 const AITP_default_low_speed_time = 30;				// If a transfer speed drops below AITP_default_low_speed_limit bytes/s for 30 seconds, terminate the transfer.
-U32 const AITP_default_low_speed_limit = 56000;			// In bytes per second (use for CURLOPT_LOW_SPEED_LIMIT).
+U32 const AITP_default_low_speed_limit = 7000;			// In bytes per second (use for CURLOPT_LOW_SPEED_LIMIT).
 U16 const AITP_default_maximum_curl_transaction = 300;	// Allow large files to be transfered over slow connections.
 U16 const AITP_default_maximum_total_delay = 600;		// Avoid "leaking" by terminating anything that wasn't completed after 10 minutes.
 
@@ -105,6 +105,7 @@ AIHTTPTimeoutPolicy& AIHTTPTimeoutPolicy::operator=(AIHTTPTimeoutPolicy const& r
   mLowSpeedLimit = rhs.mLowSpeedLimit;
   mMaximumCurlTransaction = rhs.mMaximumCurlTransaction;
   mMaximumTotalDelay = rhs.mMaximumTotalDelay;
+  changed();
   return *this;
 }
 
@@ -136,19 +137,27 @@ struct PolicyOp {
 class AIHTTPTimeoutPolicyBase : public AIHTTPTimeoutPolicy {
   private:
 	std::vector<AIHTTPTimeoutPolicy*> mDerived;	// Policies derived from this one.
+	PolicyOp const* mOp;						// Operator we applied to base to get ourselves.
 
   public:
 	AIHTTPTimeoutPolicyBase(U16 dns_lookup_grace, U16 subsequent_connects, U16 reply_delay,
 							U16 low_speed_time, U32 low_speed_limit,
 							U16 curl_transaction, U16 total_delay) :
-		AIHTTPTimeoutPolicy(NULL, dns_lookup_grace, subsequent_connects, reply_delay, low_speed_time, low_speed_limit, curl_transaction, total_delay) { }
+		AIHTTPTimeoutPolicy(NULL, dns_lookup_grace, subsequent_connects, reply_delay, low_speed_time, low_speed_limit, curl_transaction, total_delay),
+		mOp(NULL) { }
 
 	// Derive base from base.
-	AIHTTPTimeoutPolicyBase(AIHTTPTimeoutPolicyBase& rhs, PolicyOp const& op) : AIHTTPTimeoutPolicy(rhs) { op.perform(this); }
+	AIHTTPTimeoutPolicyBase(AIHTTPTimeoutPolicyBase& rhs, PolicyOp& op) : AIHTTPTimeoutPolicy(rhs), mOp(&op) { rhs.derived(this); mOp->perform(this); }
 
 	// Called for every derived policy.
 	void derived(AIHTTPTimeoutPolicy* derived) { mDerived.push_back(derived); }
 
+	// Called when our base changed.
+	/*virtual*/ void base_changed(void);
+
+	// Called when we ourselves changed.
+	/*virtual*/ void changed(void);
+
 	// Provide public acces to sDebugSettingsCurlTimeout for this compilation unit.
 	static AIHTTPTimeoutPolicyBase& getDebugSettingsCurlTimeout(void) { return sDebugSettingsCurlTimeout; }
 
@@ -157,6 +166,27 @@ class AIHTTPTimeoutPolicyBase : public AIHTTPTimeoutPolicy {
 	AIHTTPTimeoutPolicyBase& operator=(AIHTTPTimeoutPolicy const& rhs);
 };
 
+void AIHTTPTimeoutPolicyBase::base_changed(void)	// Called when the base that we were derived from changed.
+{
+  AIHTTPTimeoutPolicy::base_changed();	// Re-copy the values of mBase. NOTE(review): that function ends in a virtual changed(), so mDerived is already notified once before mOp is re-applied - confirm this is intended.
+  if (mOp)								// NULL when we were constructed from explicit values instead of from another base.
+	mOp->perform(this);					// Re-apply the operator that was applied to our base to get ourselves.
+  changed();							// Pass the final values on to the policies that are derived from us.
+}
+
+void AIHTTPTimeoutPolicyBase::changed(void)	// Called when we ourselves changed; overrides AIHTTPTimeoutPolicy::changed.
+{
+  for (std::vector<AIHTTPTimeoutPolicy*>::iterator iter = mDerived.begin(); iter != mDerived.end(); ++iter)	// mDerived: every policy registered through derived().
+	(*iter)->base_changed();	// Virtual: each derived policy re-copies our values; a derived AIHTTPTimeoutPolicyBase also re-applies its operator and notifies its own derived policies.
+}
+
+void AIHTTPTimeoutPolicy::changed(void)	// Called when we ourselves changed; this version only reports the new values through Dout.
+{
+  Dout(dc::notice, "Policy \"" << mName << "\" changed into: DNSLookup: " << mDNSLookupGrace << "; Connect: " << mMaximumConnectTime <<
+	  "; ReplyDelay: " << mMaximumReplyDelay << "; LowSpeedTime: " << mLowSpeedTime << "; LowSpeedLimit: " << mLowSpeedLimit <<
+	  "; MaxTransaction: " << mMaximumCurlTransaction << "; MaxTotalDelay:" << mMaximumTotalDelay);
+}
+
 AIHTTPTimeoutPolicy::AIHTTPTimeoutPolicy(AIHTTPTimeoutPolicy& base) :
 	mName(NULL),
 	mBase(static_cast<AIHTTPTimeoutPolicyBase*>(&base)),
@@ -192,6 +222,19 @@ AIHTTPTimeoutPolicy::AIHTTPTimeoutPolicy(char const* name, AIHTTPTimeoutPolicyBa
   mBase->derived(this);
 }
 
+void AIHTTPTimeoutPolicy::base_changed(void)
+{
+  // Take over the timeout values of mBase. The same as *this = *mBase; but can't use operator= because of an assert that checks that mBase is not set.
+  mDNSLookupGrace = mBase->mDNSLookupGrace;
+  mMaximumConnectTime = mBase->mMaximumConnectTime;
+  mMaximumReplyDelay = mBase->mMaximumReplyDelay;
+  mLowSpeedTime = mBase->mLowSpeedTime;
+  mLowSpeedLimit = mBase->mLowSpeedLimit;
+  mMaximumCurlTransaction = mBase->mMaximumCurlTransaction;
+  mMaximumTotalDelay = mBase->mMaximumTotalDelay;
+  changed();	// Virtual call: announce the new values (AIHTTPTimeoutPolicyBase overrides this to update the policies derived from it).
+}
+
 //static
 void AIHTTPTimeoutPolicy::setDefaultCurlTimeout(AIHTTPTimeoutPolicy const& timeout)
 {
@@ -621,7 +664,7 @@ AIHTTPTimeoutPolicyBase HTTPTimeoutPolicy_default(
 		AITP_default_maximum_curl_transaction,
 		AITP_default_maximum_total_delay);
 
-//static. Initialized here, but shortly overwritten by Debug Settings.
+//static. Initialized here, but shortly overwritten by Debug Settings (except for the crash logger, in which case these are the actual values).
 AIHTTPTimeoutPolicyBase AIHTTPTimeoutPolicy::sDebugSettingsCurlTimeout(
 		AITP_default_DNS_lookup_grace,
 		AITP_default_maximum_connect_time,
@@ -631,8 +674,8 @@ AIHTTPTimeoutPolicyBase AIHTTPTimeoutPolicy::sDebugSettingsCurlTimeout(
 		AITP_default_maximum_curl_transaction,
 		AITP_default_maximum_total_delay);
 
-// Note: Broken compiler doesn't allow as to use temporaries for the Operator ojects,
-// so they are instantiated separately.
+// Note: All operator objects (Transaction, Connect, etc) must be globals (not temporaries), because a pointer to them is stored!
+// To enforce this they are passed by reference to non-const (but will never be changed).
 
 // This used to be '5 seconds'.
 Transaction transactionOp5s(5);
diff --git a/indra/llmessage/aihttptimeoutpolicy.h b/indra/llmessage/aihttptimeoutpolicy.h
index 9d77ffd5c..a8bb80ca5 100644
--- a/indra/llmessage/aihttptimeoutpolicy.h
+++ b/indra/llmessage/aihttptimeoutpolicy.h
@@ -110,6 +110,12 @@ class AIHTTPTimeoutPolicy {
 	// Called when a connect to a hostname timed out.
 	static bool connect_timed_out(std::string const& hostname);
 
+	// Called when the base that this policy was based on changed.
+	virtual void base_changed(void);
+
+	// Called when we ourselves changed.
+	virtual void changed(void);
+
   protected:
 	// Used by AIHTTPTimeoutPolicyBase::AIHTTPTimeoutPolicyBase(AIHTTPTimeoutPolicyBase&).
 	AIHTTPTimeoutPolicy(AIHTTPTimeoutPolicy&);
diff --git a/indra/llmessage/llhttpclient.cpp b/indra/llmessage/llhttpclient.cpp
index 554198299..881e7652d 100644
--- a/indra/llmessage/llhttpclient.cpp
+++ b/indra/llmessage/llhttpclient.cpp
@@ -195,18 +195,20 @@ public:
 	LLAssetType::EType mAssetType;
 };
 
-static void request(
-	const std::string& url,
+//static
+void LLHTTPClient::request(
+	std::string const& url,
 	LLURLRequest::ERequestAction method,
 	Injector* body_injector,
 	LLHTTPClient::ResponderPtr responder,
 	AIHTTPHeaders& headers/*,*/
 	DEBUG_CURLIO_PARAM(EDebugCurl debug),
-	EKeepAlive keepalive = keep_alive,
-	bool is_auth = false,
-	bool no_compression = false,
-	AIStateMachine* parent = NULL,
-	AIStateMachine::state_type new_parent_state = 0)
+	EKeepAlive keepalive,
+	EDoesAuthentication does_auth,
+	EAllowCompressedReply allow_compression,
+	AIStateMachine* parent,
+	AIStateMachine::state_type new_parent_state,
+	AIEngine* default_engine)
 {
 	llassert(responder);
 
@@ -219,7 +221,7 @@ static void request(
 	LLURLRequest* req;
 	try
 	{
-		req = new LLURLRequest(method, url, body_injector, responder, headers, keepalive, is_auth, no_compression);
+		req = new LLURLRequest(method, url, body_injector, responder, headers, keepalive, does_auth, allow_compression);
 #ifdef DEBUG_CURLIO
 		req->mCurlEasyRequest.debug(debug);
 #endif
@@ -231,7 +233,7 @@ static void request(
 		return ;
 	}
 
-	req->run(parent, new_parent_state, parent != NULL);
+	req->run(parent, new_parent_state, parent != NULL, true, default_engine);
 }
 
 void LLHTTPClient::getByteRange(std::string const& url, S32 offset, S32 bytes, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
@@ -240,22 +242,22 @@ void LLHTTPClient::getByteRange(std::string const& url, S32 offset, S32 bytes, R
 	{
 		headers.addHeader("Range", llformat("bytes=%d-%d", offset, offset + bytes - 1));
 	}
-    request(url, LLURLRequest::HTTP_GET, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+    request(url, HTTP_GET, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
 }
 
 void LLHTTPClient::head(std::string const& url, ResponderHeadersOnly* responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
 {
-	request(url, LLURLRequest::HTTP_HEAD, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+	request(url, HTTP_HEAD, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
 }
 
 void LLHTTPClient::get(std::string const& url, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
 {
-	request(url, LLURLRequest::HTTP_GET, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+	request(url, HTTP_GET, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
 }
 
 void LLHTTPClient::getHeaderOnly(std::string const& url, ResponderHeadersOnly* responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
 {
-	request(url, LLURLRequest::HTTP_HEAD, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+	request(url, HTTP_HEAD, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
 }
 
 void LLHTTPClient::get(std::string const& url, LLSD const& query, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
@@ -302,7 +304,7 @@ AIHTTPTimeoutPolicy const& LLHTTPClient::ResponderBase::getHTTPTimeoutPolicy(voi
 void LLHTTPClient::ResponderBase::decode_llsd_body(U32 status, std::string const& reason, LLChannelDescriptors const& channels, buffer_ptr_t const& buffer, LLSD& content)
 {
 	AICurlInterface::Stats::llsd_body_count++;
-	if (status == HTTP_INTERNAL_ERROR)
+	if (is_internal_http_error(status))
 	{
 		// In case of an internal error (ie, a curl error), a description of the (curl) error is the best we can do.
 		// In any case, the body if anything was received at all, can not be relied upon.
@@ -353,7 +355,7 @@ void LLHTTPClient::ResponderBase::decode_llsd_body(U32 status, std::string const
 void LLHTTPClient::ResponderBase::decode_raw_body(U32 status, std::string const& reason, LLChannelDescriptors const& channels, buffer_ptr_t const& buffer, std::string& content)
 {
 	AICurlInterface::Stats::raw_body_count++;
-	if (status == HTTP_INTERNAL_ERROR)
+	if (is_internal_http_error(status))
 	{
 		// In case of an internal error (ie, a curl error), a description of the (curl) error is the best we can do.
 		// In any case, the body if anything was received at all, can not be relied upon.
@@ -487,7 +489,8 @@ void BlockingResponder::wait(void)
 	// We're the main thread, so we have to give AIStateMachine CPU cycles.
 	while (!mFinished)
 	{
-	  AIStateMachine::mainloop();
+	  // AIFIXME: this can probably be removed once curl is detached from the main thread.
+	  gMainThreadEngine.mainloop();
 	  ms_sleep(10);
 	}
   }
@@ -609,11 +612,10 @@ static LLSD blocking_request(
 
 	responder->wait();
 
-	S32 http_status = HTTP_INTERNAL_ERROR;
 	LLSD response = LLSD::emptyMap();
 	CURLcode result = responder->result_code();
+	S32 http_status = responder->http_status();
 
-	http_status = responder->http_status();
 	bool http_success = http_status >= 200 && http_status < 300;
 	if (result == CURLE_OK && http_success)
 	{
@@ -685,17 +687,17 @@ U32 LLHTTPClient::blockingGetRaw(const std::string& url, std::string& body/*,*/
 
 void LLHTTPClient::put(std::string const& url, LLSD const& body, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
 {
-	request(url, LLURLRequest::HTTP_PUT, new LLSDInjector(body), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+	request(url, HTTP_PUT, new LLSDInjector(body), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), no_keep_alive, no_does_authentication, no_allow_compressed_reply);
 }
 
 void LLHTTPClient::post(std::string const& url, LLSD const& body, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug), EKeepAlive keepalive, AIStateMachine* parent, AIStateMachine::state_type new_parent_state)
 {
-	request(url, LLURLRequest::HTTP_POST, new LLSDInjector(body), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive, false, false, parent, new_parent_state);
+	request(url, HTTP_POST, new LLSDInjector(body), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive, no_does_authentication, allow_compressed_reply, parent, new_parent_state);
 }
 
 void LLHTTPClient::postXMLRPC(std::string const& url, XMLRPC_REQUEST xmlrpc_request, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug), EKeepAlive keepalive)
 {
-  	request(url, LLURLRequest::HTTP_POST, new XMLRPCInjector(xmlrpc_request), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive, true, false);		// Does use compression.
+  	request(url, HTTP_POST, new XMLRPCInjector(xmlrpc_request), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive, does_authentication, allow_compressed_reply);
 }
 
 void LLHTTPClient::postXMLRPC(std::string const& url, char const* method, XMLRPC_VALUE value, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug), EKeepAlive keepalive)
@@ -706,33 +708,33 @@ void LLHTTPClient::postXMLRPC(std::string const& url, char const* method, XMLRPC
 	XMLRPC_RequestSetData(xmlrpc_request, value);
 	// XMLRPCInjector takes ownership of xmlrpc_request and will free it when done.
 	// LLURLRequest takes ownership of the XMLRPCInjector object and will free it when done.
-  	request(url, LLURLRequest::HTTP_POST, new XMLRPCInjector(xmlrpc_request), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive, true, true);		// Does not use compression.
+  	request(url, HTTP_POST, new XMLRPCInjector(xmlrpc_request), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive, does_authentication, no_allow_compressed_reply);
 }
 
 void LLHTTPClient::postRaw(std::string const& url, char const* data, S32 size, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug), EKeepAlive keepalive)
 {
-	request(url, LLURLRequest::HTTP_POST, new RawInjector(data, size), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive);
+	request(url, HTTP_POST, new RawInjector(data, size), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive);
 }
 
 void LLHTTPClient::postFile(std::string const& url, std::string const& filename, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug), EKeepAlive keepalive)
 {
-	request(url, LLURLRequest::HTTP_POST, new FileInjector(filename), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive);
+	request(url, HTTP_POST, new FileInjector(filename), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive);
 }
 
 void LLHTTPClient::postFile(std::string const& url, LLUUID const& uuid, LLAssetType::EType asset_type, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug), EKeepAlive keepalive)
 {
-	request(url, LLURLRequest::HTTP_POST, new VFileInjector(uuid, asset_type), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive);
+	request(url, HTTP_POST, new VFileInjector(uuid, asset_type), responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug), keepalive);
 }
 
 // static
 void LLHTTPClient::del(std::string const& url, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
 {
-	request(url, LLURLRequest::HTTP_DELETE, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+	request(url, HTTP_DELETE, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
 }
 
 // static
 void LLHTTPClient::move(std::string const& url, std::string const& destination, ResponderPtr responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug))
 {
 	headers.addHeader("Destination", destination);
-	request(url, LLURLRequest::HTTP_MOVE, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
+	request(url, HTTP_MOVE, NULL, responder, headers/*,*/ DEBUG_CURLIO_PARAM(debug));
 }
diff --git a/indra/llmessage/llhttpclient.h b/indra/llmessage/llhttpclient.h
index 6142117b0..6dc10080c 100644
--- a/indra/llmessage/llhttpclient.h
+++ b/indra/llmessage/llhttpclient.h
@@ -46,10 +46,13 @@ class AIHTTPTimeoutPolicy;
 class LLBufferArray;
 class LLChannelDescriptors;
 class AIStateMachine;
+class Injector;
+class AIEngine;
 
 extern AIHTTPTimeoutPolicy responderIgnore_timeout;
 typedef struct _xmlrpc_request* XMLRPC_REQUEST;
 typedef struct _xmlrpc_value* XMLRPC_VALUE;
+extern AIEngine gMainThreadEngine;
 
 // Output parameter of AICurlPrivate::CurlEasyRequest::getResult.
 // Used in XMLRPCResponder.
@@ -72,6 +75,16 @@ enum EKeepAlive {
   keep_alive
 };
 
+enum EDoesAuthentication {		// Passed by LLHTTPClient::request to the is_auth parameter of the LLURLRequest constructor.
+  no_does_authentication = 0,	// The default of LLHTTPClient::request.
+  does_authentication			// Passed by, for example, both LLHTTPClient::postXMLRPC overloads.
+};
+
+enum EAllowCompressedReply {		// Passed by LLHTTPClient::request to the LLURLRequest constructor, which stores the negation in mNoCompression.
+  no_allow_compressed_reply = 0,	// Passed by, for example, LLHTTPClient::put.
+  allow_compressed_reply			// The default of LLHTTPClient::request.
+};
+
 #ifdef DEBUG_CURLIO
 enum EDebugCurl {
   debug_off = 0,
@@ -84,6 +97,20 @@ enum EDebugCurl {
 
 class LLHTTPClient {
 public:
+	/**
+	 * @brief This enumeration is for specifying the type of request. The values are used as index into the VERBS table of LLURLRequest::actionAsVerb.
+	 */
+	enum ERequestAction
+	{
+		INVALID,				// Must remain the first value (0): LLURLRequest::actionAsVerb maps out-of-range actions to VERBS[INVALID].
+		HTTP_HEAD,
+		HTTP_GET,
+		HTTP_PUT,
+		HTTP_POST,
+		HTTP_DELETE,
+		HTTP_MOVE, // Caller will need to set 'Destination' header
+		REQUEST_ACTION_COUNT	// Not a request type: the number of values above, used as the size of the VERBS table.
+	};
 
 	/** @name Responder base classes */
 	//@{
@@ -363,7 +390,7 @@ public:
 		}
 
 	public:
-		LegacyPolledResponder(void) : mStatus(HTTP_INTERNAL_ERROR) { }
+		LegacyPolledResponder(void) : mStatus(HTTP_INTERNAL_ERROR_OTHER) { }
 
 		// Accessors.
 		U32 http_status(void) const { return mStatus; }
@@ -393,6 +420,21 @@ public:
 
 	//@}
 
+	/** General API to request a transfer: creates an LLURLRequest from the arguments and runs it. */
+	static void request(
+		std::string const& url,
+		ERequestAction method,
+		Injector* body_injector,			// Supplies the request body; the wrappers in this class pass NULL for HEAD, GET, DELETE and MOVE.
+		ResponderPtr responder,				// Must not be NULL (asserted in the definition).
+		AIHTTPHeaders& headers/*,*/
+		DEBUG_CURLIO_PARAM(EDebugCurl debug),
+		EKeepAlive keepalive = keep_alive,
+		EDoesAuthentication does_auth = no_does_authentication,
+		EAllowCompressedReply allow_compression = allow_compressed_reply,
+		AIStateMachine* parent = NULL,		// parent, new_parent_state and default_engine are passed on to LLURLRequest::run().
+		/*AIStateMachine::state_type*/ U32 new_parent_state = 0,	// The definition spells this type as AIStateMachine::state_type.
+		AIEngine* default_engine = &gMainThreadEngine);
+
 	/** @name non-blocking API */
 	//@{
 	static void head(std::string const& url, ResponderHeadersOnly* responder, AIHTTPHeaders& headers/*,*/ DEBUG_CURLIO_PARAM(EDebugCurl debug = debug_off));
diff --git a/indra/llmessage/llurlrequest.cpp b/indra/llmessage/llurlrequest.cpp
index 8893972c6..c8be0939b 100644
--- a/indra/llmessage/llurlrequest.cpp
+++ b/indra/llmessage/llurlrequest.cpp
@@ -46,7 +46,6 @@
 #include "llscopedvolatileaprpool.h"
 #include "llfasttimer.h"
 #include "message.h"
-static const U32 HTTP_STATUS_PIPE_ERROR = 499;
 
 /**
  * String constants
@@ -60,7 +59,7 @@ const std::string CONTEXT_TRANSFERED_BYTES("transfered_bytes");
 // static
 std::string LLURLRequest::actionAsVerb(LLURLRequest::ERequestAction action)
 {
-	static int const array_size = HTTP_MOVE + 1;	// INVALID == 0
+	static int const array_size = LLHTTPClient::REQUEST_ACTION_COUNT;	// INVALID == 0
 	static char const* const VERBS[array_size] =
 	{
 		"(invalid)",
@@ -71,14 +70,14 @@ std::string LLURLRequest::actionAsVerb(LLURLRequest::ERequestAction action)
 		"DELETE",
 		"MOVE"
 	};
-	return VERBS[action >= array_size ? INVALID : action];
+	return VERBS[action >= array_size ? LLHTTPClient::INVALID : action];
 }
 
 // This might throw AICurlNoEasyHandle.
 LLURLRequest::LLURLRequest(LLURLRequest::ERequestAction action, std::string const& url, Injector* body,
-	LLHTTPClient::ResponderPtr responder, AIHTTPHeaders& headers, bool keepalive, bool is_auth, bool no_compression) :
-    mAction(action), mURL(url), mKeepAlive(keepalive), mIsAuth(is_auth), mNoCompression(no_compression),
-	mBody(body), mResponder(responder), mHeaders(headers)
+	LLHTTPClient::ResponderPtr responder, AIHTTPHeaders& headers, bool keepalive, bool is_auth, bool compression) :	// NOTE(review): the last parameter now means "allow compression", but the declaration in llurlrequest.h still names it no_compression.
+    mAction(action), mURL(url), mKeepAlive(keepalive), mIsAuth(is_auth), mNoCompression(!compression),	// The member keeps the old, negated polarity, hence the '!'.
+	mBody(body), mResponder(responder), mHeaders(headers), mResponderNameCache(responder ? responder->getName() : "<uninitialized>")	// Cache the responder name once; falls back to "<uninitialized>" when no responder was passed.
 {
 }
 
@@ -93,13 +92,13 @@ void LLURLRequest::initialize_impl(void)
 		useProxy(false);
 	}
 
-	if (mAction == HTTP_PUT || mAction == HTTP_POST)
+	if (mAction == LLHTTPClient::HTTP_PUT || mAction == LLHTTPClient::HTTP_POST)
 	{
 		// If the Content-Type header was passed in we defer to the caller's wisdom,
 		// but if they did not specify a Content-Type, then ask the injector.
 		mHeaders.addHeader("Content-Type", mBody->contentType(), AIHTTPHeaders::keep_existing_header);
 	}
-	else if (mAction != HTTP_HEAD)
+	else if (mAction != LLHTTPClient::HTTP_HEAD)
 	{
 		// Check to see if we have already set Accept or not. If no one
 		// set it, set it to application/llsd+xml since that's what we
@@ -107,7 +106,7 @@ void LLURLRequest::initialize_impl(void)
 		mHeaders.addHeader("Accept", "application/llsd+xml", AIHTTPHeaders::keep_existing_header);
 	}
 
-	if (mAction == HTTP_POST && gMessageSystem)
+	if (mAction == LLHTTPClient::HTTP_POST && gMessageSystem)
 	{
 		mHeaders.addHeader("X-SecondLife-UDP-Listen-Port", llformat("%d", gMessageSystem->mPort));
 	}
@@ -199,12 +198,12 @@ bool LLURLRequest::configure(AICurlEasyRequest_wat const& curlEasyRequest_w)
 	{
 		switch(mAction)
 		{
-		case HTTP_HEAD:
+		case LLHTTPClient::HTTP_HEAD:
 			curlEasyRequest_w->setopt(CURLOPT_NOBODY, 1);
 			rv = true;
 			break;
 
-		case HTTP_GET:
+		case LLHTTPClient::HTTP_GET:
 			curlEasyRequest_w->setopt(CURLOPT_HTTPGET, 1);
 
 			// Set Accept-Encoding to allow response compression
@@ -212,18 +211,18 @@ bool LLURLRequest::configure(AICurlEasyRequest_wat const& curlEasyRequest_w)
 			rv = true;
 			break;
 
-		case HTTP_PUT:
-		{
-			// Disable the expect http 1.1 extension. POST and PUT default
-			// to using this, causing the broken server to get confused.
-			curlEasyRequest_w->addHeader("Expect:");
-			curlEasyRequest_w->setopt(CURLOPT_UPLOAD, 1);
-			curlEasyRequest_w->setopt(CURLOPT_INFILESIZE, mBodySize);
+		case LLHTTPClient::HTTP_PUT:
+
+			// Set the handle for an http put
+			curlEasyRequest_w->setPut(mBodySize, mKeepAlive);
+
+			// Set Accept-Encoding to allow response compression
+			curlEasyRequest_w->setoptString(CURLOPT_ENCODING, mNoCompression ? "identity" : "");
 			rv = true;
 			break;
-		}
-		case HTTP_POST:
-		{
+
+		case LLHTTPClient::HTTP_POST:
+
 			// Set the handle for an http post
 			curlEasyRequest_w->setPost(mBodySize, mKeepAlive);
 
@@ -231,14 +230,14 @@ bool LLURLRequest::configure(AICurlEasyRequest_wat const& curlEasyRequest_w)
 			curlEasyRequest_w->setoptString(CURLOPT_ENCODING, mNoCompression ? "identity" : "");
 			rv = true;
 			break;
-		}
-		case HTTP_DELETE:
+
+		case LLHTTPClient::HTTP_DELETE:
 			// Set the handle for an http post
 			curlEasyRequest_w->setoptString(CURLOPT_CUSTOMREQUEST, "DELETE");
 			rv = true;
 			break;
 
-		case HTTP_MOVE:
+		case LLHTTPClient::HTTP_MOVE:
 			// Set the handle for an http post
 			curlEasyRequest_w->setoptString(CURLOPT_CUSTOMREQUEST, "MOVE");
 			rv = true;
@@ -259,3 +258,33 @@ bool LLURLRequest::configure(AICurlEasyRequest_wat const& curlEasyRequest_w)
 	}
 	return rv;
 }
+
+// Logs a warning that names the slowest state machine of a main loop run. Called from the state machine main loop, but defined here so that the caller does not need to include llurlrequest.h.
+void print_statemachine_diagnostics(U64 total_clocks, U64 max_delta, AIEngine::queued_type::const_reference slowest_element)
+{
+  AIStateMachine const& slowest_state_machine = slowest_element.statemachine();
+  LLURLRequest const* request = dynamic_cast<LLURLRequest const*>(&slowest_state_machine);	// Non-NULL only when the slowest state machine is an LLURLRequest.
+  F64 const tfactor = 1000 / calc_clock_frequency();	// Multiplier that turns clock counts into milliseconds (assumes calc_clock_frequency() returns clocks per second -- TODO confirm).
+  std::ostringstream msg;
+  if (total_clocks > max_delta)	// The whole run took longer than its slowest state machine alone: report both figures.
+  {
+	  msg << "AIStateMachine::mainloop did run for " << (total_clocks * tfactor) << " ms. The slowest ";	// NOTE(review): llappviewer.cpp now calls gMainThreadEngine.mainloop(); the "AIStateMachine::mainloop" wording in these messages may be stale.
+  }
+  else
+  {
+	  msg << "AIStateMachine::mainloop: A ";
+  }
+  msg << "state machine ";
+  if (request)
+  {
+	  msg << "(" << request->getResponderName() << ") ";	// Identify the request by its cached responder name.
+  }
+  msg << "ran for " << (max_delta * tfactor) << " ms";
+  if (slowest_state_machine.getRuntime() > max_delta)	// Only mention the state machine's getRuntime() total when it exceeds this single run.
+  {
+	  msg << " (" << (slowest_state_machine.getRuntime() * tfactor) << " ms in total now)";
+  }
+  msg << ".";
+  llwarns << msg.str() << llendl;
+}
+
diff --git a/indra/llmessage/llurlrequest.h b/indra/llmessage/llurlrequest.h
index 9cdb132bb..1f9c09914 100644
--- a/indra/llmessage/llurlrequest.h
+++ b/indra/llmessage/llurlrequest.h
@@ -51,20 +51,7 @@ class Injector
 
 class LLURLRequest : public AICurlEasyRequestStateMachine {
   public:
-	/** 
-	 * @brief This enumeration is for specifying the type of request.
-	 */
-	enum ERequestAction
-	{
-		INVALID,
-		HTTP_HEAD,
-		HTTP_GET,
-		HTTP_PUT,
-		HTTP_POST,
-		HTTP_DELETE,
-		HTTP_MOVE, // Caller will need to set 'Destination' header
-		REQUEST_ACTION_COUNT
-	};
+	typedef LLHTTPClient::ERequestAction ERequestAction;
 
 	/**
 	 * @brief Turn the request action into an http verb.
@@ -79,6 +66,11 @@ class LLURLRequest : public AICurlEasyRequestStateMachine {
 	 */
 	LLURLRequest(ERequestAction action, std::string const& url, Injector* body, LLHTTPClient::ResponderPtr responder, AIHTTPHeaders& headers, bool keepalive, bool is_auth, bool no_compression);
 
+	/**
+	 * @brief Cached value of responder->getName() as passed to the constructor, or "<uninitialized>" if no responder was passed.
+	 */
+	char const* getResponderName(void) const { return mResponderNameCache; }	// Returns the stored pointer as-is. NOTE(review): it is only valid as long as the string returned by getName() is -- confirm that it outlives this object.
+
   protected:
 	// Call abort(), not delete.
 	/*virtual*/ ~LLURLRequest() { }
@@ -118,6 +110,7 @@ class LLURLRequest : public AICurlEasyRequestStateMachine {
 	U32 mBodySize;
 	LLHTTPClient::ResponderPtr mResponder;
 	AIHTTPHeaders mHeaders;
+	char const* mResponderNameCache;
 
   protected:
 	// Handle initializing the object.
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index 2fcca4dd5..2b7f2fb86 100644
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -4492,17 +4492,6 @@ This should be as low as possible, but too low may break functionality</string>
       <key>Value</key>
       <integer>16</integer>
     </map>
-    <key>CurlMaximumNumberOfHandles</key>
-    <map>
-      <key>Comment</key>
-      <string>Maximum number of handles curl can use (requires restart)</string>
-      <key>Persist</key>
-      <integer>1</integer>
-      <key>Type</key>
-      <string>S32</string>
-      <key>Value</key>
-      <integer>256</integer>
-    </map>
     <key>CurlTimeoutDNSLookup</key>
     <map>
       <key>Comment</key>
@@ -4545,7 +4534,7 @@ This should be as low as possible, but too low may break functionality</string>
       <key>Type</key>
       <string>U32</string>
       <key>Value</key>
-      <real>56000</real>
+      <real>7000</real>
     </map>
     <key>CurlTimeoutLowSpeedTime</key>
     <map>
diff --git a/indra/newview/linux_tools/launch_url.sh b/indra/newview/linux_tools/launch_url.sh
index 404ea36f2..c528ee762 100755
--- a/indra/newview/linux_tools/launch_url.sh
+++ b/indra/newview/linux_tools/launch_url.sh
@@ -45,6 +45,18 @@ if [ ! -z "$XBROWSER" ]; then
     echo "$0: Trying some others..."
 fi
 
+# Launcher for any desktop.
+if which xdg-open >/dev/null;  then
+    xdg-open "$URL"  # Run in the foreground (no '&') so that its exit status can be inspected below.
+	case $? in
+	  0) exit ;;  # Launched successfully: stop here, no need to try the other launchers.
+	  1) echo "xdg-open: Error in command line syntax." ;;
+	  2) echo "xdg-open: One of the files passed on the command line did not exist." ;;
+	  3) echo "xdg-open: A required tool could not be found." ;;
+	  4) echo "xdg-open: The action failed." ;;
+	esac  # After a failure (or any other exit status, which prints nothing) fall through to the launchers below.
+fi
+
 # Launcher the default GNOME browser.
 if [ ! -z "$GNOME_DESKTOP_SESSION_ID" ] && which gnome-open >/dev/null; then
     gnome-open "$URL" &
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index e3e300a7d..97e0ce24e 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -234,6 +234,9 @@ extern BOOL gRandomizeFramerate;
 extern BOOL gPeriodicSlowFrame;
 extern BOOL gDebugGL;
 
+extern void startEngineThread(void);
+extern void stopEngineThread(void);
+
 ////////////////////////////////////////////////////////////
 // All from the last globals push...
 const F32 DEFAULT_AFK_TIMEOUT = 5.f * 60.f; // time with no input before user flagged as Away From Keyboard
@@ -657,7 +660,7 @@ bool LLAppViewer::init()
 
     mAlloc.setProfilingEnabled(gSavedSettings.getBOOL("MemProfiling"));
 
-	AIStateMachine::setMaxCount(gSavedSettings.getU32("StateMachineMaxTime"));
+	AIEngine::setMaxCount(gSavedSettings.getU32("StateMachineMaxTime"));
 
 	{
 		AIHTTPTimeoutPolicy policy_tmp(
@@ -1824,6 +1827,7 @@ bool LLAppViewer::cleanup()
 	llinfos << "Message system deleted." << llendflush;
 
 	LLApp::stopErrorThread();			// The following call is not thread-safe. Have to stop all threads.
+	stopEngineThread();
 	AICurlInterface::cleanupCurl();
 
 	// Cleanup settings last in case other classes reference them.
@@ -1901,6 +1905,9 @@ bool LLAppViewer::initThreads()
 		LLWatchdog::getInstance()->init(watchdog_killer_callback);
 	}
 
+	// State machine thread.
+	startEngineThread();
+
 	AICurlInterface::startCurlThread(gSavedSettings.getU32("CurlMaxTotalConcurrentConnections"),
 		                             gSavedSettings.getU32("CurlConcurrentConnectionsPerHost"),
 		                             gSavedSettings.getBOOL("NoVerifySSLCert"));
@@ -3854,7 +3861,7 @@ void LLAppViewer::idle()
 
 	{
 		LLFastTimer t(FTM_STATEMACHINE);
-		AIStateMachine::mainloop();
+		gMainThreadEngine.mainloop();
 	}
 
 	// Must wait until both have avatar object and mute list, so poll
diff --git a/indra/newview/llfloatersnapshot.cpp b/indra/newview/llfloatersnapshot.cpp
index 2671f0543..d4830adbf 100644
--- a/indra/newview/llfloatersnapshot.cpp
+++ b/indra/newview/llfloatersnapshot.cpp
@@ -995,18 +995,8 @@ LLSnapshotLivePreview::EAspectSizeProblem LLSnapshotLivePreview::getAspectSizePr
 	// llround(window_height * scale_factor) respectively (since we set uncrop = true).
 	F32 raw_aspect = (F32)mRawSnapshotWidth / mRawSnapshotHeight;
 	// The smaller dimension might have been rounded up to 0.5 up or down. Calculate upper and lower limits.
-	F32 lower_raw_aspect;
-	F32 upper_raw_aspect;
-	if (mRawSnapshotWidth < mRawSnapshotHeight)
-	{
-		lower_raw_aspect = (mRawSnapshotWidth - 0.5) / mRawSnapshotHeight;
-		upper_raw_aspect = (mRawSnapshotWidth + 0.5) / mRawSnapshotHeight;
-	}
-	else
-	{
-		lower_raw_aspect = mRawSnapshotWidth / (mRawSnapshotHeight + 0.5);
-		upper_raw_aspect = mRawSnapshotWidth / (mRawSnapshotHeight - 0.5);
-	}
+	F32 lower_raw_aspect = (mRawSnapshotWidth - 0.5) / (mRawSnapshotHeight + 0.5);
+	F32 upper_raw_aspect = (mRawSnapshotWidth + 0.5) / (mRawSnapshotHeight - 0.5);
 	// Find out if mRawSnapshot was cropped already.
 	bool const allow_vertical_crop = window_height * upper_raw_aspect >= window_width;			// mRawSnapshot was cropped horizontally.
 	bool const allow_horizontal_crop = window_width / lower_raw_aspect >= window_height;		// mRawSnapshot was cropped vertically.
diff --git a/indra/newview/llinventorymodelbackgroundfetch.cpp b/indra/newview/llinventorymodelbackgroundfetch.cpp
index 9e27b1ad4..3dce0c96a 100644
--- a/indra/newview/llinventorymodelbackgroundfetch.cpp
+++ b/indra/newview/llinventorymodelbackgroundfetch.cpp
@@ -547,7 +547,7 @@ void LLInventoryModelFetchDescendentsResponder::error(U32 status, const std::str
 						
 	fetcher->incrFetchCount(-1);
 
-	if (status==499) // timed out
+	if (is_internal_http_error_that_warrants_a_retry(status)) // timed out
 	{
 		for(LLSD::array_const_iterator folder_it = mRequestSD["folders"].beginArray();
 			folder_it != mRequestSD["folders"].endArray();
diff --git a/indra/newview/llmarketplacefunctions.cpp b/indra/newview/llmarketplacefunctions.cpp
index 0b19efde6..7db143bc2 100644
--- a/indra/newview/llmarketplacefunctions.cpp
+++ b/indra/newview/llmarketplacefunctions.cpp
@@ -172,6 +172,7 @@ namespace LLMarketplaceImport
 
 			if ((status == MarketplaceErrorCodes::IMPORT_REDIRECT) ||
 				(status == MarketplaceErrorCodes::IMPORT_AUTHENTICATION_ERROR) ||
+				(status == MarketplaceErrorCodes::IMPORT_JOB_LOW_SPEED) ||
 				(status == MarketplaceErrorCodes::IMPORT_JOB_TIMEOUT))
 			{
 				if (gSavedSettings.getBOOL("InventoryOutboxLogging"))
@@ -228,6 +229,7 @@ namespace LLMarketplaceImport
 			}
 
 			if ((status == MarketplaceErrorCodes::IMPORT_AUTHENTICATION_ERROR) ||
+				(status == MarketplaceErrorCodes::IMPORT_JOB_LOW_SPEED) ||
 				(status == MarketplaceErrorCodes::IMPORT_JOB_TIMEOUT))
 			{
 				if (gSavedSettings.getBOOL("InventoryOutboxLogging"))
diff --git a/indra/newview/llmarketplacefunctions.h b/indra/newview/llmarketplacefunctions.h
index 4c4e07903..af7c864c0 100644
--- a/indra/newview/llmarketplacefunctions.h
+++ b/indra/newview/llmarketplacefunctions.h
@@ -35,6 +35,7 @@
 
 #include "llsingleton.h"
 #include "llstring.h"
+#include "llhttpstatuscodes.h"
 
 
 LLSD getMarketplaceStringSubstitutions();
@@ -44,13 +45,14 @@ namespace MarketplaceErrorCodes
 {
 	enum eCode
 	{
-		IMPORT_DONE = 200,
-		IMPORT_PROCESSING = 202,
-		IMPORT_REDIRECT = 302,
-		IMPORT_AUTHENTICATION_ERROR = 401,
-		IMPORT_DONE_WITH_ERRORS = 409,
-		IMPORT_JOB_FAILED = 410,
-		IMPORT_JOB_TIMEOUT = 499,
+		IMPORT_DONE = HTTP_OK,
+		IMPORT_PROCESSING = HTTP_ACCEPTED,
+		IMPORT_REDIRECT = HTTP_FOUND,
+		IMPORT_AUTHENTICATION_ERROR = HTTP_UNAUTHORIZED,
+		IMPORT_DONE_WITH_ERRORS = HTTP_CONFLICT,
+		IMPORT_JOB_FAILED = HTTP_GONE,
+		IMPORT_JOB_LOW_SPEED = HTTP_INTERNAL_ERROR_LOW_SPEED,	// Handled the same way as IMPORT_JOB_TIMEOUT by the responders in llmarketplacefunctions.cpp.
+		IMPORT_JOB_TIMEOUT = HTTP_INTERNAL_ERROR_CURL_TIMEOUT	// Presumably a viewer-internal status rather than one sent by the server -- confirm in llhttpstatuscodes.h.
 	};
 }
 
diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp
index 32fad66b2..88f6c8e38 100644
--- a/indra/newview/llmeshrepository.cpp
+++ b/indra/newview/llmeshrepository.cpp
@@ -1351,17 +1351,17 @@ void AIMeshUpload::initialize_impl()
 	set_state(AIMeshUpload_start);
 }
 
-void AIMeshUpload::multiplex_impl()
+void AIMeshUpload::multiplex_impl(state_type run_state)
 {
-	switch (mRunState)
+	switch (run_state)
 	{
 		case AIMeshUpload_start:
 			mMeshUpload.run(this, AIMeshUpload_threadFinished);
-			idle(AIMeshUpload_start);					// Wait till the thread finished.
+			idle();										// Wait till the thread finished.
 			break;
 		case AIMeshUpload_threadFinished:
 			mMeshUpload->postRequest(mWholeModelUploadURL, this);
-			idle(AIMeshUpload_threadFinished);			// Wait till the responder finished.
+			idle();										// Wait till the responder finished.
 			break;
 		case AIMeshUpload_responderFinished:
 			finish();
@@ -1402,14 +1402,6 @@ void LLMeshUploadThread::postRequest(std::string& whole_model_upload_url, AIMesh
 	}
 }
 
-void AIMeshUpload::abort_impl()
-{
-}
-
-void AIMeshUpload::finish_impl()
-{
-}
-
 void dump_llsd_to_file(const LLSD& content, std::string filename)
 {
 	if (gSavedSettings.getBOOL("MeshUploadLogXML"))
@@ -1788,7 +1780,7 @@ void LLMeshLODResponder::completedRaw(U32 status, const std::string& reason,
 
 	if (data_size < (S32)mRequestedBytes)
 	{
-		if (status == 499 || status == 503)
+		if (is_internal_http_error_that_warrants_a_retry(status) || status == HTTP_SERVICE_UNAVAILABLE)
 		{	//timeout or service unavailable, try again
 			LLMeshRepository::sHTTPRetryCount++;
 			gMeshRepo.mThread->loadMeshLOD(mMeshParams, mLOD);
@@ -1842,7 +1834,7 @@ void LLMeshSkinInfoResponder::completedRaw(U32 status, const std::string& reason
 
 	if (data_size < (S32)mRequestedBytes)
 	{
-		if (status == 499 || status == 503)
+		if (is_internal_http_error_that_warrants_a_retry(status) || status == HTTP_SERVICE_UNAVAILABLE)
 		{	//timeout or service unavailable, try again
 			LLMeshRepository::sHTTPRetryCount++;
 			gMeshRepo.mThread->loadMeshSkinInfo(mMeshID);
@@ -1896,7 +1888,7 @@ void LLMeshDecompositionResponder::completedRaw(U32 status, const std::string& r
 
 	if (data_size < (S32)mRequestedBytes)
 	{
-		if (status == 499 || status == 503)
+		if (is_internal_http_error_that_warrants_a_retry(status) || status == HTTP_SERVICE_UNAVAILABLE)
 		{	//timeout or service unavailable, try again
 			LLMeshRepository::sHTTPRetryCount++;
 			gMeshRepo.mThread->loadMeshDecomposition(mMeshID);
@@ -1950,7 +1942,7 @@ void LLMeshPhysicsShapeResponder::completedRaw(U32 status, const std::string& re
 
 	if (data_size < (S32)mRequestedBytes)
 	{
-		if (status == 499 || status == 503)
+		if (is_internal_http_error_that_warrants_a_retry(status) || status == HTTP_SERVICE_UNAVAILABLE)
 		{	//timeout or service unavailable, try again
 			LLMeshRepository::sHTTPRetryCount++;
 			gMeshRepo.mThread->loadMeshPhysicsShape(mMeshID);
@@ -2001,13 +1993,13 @@ void LLMeshHeaderResponder::completedRaw(U32 status, const std::string& reason,
 		//	<< "Header responder failed with status: "
 		//	<< status << ": " << reason << llendl;
 
-		// 503 (service unavailable) or 499 (timeout)
+		// HTTP_SERVICE_UNAVAILABLE (503) or HTTP_INTERNAL_ERROR_*'s.
 		// can be due to server load and can be retried
 
 		// TODO*: Add maximum retry logic, exponential backoff
 		// and (somewhat more optional than the others) retries
 		// again after some set period of time
-		if (status == 503 || status == 499)
+		if (is_internal_http_error_that_warrants_a_retry(status) || status == HTTP_SERVICE_UNAVAILABLE)
 		{	//retry
 			LLMeshRepository::sHTTPRetryCount++;
 			LLMeshRepoThread::HeaderRequest req(mMeshParams);
diff --git a/indra/newview/llmeshrepository.h b/indra/newview/llmeshrepository.h
index 3db85edf9..d26316a85 100644
--- a/indra/newview/llmeshrepository.h
+++ b/indra/newview/llmeshrepository.h
@@ -445,11 +445,9 @@ public:
 
 protected:
 	// Implement AIStateMachine.
-	/*virtual*/ const char* state_str_impl(state_type) const;
+	/*virtual*/ const char* state_str_impl(state_type run_state) const;
 	/*virtual*/ void initialize_impl();
-	/*virtual*/ void multiplex_impl();
-	/*virtual*/ void abort_impl();
-	/*virtual*/ void finish_impl();
+	/*virtual*/ void multiplex_impl(state_type run_state);
 };
 
 class LLMeshRepository
diff --git a/indra/newview/llpanelplace.cpp b/indra/newview/llpanelplace.cpp
index 2afff003e..5bbf93ed3 100644
--- a/indra/newview/llpanelplace.cpp
+++ b/indra/newview/llpanelplace.cpp
@@ -203,16 +203,25 @@ void LLPanelPlace::setLandTypeString(const std::string& land_type)
 
 void LLPanelPlace::setErrorStatus(U32 status, const std::string& reason)
 {
-	// We only really handle 404 and 499 errors
+	// Map the status to the text shown in the description editor: 404, 401 and the internal timeouts have dedicated texts; anything else is logged and shown generically.
 	std::string error_text;
-	if(status == 404)
+	if (status == HTTP_NOT_FOUND)
 	{	
 		error_text = getString("server_error_text");
 	}
-	else if(status == 499)
+	else if (status == HTTP_UNAUTHORIZED)		// AIFIXME: Is this indeed the error we get when we don't have access rights for this?
 	{
 		error_text = getString("server_forbidden_text");
 	}
+	else if (status == HTTP_INTERNAL_ERROR_LOW_SPEED || status == HTTP_INTERNAL_ERROR_CURL_TIMEOUT)	// Both internal timeout flavours share one message.
+	{
+		error_text = getString("internal_timeout_text");
+	}
+	else	// Any other status: warn in the log and show the raw status and reason.
+	{
+		llwarns << "Unexpected error (" << status << "): " << reason << llendl;
+		error_text = llformat("Unexpected Error (%u): %s", status, reason.c_str());	// NOTE(review): unlike the other branches this text is hard-coded rather than fetched with getString().
+	}
 	mDescEditor->setText(error_text);
 }
 
diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp
index 2df9556a2..3ecc2aa4d 100644
--- a/indra/newview/llstartup.cpp
+++ b/indra/newview/llstartup.cpp
@@ -261,10 +261,6 @@ extern S32 gStartImageHeight;
 // local globals
 //
 
-#if defined(CWDEBUG) || defined(DEBUG_CURLIO)
-static bool gCurlIo;
-#endif
-
 static LLHost gAgentSimHost;
 static BOOL gSkipOptionalUpdate = FALSE;
 
diff --git a/indra/newview/lltexturefetch.cpp b/indra/newview/lltexturefetch.cpp
index 33065e3c4..1d3634573 100644
--- a/indra/newview/lltexturefetch.cpp
+++ b/indra/newview/lltexturefetch.cpp
@@ -1334,18 +1334,17 @@ bool LLTextureFetchWorker::doWork(S32 param)
 					mRequestedOffset--;
 				}
 
-				try
-				{
-					// Will call callbackHttpGet when curl request completes
-					AIHTTPHeaders headers("Accept", "image/x-j2c");
-					LLHTTPClient::getByteRange(mUrl, mRequestedOffset, mRequestedSize,
-												new HTTPGetResponder(mFetcher, mID, LLTimer::getTotalTime(), mRequestedSize, mRequestedOffset, true), headers);
-					res = true;
-				}
-				catch(AICurlNoEasyHandle const& error)
-				{
-					llwarns << error.what() << llendl;
-				}
+				  // Will call callbackHttpGet when curl request completes
+				  AIHTTPHeaders headers("Accept", "image/x-j2c");
+				  // Call LLHTTPClient::request directly instead of LLHTTPClient::getByteRange, because we want to pass a NULL AIEngine.
+				  if (mRequestedOffset > 0 || mRequestedSize > 0)
+				  {
+					  headers.addHeader("Range", llformat("bytes=%d-%d", mRequestedOffset, mRequestedOffset + mRequestedSize - 1));
+				  }
+				  LLHTTPClient::request(mUrl, LLHTTPClient::HTTP_GET, NULL,
+					  new HTTPGetResponder(mFetcher, mID, LLTimer::getTotalTime(), mRequestedSize, mRequestedOffset, true),
+					  headers/*,*/ DEBUG_CURLIO_PARAM(false), keep_alive, no_does_authentication, allow_compressed_reply, NULL, 0, NULL);
+				  res = true;
 			}
 			if (!res)
 			{
@@ -1370,14 +1369,21 @@ bool LLTextureFetchWorker::doWork(S32 param)
 			if (mRequestedSize < 0)
 			{
 				S32 max_attempts;
-				if (mGetStatus == HTTP_NOT_FOUND || mGetStatus == 499)
+				if (mGetStatus == HTTP_NOT_FOUND || mGetStatus == HTTP_INTERNAL_ERROR_CURL_TIMEOUT || mGetStatus == HTTP_INTERNAL_ERROR_LOW_SPEED)
 				{
 					mHTTPFailCount = max_attempts = 1; // Don't retry
 					if(mGetStatus == HTTP_NOT_FOUND)
 						llwarns << "Texture missing from server (404): " << mUrl << llendl;
-					else if (mGetStatus == 499) 
+					else if (mGetStatus == HTTP_INTERNAL_ERROR_CURL_TIMEOUT || mGetStatus == HTTP_INTERNAL_ERROR_LOW_SPEED)
 					{
-						llwarns << "No response from server (499): " << mUrl << llendl;
+						if (mGetStatus == HTTP_INTERNAL_ERROR_CURL_TIMEOUT)
+						{
+							llwarns << "No response from server (HTTP_INTERNAL_ERROR_CURL_TIMEOUT): " << mUrl << llendl;
+						}
+						else
+						{
+							llwarns << "Slow response from server (HTTP_INTERNAL_ERROR_LOW_SPEED): " << mUrl << llendl;
+						}
 						SGHostBlackList::add(mUrl, 60.0, mGetStatus);
 					}
 					//roll back to try UDP
diff --git a/indra/newview/llviewercontrol.cpp b/indra/newview/llviewercontrol.cpp
index 76646c83e..05d5836d6 100644
--- a/indra/newview/llviewercontrol.cpp
+++ b/indra/newview/llviewercontrol.cpp
@@ -134,7 +134,7 @@ static bool handleTerrainScaleChanged(const LLSD& inputvalue)
 bool handleStateMachineMaxTimeChanged(const LLSD& newvalue)
 {
 	F32 StateMachineMaxTime = newvalue.asFloat();
-	AIStateMachine::setMaxCount(StateMachineMaxTime);
+	AIEngine::setMaxCount(StateMachineMaxTime);
 	return true;
 }
 
diff --git a/indra/newview/llwebprofile.cpp b/indra/newview/llwebprofile.cpp
index b6f8e0a65..7286766c6 100644
--- a/indra/newview/llwebprofile.cpp
+++ b/indra/newview/llwebprofile.cpp
@@ -97,7 +97,7 @@ public:
 		}
 
 		// *TODO: 404 = not supported by the grid
-		// *TODO: increase timeout or handle 499 Expired
+		// *TODO: increase timeout or handle HTTP_INTERNAL_ERROR_* time errors.
 
 		// Convert config to LLSD.
 		const Json::Value data = root["data"];
diff --git a/indra/newview/llxmlrpcresponder.h b/indra/newview/llxmlrpcresponder.h
index 25615f236..3c32cd2a6 100644
--- a/indra/newview/llxmlrpcresponder.h
+++ b/indra/newview/llxmlrpcresponder.h
@@ -35,7 +35,6 @@
 #define LLXMLRPCRESPONDER_H
 
 #include <string>
-#include "llurlrequest.h"	// Injector
 #include "llcurl.h"
 #include "llhttpstatuscodes.h"
 
diff --git a/indra/newview/skins/default/xui/en-us/panel_place.xml b/indra/newview/skins/default/xui/en-us/panel_place.xml
index f3ccd14ce..03670395e 100644
--- a/indra/newview/skins/default/xui/en-us/panel_place.xml
+++ b/indra/newview/skins/default/xui/en-us/panel_place.xml
@@ -52,4 +52,7 @@
 	<string name="server_forbidden_text">
 		Information about this location is unavailable due to access restrictions.  Please check your permissions with the parcel owner.
 	</string>
+	<string name="internal_timeout_text">
+		Information about this location is unavailable due to a network timeout, please try again later.
+	</string>
 </panel>
diff --git a/indra/newview/statemachine/aievent.cpp b/indra/newview/statemachine/aievent.cpp
index 8682f7670..aa39120af 100644
--- a/indra/newview/statemachine/aievent.cpp
+++ b/indra/newview/statemachine/aievent.cpp
@@ -76,7 +76,6 @@ typedef AIAccess<AIRegisteredStateMachines> registered_statemachines_wat;
 // static
 void AIEvent::Register(AIEvents event, AIStateMachine* statemachine, bool one_shot)
 {
-	statemachine->idle();
 	registered_statemachines_wat registered_statemachines_w(registered_statemachines_list[event]);
 	registered_statemachines_w->Register(statemachine, one_shot);
 }
diff --git a/indra/newview/statemachine/aifetchinventoryfolder.cpp b/indra/newview/statemachine/aifetchinventoryfolder.cpp
index 1302c1a3a..d8e952c0d 100644
--- a/indra/newview/statemachine/aifetchinventoryfolder.cpp
+++ b/indra/newview/statemachine/aifetchinventoryfolder.cpp
@@ -49,19 +49,20 @@ class AIInventoryFetchDescendentsObserver : public LLInventoryFetchDescendentsOb
   protected:
 	/*virtual*/ void done()
 	{
-	  mStateMachine->set_state(AIFetchInventoryFolder_folderCompleted);
+	  mStateMachine->advance_state(AIFetchInventoryFolder_folderCompleted);
 	  delete this;
 	}
 
   private:
-	AIStateMachine* mStateMachine;
+	LLPointer<AIStateMachine> mStateMachine;
 };
 
 AIInventoryFetchDescendentsObserver::AIInventoryFetchDescendentsObserver(AIStateMachine* statemachine, LLUUID const& folder) : 
 	mStateMachine(statemachine),
 	LLInventoryFetchDescendentsObserver(folder)
 {
-	mStateMachine->idle();
+	// The caller must call idle() on the state machine before passing it in; the assert below checks that it is waiting.
+	llassert(mStateMachine->waiting());
 	startFetch();
 	if(isFinished())
 	{
@@ -97,14 +98,15 @@ void AIFetchInventoryFolder::initialize_impl(void)
   set_state(AIFetchInventoryFolder_checkFolderExists);
   if (!gInventory.isInventoryUsable())
   {
-	// This immediately calls this->idle(), and then when the event occurs cont().
+	idle();
+	// This calls this->cont() when the event occurs.
 	AIEvent::Register(AIEvent::LLInventoryModel_mIsAgentInvUsable_true, this);
   }
 }
 
-void AIFetchInventoryFolder::multiplex_impl(void)
+void AIFetchInventoryFolder::multiplex_impl(state_type run_state)
 {
-  switch (mRunState)
+  switch (run_state)
   {
 	case AIFetchInventoryFolder_checkFolderExists:
 	{
@@ -172,6 +174,7 @@ void AIFetchInventoryFolder::multiplex_impl(void)
 	}
 	case AIFetchInventoryFolder_fetchDescendents:
 	{
+	  idle();	// Wait till the state is set to AIFetchInventoryFolder_folderCompleted.
 	  // This sets the state to AIFetchInventoryFolder_folderCompleted once the folder is complete.
 	  new AIInventoryFetchDescendentsObserver(this, mFolderUUID);
 	  break;
@@ -193,10 +196,6 @@ void AIFetchInventoryFolder::multiplex_impl(void)
   }
 }
 
-void AIFetchInventoryFolder::abort_impl(void)
-{
-}
-
 void AIFetchInventoryFolder::finish_impl(void)
 {
   if (mNeedNotifyObservers)
diff --git a/indra/newview/statemachine/aifetchinventoryfolder.h b/indra/newview/statemachine/aifetchinventoryfolder.h
index 564dce16e..2b10c0351 100644
--- a/indra/newview/statemachine/aifetchinventoryfolder.h
+++ b/indra/newview/statemachine/aifetchinventoryfolder.h
@@ -138,10 +138,7 @@ class AIFetchInventoryFolder : public AIStateMachine {
 	/*virtual*/ void initialize_impl(void);
 
 	// Handle mRunState.
-	/*virtual*/ void multiplex_impl(void);
-
-	// Handle aborting from current bs_run state.
-	/*virtual*/ void abort_impl(void);
+	/*virtual*/ void multiplex_impl(state_type run_state);
 
 	// Handle cleaning up from initialization (or post abort) state.
 	/*virtual*/ void finish_impl(void);
diff --git a/indra/newview/statemachine/aifilepicker.cpp b/indra/newview/statemachine/aifilepicker.cpp
index e9f59efbf..c3675c4e1 100644
--- a/indra/newview/statemachine/aifilepicker.cpp
+++ b/indra/newview/statemachine/aifilepicker.cpp
@@ -66,7 +66,7 @@ char const* AIFilePicker::state_str_impl(state_type run_state) const
 	return "UNKNOWN STATE";
 }
 
-AIFilePicker::AIFilePicker(void) : mPluginManager(NULL), mAutoKill(false), mCanceled(false)
+AIFilePicker::AIFilePicker(void) : mPluginManager(NULL), mCanceled(false)
 {
 }
 
@@ -345,7 +345,7 @@ void AIFilePicker::initialize_impl(void)
 	set_state(AIFilePicker_initialize_plugin);
 }
 
-void AIFilePicker::multiplex_impl(void)
+void AIFilePicker::multiplex_impl(state_type run_state)
 {
 	mPluginManager->update();										// Give the plugin some CPU for it's messages.
 	LLPluginClassBasic* plugin = mPluginManager->getPlugin();
@@ -355,7 +355,7 @@ void AIFilePicker::multiplex_impl(void)
 		abort();
 		return;
 	}
-	switch (mRunState)
+	switch (run_state)
 	{
 		case AIFilePicker_initialize_plugin:
 		{
@@ -430,10 +430,6 @@ void AIFilePicker::multiplex_impl(void)
 	}
 }
 
-void AIFilePicker::abort_impl(void)
-{
-}
-
 void AIFilePicker::finish_impl(void)
 {
 	if (mPluginManager)
@@ -442,12 +438,6 @@ void AIFilePicker::finish_impl(void)
 		mPluginManager = NULL;
 	}
 	mFilter.clear();		// Check that open is called before calling run (again).
-	if (mAutoKill)
-	{
-		// The default behavior is to delete the plugin. This can be overridden in
-		// the callback by calling run() again.
-		kill();
-	}
 }
 
 // This function is called when a new message is received from the plugin.
@@ -467,7 +457,7 @@ void AIFilePicker::receivePluginMessage(const LLPluginMessage &message)
 		if (message_name == "canceled")
 		{
 			LL_DEBUGS("Plugin") << "received message \"canceled\"" << LL_ENDL;
-			set_state(AIFilePicker_canceled);
+			advance_state(AIFilePicker_canceled);
 		}
 		else if (message_name == "done")
 		{
@@ -478,7 +468,7 @@ void AIFilePicker::receivePluginMessage(const LLPluginMessage &message)
 			{
 				mFilenames.push_back(*filename);
 			}
-			set_state(AIFilePicker_done);
+			advance_state(AIFilePicker_done);
 		}
 		else
 		{
diff --git a/indra/newview/statemachine/aifilepicker.h b/indra/newview/statemachine/aifilepicker.h
index a56a8677f..a5ee00a61 100644
--- a/indra/newview/statemachine/aifilepicker.h
+++ b/indra/newview/statemachine/aifilepicker.h
@@ -136,8 +136,8 @@ new AIFilePicker
   which sets the state to AIFilePicker_canceled or AIFilePicker_done
   respectively, causing a call to AIStateMachine::finish(), which calls
   AIFilePicker::finish_impl which destroys the plugin (mPluginBase),
-  the plugin manager (mPluginManager) and calls AIStateMachine::kill()
-  causing the AIFilePicker to be deleted.
+  and the plugin manager (mPluginManager), after which the state machine
+  calls unref(), causing the AIFilePicker to be deleted.
 
 */
 
@@ -155,7 +155,7 @@ public:
 	AIFilePicker(void);
 
 	// Create a dynamically created AIFilePicker object.
-	static AIFilePicker* create(bool auto_kill = true) { AIFilePicker* filepicker = new AIFilePicker; filepicker->mAutoKill = auto_kill; return filepicker; }
+	static AIFilePicker* create(void) { AIFilePicker* filepicker = new AIFilePicker; return filepicker; }
 
 	// The starting directory that the user will be in when the file picker opens
 	// will be the same as the directory used the last time the file picker was
@@ -191,7 +191,6 @@ private:
 	typedef std::map<std::string, std::string> context_map_type;	//!< Type of mContextMap.
 	static AIThreadSafeSimpleDC<context_map_type> sContextMap;		//!< Map context (ie, "snapshot" or "image") to last used folder.
 	std::string mContext;											//!< Some key to indicate the context (remembers the folder per key).
-	bool mAutoKill;													//!< True if the default behavior is to delete itself after being finished.
 
 	// Input variables (cache variable between call to open and run).
 	open_type mOpenType;					//!< Set to whether opening a filepicker to select for saving one file, for loading one file, or loading multiple files.
@@ -215,10 +214,7 @@ protected:
 	/*virtual*/ void initialize_impl(void);
 
 	// Handle mRunState.
-	/*virtual*/ void multiplex_impl(void);
-
-	// Handle aborting from current bs_run state.
-	/*virtual*/ void abort_impl(void);
+	/*virtual*/ void multiplex_impl(state_type run_state);
 
 	// Handle cleaning up from initialization (or post abort) state.
 	/*virtual*/ void finish_impl(void);