diff --git a/indra/llmath/llcalc.cpp b/indra/llmath/llcalc.cpp
index 6f127c916..597d0815f 100644
--- a/indra/llmath/llcalc.cpp
+++ b/indra/llmath/llcalc.cpp
@@ -54,28 +54,20 @@ LLCalc* LLCalc::sInstance = NULL;
 
 LLCalc::LLCalc() : mLastErrorPos(0)
 {
-//	mUserVariables = new calc_map_t;
-	mVariables = new calc_map_t;
-	mConstants = new calc_map_t;
-		
 	// Init table of constants
-	(*mConstants)["PI"] = F_PI;
-	(*mConstants)["TWO_PI"] = F_TWO_PI;
-	(*mConstants)["PI_BY_TWO"] = F_PI_BY_TWO;
-	(*mConstants)["SQRT2"] = F_SQRT2;
-	(*mConstants)["DEG_TO_RAD"] = DEG_TO_RAD;
-	(*mConstants)["RAD_TO_DEG"] = RAD_TO_DEG;
-	(*mConstants)["GRAVITY"] = GRAVITY;
-	(*mConstants)["ALMOST_ZERO"] = F_ALMOST_ZERO;
-	(*mConstants)["ALMOST_ONE"] = F_ALMOST_ONE;
-	(*mConstants)["THE_ANSWER"] = 42;
+	mConstants["PI"] = F_PI;
+	mConstants["TWO_PI"] = F_TWO_PI;
+	mConstants["PI_BY_TWO"] = F_PI_BY_TWO;
+	mConstants["SQRT_TWO_PI"] = F_SQRT_TWO_PI;
+	mConstants["SQRT2"] = F_SQRT2;
+	mConstants["SQRT3"] = F_SQRT3;
+	mConstants["DEG_TO_RAD"] = DEG_TO_RAD;
+	mConstants["RAD_TO_DEG"] = RAD_TO_DEG;
+	mConstants["GRAVITY"] = GRAVITY;
 }
 
 LLCalc::~LLCalc()
 {
-	delete mConstants;
-	delete mVariables;
-//	delete mUserVariables;	
 }
 
 //static
@@ -94,17 +86,17 @@ LLCalc* LLCalc::getInstance()
 
 void LLCalc::setVar(const std::string& name, const F32& value)
 {
-	(*mVariables)[name] = value;
+	mVariables[name] = value;
 }
 
 void LLCalc::clearVar(const std::string& name)
 {
-	mVariables->erase(name);
+	mVariables.erase(name);
 }
 
 void LLCalc::clearAllVariables()
 {
-	mVariables->clear();
+	mVariables.clear();
 }
 
 /*
@@ -123,7 +115,7 @@ bool LLCalc::evalString(const std::string& expression, F32& result)
 	std::string expr_upper = expression;
 	LLStringUtil::toUpper(expr_upper);
 	
-	LLCalcParser calc(result, mConstants, mVariables);
+	LLCalcParser calc(result, &mConstants, &mVariables);
 
 	mLastErrorPos = 0;
 	std::string::iterator start = expr_upper.begin();
diff --git a/indra/llmath/llcalc.h b/indra/llmath/llcalc.h
index 23c83f623..886e0bc8f 100644
--- a/indra/llmath/llcalc.h
+++ b/indra/llmath/llcalc.h
@@ -69,8 +69,8 @@ public:
 private:
 	std::string::size_type	mLastErrorPos;
 	
-	calc_map_t*	mConstants;
-	calc_map_t*	mVariables;
+	calc_map_t	mConstants;
+	calc_map_t	mVariables;
 	
 	// *TODO: Add support for storing user defined variables, and stored functions.
 	//	Will need UI work, and a means to save them between sessions.
diff --git a/indra/llmath/llcalcparser.h b/indra/llmath/llcalcparser.h
index a759a3571..600e17366 100644
--- a/indra/llmath/llcalcparser.h
+++ b/indra/llmath/llcalcparser.h
@@ -73,7 +73,9 @@ struct LLCalcParser : grammar<LLCalcParser>
 				 (str_p("SQRT") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_sqrt)(self,arg1)]) |
 				 (str_p("LOG") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_log)(self,arg1)]) |
 				 (str_p("EXP") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_exp)(self,arg1)]) |
-				 (str_p("ABS") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_fabs)(self,arg1)])
+				 (str_p("ABS") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_fabs)(self,arg1)]) |
+				 (str_p("FLR") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_floor)(self,arg1)]) |
+				 (str_p("CEIL") >> '(' >> expression[unary_func.value = bind(&LLCalcParser::_ceil)(self,arg1)])
 				) >> assert_syntax(ch_p(')'))
 			;
 			
@@ -118,7 +120,8 @@ struct LLCalcParser : grammar<LLCalcParser>
 			term =
 				power[term.value = arg1] >>
 				*(('*' >> assert_syntax(power[term.value *= arg1])) |
-				  ('/' >> assert_syntax(power[term.value /= arg1]))
+				  ('/' >> assert_syntax(power[term.value /= arg1])) |
+				  ('%' >> assert_syntax(power[term.value = bind(&fmodf)(term.value, arg1)]))
 				)
 			;
 			
@@ -153,7 +156,9 @@ private:
 	F32 _sqrt(const F32& a) const { return sqrt(a); }
 	F32 _log(const F32& a) const { return log(a); }
 	F32 _exp(const F32& a) const { return exp(a); }
-	F32 _fabs(const F32& a) const { return fabs(a) * RAD_TO_DEG; }
+	F32 _fabs(const F32& a) const { return fabs(a); }
+	F32 _floor(const F32& a) const { return floor(a); }	// floor() keeps the value in floating point; llfloor() returns S32 and cannot represent large magnitudes
+	F32 _ceil(const F32& a) const { return ceil(a); }	// use the C math routine like the other unary helpers so the result stays in floating point
 
 	F32 _atan2(const F32& a,const F32& b) const { return atan2(a,b); }
 
diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp
index 263803930..93fd46cc9 100644
--- a/indra/llmath/llcamera.cpp
+++ b/indra/llmath/llcamera.cpp
@@ -168,166 +168,91 @@ size_t LLCamera::readFrustumFromBuffer(const char *buffer)
 
 // ---------------- test methods  ---------------- 
 
-S32 LLCamera::AABBInFrustum(const LLVector3 &center, const LLVector3& radius) 
+S32 LLCamera::AABBInFrustum(const LLVector4a &center, const LLVector4a& radius) // Tests the box center +/- radius against every unmasked agent plane; yields 0, 1 or 2 (see the returns below)
 {
-	static const LLVector3 scaler[] = {
-		LLVector3(-1,-1,-1),
-		LLVector3( 1,-1,-1),
-		LLVector3(-1, 1,-1),
-		LLVector3( 1, 1,-1),
-		LLVector3(-1,-1, 1),
-		LLVector3( 1,-1, 1),
-		LLVector3(-1, 1, 1),
-		LLVector3( 1, 1, 1)
+	static const LLVector4a scaler[] = {	// sign patterns indexed by plane mask: bit 0 -> x, bit 1 -> y, bit 2 -> z (set bit = +1, clear bit = -1)
+		LLVector4a(-1,-1,-1),
+		LLVector4a( 1,-1,-1),
+		LLVector4a(-1, 1,-1),
+		LLVector4a( 1, 1,-1),
+		LLVector4a(-1,-1, 1),
+		LLVector4a( 1,-1, 1),
+		LLVector4a(-1, 1, 1),
+		LLVector4a( 1, 1, 1)
 	};
 
 	U8 mask = 0;
-	S32 result = 2;
-
-	/*if (radius.magVecSquared() > mFrustumCornerDist * mFrustumCornerDist)
-	{ //box is larger than frustum, check frustum quads against box planes
-
-		static const LLVector3 dir[] = 
-		{
-			LLVector3(1, 0, 0),
-			LLVector3(-1, 0, 0),
-			LLVector3(0, 1, 0),
-			LLVector3(0, -1, 0),
-			LLVector3(0, 0, 1),
-			LLVector3(0, 0, -1)
-		};
-
-		U32 quads[] = 
-		{
-			0, 1, 2, 3,
-			0, 1, 5, 4,
-			2, 3, 7, 6,
-			3, 0, 7, 4,
-			1, 2, 6, 4,
-			4, 5, 6, 7
-		};
-
-		result = 0;
-
-		BOOL total_inside = TRUE;
-		for (U32 i = 0; i < 6; i++)
-		{ 
-			LLVector3 p = center + radius.scaledVec(dir[i]);
-			F32 d = -p*dir[i];
-
-			for (U32 j = 0; j <	6; j++)
-			{ //for each quad
-				F32 dist = mAgentFrustum[quads[j*4+0]]*dir[i] + d;
-				if (dist > 0)
-				{ //at least one frustum point is outside the AABB
-					total_inside = FALSE;
-					for (U32 k = 1; k < 4; k++)
-					{ //for each other point on quad
-						if ( mAgentFrustum[quads[j*4+k]]*dir[i]+d  <= 0.f)
-						{ //quad is straddling some plane of AABB
-							return 1;
-						}
-					}
-				}
-				else
-				{
-					for (U32 k = 1; k < 4; k++)
-					{
-						if (mAgentFrustum[quads[j*4+k]]*dir[i]+d > 0.f)
-						{
-							return 1;
-						}
-					}
-				}
-			}
-		}
-
-		if (total_inside)
-		{
-			result = 1;
-		}
-	}
-	else*/
+	bool result = false;	// becomes true once some tested plane has maxp on its positive side
+	LLVector4a rscale, maxp, minp;
+	LLSimdScalar d;
+	for (U32 i = 0; i < mPlaneCount; i++)
 	{
-		for (U32 i = 0; i < mPlaneCount; i++)
+		mask = mPlaneMask[i];
+		if (mask != 0xff)	// a mask of 0xff marks a plane that is not tested
 		{
-			mask = mPlaneMask[i];
-			if (mask == 0xff)
-			{
-				continue;
-			}
-			LLPlane p = mAgentPlanes[i];
-			LLVector3 n = LLVector3(p);
-			float d = p.mV[3];
-			LLVector3 rscale = radius.scaledVec(scaler[mask]);
-
-			LLVector3 minp = center - rscale;
-			LLVector3 maxp = center + rscale;
-
-			if (n * minp > -d) 
+			const LLPlane& p(mAgentPlanes[i]);
+			p.getAt<3>(d);	// d = component 3 of the plane
+			rscale.setMul(radius, scaler[mask]);	// radius with per-axis signs selected by the plane mask
+			minp.setSub(center, rscale);
+			d = -d;	// both tests below compare dot3(p, corner) against -d
+			if (p.dot3(minp).getF32() > d) 
 			{
 				return 0;
 			}
-		
-			if (n * maxp > -d)
+			
+			if(!result)	// maxp is only evaluated until the first plane that sets result
 			{
-				result = 1;
+				maxp.setAdd(center, rscale);
+				result = (p.dot3(maxp).getF32() > d);
 			}
 		}
 	}
 
-	
-	return result;
+	return result?1:2;	// 1 if any maxp test succeeded, otherwise 2
 }
 
-S32 LLCamera::AABBInFrustumNoFarClip(const LLVector3 &center, const LLVector3& radius) 
+
+S32 LLCamera::AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius) // Same plane test as AABBInFrustum but plane index 5 is never tested; yields 0, 1 or 2 (see the returns below)
 {
-	static const LLVector3 scaler[] = {
-		LLVector3(-1,-1,-1),
-		LLVector3( 1,-1,-1),
-		LLVector3(-1, 1,-1),
-		LLVector3( 1, 1,-1),
-		LLVector3(-1,-1, 1),
-		LLVector3( 1,-1, 1),
-		LLVector3(-1, 1, 1),
-		LLVector3( 1, 1, 1)
+	static const LLVector4a scaler[] = {	// sign patterns indexed by plane mask: bit 0 -> x, bit 1 -> y, bit 2 -> z (set bit = +1, clear bit = -1)
+		LLVector4a(-1,-1,-1),
+		LLVector4a( 1,-1,-1),
+		LLVector4a(-1, 1,-1),
+		LLVector4a( 1, 1,-1),
+		LLVector4a(-1,-1, 1),
+		LLVector4a( 1,-1, 1),
+		LLVector4a(-1, 1, 1),
+		LLVector4a( 1, 1, 1)
 	};
 
 	U8 mask = 0;
-	S32 result = 2;
-
+	bool result = false;	// becomes true once some tested plane has maxp on its positive side
+	LLVector4a rscale, maxp, minp;
+	LLSimdScalar d;
 	for (U32 i = 0; i < mPlaneCount; i++)
 	{
-		if (i == 5)
-		{
-			continue;
-		}
-
 		mask = mPlaneMask[i];
-		if (mask == 0xff)
+		if ((i != 5) && (mask != 0xff))	// skip plane 5 (presumably the far plane, per the function name) and planes masked with 0xff
 		{
-			continue;
-		}
-		LLPlane p = mAgentPlanes[i];
-		LLVector3 n = LLVector3(p);
-		float d = p.mV[3];
-		LLVector3 rscale = radius.scaledVec(scaler[mask]);
-
-		LLVector3 minp = center - rscale;
-		LLVector3 maxp = center + rscale;
-
-		if (n * minp > -d) 
-		{
-			return 0;
-		}
-	
-		if (n * maxp > -d)
-		{
-			result = 1;
+			const LLPlane& p(mAgentPlanes[i]);
+			p.getAt<3>(d);	// d = component 3 of the plane
+			rscale.setMul(radius, scaler[mask]);	// radius with per-axis signs selected by the plane mask
+			minp.setSub(center, rscale);
+			d = -d;	// both tests below compare dot3(p, corner) against -d
+			if (p.dot3(minp).getF32() > d) 
+			{
+				return 0;	// minp lies on the positive side of this plane: stop immediately
+			}
+			
+			if(!result)	// maxp is only evaluated until the first plane that sets result
+			{
+				maxp.setAdd(center, rscale);
+				result = (p.dot3(maxp).getF32() > d);
+			}
 		}
 	}
 
-	return result;
+	return result?1:2;	// 1 if any maxp test succeeded, otherwise 2
 }
 
 int LLCamera::sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius) 
@@ -715,9 +640,10 @@ void LLCamera::calculateWorldFrustumPlanes()
 	F32 d;
 	LLVector3 center = mOrigin - mXAxis*mNearPlane;
 	mWorldPlanePos = center;
+	LLVector3 pnorm;	
 	for (int p=0; p<4; p++)
 	{
-		LLVector3 pnorm = LLVector3(mLocalPlanes[p]);
+		mLocalPlanes[p].getVector3(pnorm);
 		LLVector3 norm = rotateToAbsolute(pnorm);
 		norm.normVec();
 		d = -(center * norm);
@@ -727,13 +653,15 @@ void LLCamera::calculateWorldFrustumPlanes()
 	LLVector3 zaxis(0, 0, 1.0f);
 	F32 yaw = getYaw();
 	{
-		LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_LEFT]);
+		LLVector3 tnorm;
+		mLocalPlanes[PLANE_LEFT].getVector3(tnorm);
 		tnorm.rotVec(yaw, zaxis);
 		d = -(mOrigin * tnorm);
 		mHorizPlanes[HORIZ_PLANE_LEFT] = LLPlane(tnorm, d);
 	}
 	{
-		LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_RIGHT]);
+		LLVector3 tnorm;
+		mLocalPlanes[PLANE_RIGHT].getVector3(tnorm);
 		tnorm.rotVec(yaw, zaxis);
 		d = -(mOrigin * tnorm);
 		mHorizPlanes[HORIZ_PLANE_RIGHT] = LLPlane(tnorm, d);
diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h
index 56afca025..3b0ff65a5 100644
--- a/indra/llmath/llcamera.h
+++ b/indra/llmath/llcamera.h
@@ -191,8 +191,8 @@ public:
 	S32 sphereInFrustum(const LLVector3 &center, const F32 radius) const;
 	S32 pointInFrustum(const LLVector3 &point) const { return sphereInFrustum(point, 0.0f); }
 	S32 sphereInFrustumFull(const LLVector3 &center, const F32 radius) const { return sphereInFrustum(center, radius); }
-	S32 AABBInFrustum(const LLVector3 &center, const LLVector3& radius);
-	S32 AABBInFrustumNoFarClip(const LLVector3 &center, const LLVector3& radius);
+	S32 AABBInFrustum(const LLVector4a& center, const LLVector4a& radius);
+	S32 AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius);
 
 	//does a quick 'n dirty sphere-sphere check
 	S32 sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius); 
diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h
index cbc8388a7..c80223bc2 100644
--- a/indra/llmath/llmath.h
+++ b/indra/llmath/llmath.h
@@ -61,32 +61,11 @@
 #endif
 
 // Single Precision Floating Point Routines
-#ifndef sqrtf
-#define sqrtf(x)	((F32)sqrt((F64)(x)))
-#endif
-#ifndef fsqrtf
-#define fsqrtf(x)	sqrtf(x)
-#endif
-
-#ifndef cosf
-#define cosf(x)		((F32)cos((F64)(x)))
-#endif
-#ifndef sinf
-#define sinf(x)		((F32)sin((F64)(x)))
-#endif
-#ifndef tanf
+// (There used to be more defined here, but they appeared to be redundant and 
+// were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09)
+/*#ifndef tanf
 #define tanf(x)		((F32)tan((F64)(x)))
-#endif
-#ifndef acosf
-#define acosf(x)	((F32)acos((F64)(x)))
-#endif
-
-#ifndef powf
-#define powf(x,y)	((F32)pow((F64)(x),(F64)(y)))
-#endif
-#ifndef expf
-#define expf(x)		((F32)exp((F64)(x)))
-#endif
+#endif*/
 
 const F32	GRAVITY			= -9.8f;
 
@@ -206,7 +185,7 @@ inline S32 llfloor( F32 f )
 		}
 		return result;
 #else
-		return (S32)floorf(f);
+		return (S32)floor(f);
 #endif
 }
 
diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h
index eda350e93..4382005a4 100644
--- a/indra/llmath/lloctree.h
+++ b/indra/llmath/lloctree.h
@@ -96,23 +96,30 @@ public:
 	typedef LLOctreeNode<T>		oct_node;
 	typedef LLOctreeListener<T>	oct_listener;
 
-	static const U8 OCTANT_POSITIVE_X = 0x01;
-	static const U8 OCTANT_POSITIVE_Y = 0x02;
-	static const U8 OCTANT_POSITIVE_Z = 0x04;
-		
-	LLOctreeNode(	LLVector3d center, 
-					LLVector3d size, 
+	/*void* operator new(size_t size)
+	{
+		return ll_aligned_malloc_16(size);
+	}
+
+	void operator delete(void* ptr)
+	{
+		ll_aligned_free_16(ptr);
+	}*/
+
+	LLOctreeNode(	const LLVector4a& center, 
+					const LLVector4a& size, 
 					BaseType* parent, 
 					U8 octant = 255)
 	:	mParent((oct_node*)parent), 
-		mCenter(center), 
-		mSize(size), 
 		mOctant(octant) 
 	{ 
+		mCenter = center;
+		mSize = size;
+
 		updateMinMax();
 		if ((mOctant == 255) && mParent)
 		{
-			mOctant = ((oct_node*) mParent)->getOctant(mCenter.mdV);
+			mOctant = ((oct_node*) mParent)->getOctant(mCenter);
 		}
 
 		clearChildren();
@@ -129,39 +136,24 @@ public:
 	}
 
 	inline const BaseType* getParent()	const			{ return mParent; }
-	inline void setParent(BaseType* parent)			{ mParent = (oct_node*) parent; }
-	inline const LLVector3d& getCenter() const			{ return mCenter; }
-	inline const LLVector3d& getSize() const			{ return mSize; }
-	inline void setCenter(const LLVector3d center)			{ mCenter = center; }
-	inline void setSize(const LLVector3d size)				{ mSize = size; }
+	inline void setParent(BaseType* parent)				{ mParent = (oct_node*) parent; }
+	inline const LLVector4a& getCenter() const			{ return mCenter; }
+	inline const LLVector4a& getSize() const			{ return mSize; }
+	inline void setCenter(const LLVector4a& center)		{ mCenter = center; }
+	inline void setSize(const LLVector4a& size)			{ mSize = size; }
     inline oct_node* getNodeAt(T* data)					{ return getNodeAt(data->getPositionGroup(), data->getBinRadius()); }
 	inline U8 getOctant() const							{ return mOctant; }
 	inline const oct_node*	getOctParent() const		{ return (const oct_node*) getParent(); }
 	inline oct_node* getOctParent() 					{ return (oct_node*) getParent(); }
 	
-	U8 getOctant(const F64 pos[]) const	//get the octant pos is in
+	U8 getOctant(const LLVector4a& pos) const			//returns a 3-bit octant index for pos relative to mCenter
 	{
-		U8 ret = 0;
-
-		if (pos[0] > mCenter.mdV[0])
-		{
-			ret |= OCTANT_POSITIVE_X;
-		}
-		if (pos[1] > mCenter.mdV[1])
-		{
-			ret |= OCTANT_POSITIVE_Y;
-		}
-		if (pos[2] > mCenter.mdV[2])
-		{
-			ret |= OCTANT_POSITIVE_Z;
-		}
-
-		return ret;
+		return (U8) (pos.greaterThan(mCenter).getGatheredBits() & 0x7);	// keep the low three comparison bits (presumably the x, y, z lanes, matching the removed OCTANT_POSITIVE_* flags)
 	}
 	
-	inline bool isInside(const LLVector3d& pos, const F64& rad) const
+	inline bool isInside(const LLVector4a& pos, const F32& rad) const
 	{
-		return rad <= mSize.mdV[0]*2.0 && isInside(pos); 
+		return rad <= mSize[0]*2.f && isInside(pos); 
 	}
 
 	inline bool isInside(T* data) const			
@@ -169,15 +161,16 @@ public:
 		return isInside(data->getPositionGroup(), data->getBinRadius());
 	}
 
-	bool isInside(const LLVector3d& pos) const
+	bool isInside(const LLVector4a& pos) const
 	{
-		const F64& x = pos.mdV[0];
-		const F64& y = pos.mdV[1];
-		const F64& z = pos.mdV[2];
-			
-		if (x > mMax.mdV[0] || x <= mMin.mdV[0] ||
-			y > mMax.mdV[1] || y <= mMin.mdV[1] ||
-			z > mMax.mdV[2] || z <= mMin.mdV[2])
+		S32 gt = pos.greaterThan(mMax).getGatheredBits() & 0x7;
+		if (gt)
+		{
+			return false;
+		}
+
+		S32 lt = pos.lessEqual(mMin).getGatheredBits() & 0x7;
+		if (lt)
 		{
 			return false;
 		}
@@ -187,11 +180,8 @@ public:
 	
 	void updateMinMax()
 	{
-		for (U32 i = 0; i < 3; i++)
-		{
-			mMax.mdV[i] = mCenter.mdV[i] + mSize.mdV[i];
-			mMin.mdV[i] = mCenter.mdV[i] - mSize.mdV[i];
-		}
+		mMax.setAdd(mCenter, mSize);	// mMax = mCenter + mSize (replaces the per-component loop)
+		mMin.setSub(mCenter, mSize);	// mMin = mCenter - mSize
 	}
 
 	inline oct_listener* getOctListener(U32 index) 
@@ -218,20 +208,20 @@ public:
 				(radius <= p_size && radius > size);
 	}
 
-	static void pushCenter(LLVector3d &center, const LLVector3d &size, const T* data)
+	static void pushCenter(LLVector4a &center, const LLVector4a &size, const T* data)	// moves center by +size on each axis where data's position is greater than center, by -size otherwise
 	{
-		const LLVector3d& pos = data->getPositionGroup();
-		for (U32 i = 0; i < 3; i++)
-		{
-			if (pos.mdV[i] > center.mdV[i])
-			{
-				center.mdV[i] += size.mdV[i];
-			}
-			else 
-			{
-				center.mdV[i] -= size.mdV[i];
-			}
-		}
+		const LLVector4a& pos = data->getPositionGroup();
+
+		LLVector4Logical gt = pos.greaterThan(center);	// per-lane mask of pos > center
+
+		LLVector4a up;
+		up = _mm_and_ps(size, gt);	// size & mask: size in lanes where the mask is set (assuming all-ones/all-zero lanes)
+
+		LLVector4a down;
+		down = _mm_andnot_ps(gt, size);	// ~mask & size: size in the remaining lanes
+
+		center.add(up);
+		center.sub(down);
 	}
 
 	void accept(oct_traveler* visitor)				{ visitor->visit(this); }
@@ -273,14 +263,14 @@ public:
 	}
 
 
-	oct_node* getNodeAt(const LLVector3d& pos, const F32& rad)
+	oct_node* getNodeAt(const LLVector4a& pos, const F32& rad)
 	{ 
 		LLOctreeNode<T>* node = this;
 
 		if (node->isInside(pos, rad))
 		{		
 			//do a quick search by octant
-			U8 octant = node->getOctant(pos.mdV);
+			U8 octant = node->getOctant(pos);
 			
 			//traverse the tree until we find a node that has no node
 			//at the appropriate octant or is smaller than the object.  
@@ -291,7 +281,7 @@ public:
 			while (next_node != 255 && node->getSize()[0] >= rad)
 			{	
 				node = node->getChild(next_node);
-				octant = node->getOctant(pos.mdV);
+				octant = node->getOctant(pos);
 				next_node = node->mChildMap[octant];
 			}
 		}
@@ -347,16 +337,21 @@ public:
 				}
 				
 				//it's here, but no kids are in the right place, make a new kid
-				LLVector3d center(getCenter());
-				LLVector3d size(getSize()*0.5);
+				LLVector4a center = getCenter();
+				LLVector4a size = getSize();
+				size.mul(0.5f);
 		        		
 				//push center in direction of data
 				LLOctreeNode<T>::pushCenter(center, size, data);
 
 				// handle case where floating point number gets too small
-				if( llabs(center.mdV[0] - getCenter().mdV[0]) < F_APPROXIMATELY_ZERO &&
-					llabs(center.mdV[1] - getCenter().mdV[1]) < F_APPROXIMATELY_ZERO &&
-					llabs(center.mdV[2] - getCenter().mdV[2]) < F_APPROXIMATELY_ZERO)
+				LLVector4a val;
+				val.setSub(center, getCenter());
+				val.setAbs(val);
+								
+				S32 lt = val.lessThan(LLVector4a::getEpsilon()).getGatheredBits() & 0x7;
+
+				if( lt == 0x7 )
 				{
 					mData.insert(data);
 					BaseType::insert(data);
@@ -374,7 +369,7 @@ public:
 				//make sure no existing node matches this position
 				for (U32 i = 0; i < getChildCount(); i++)
 				{
-					if (mChild[i]->getCenter() == center)
+					if (mChild[i]->getCenter().equals3(center))
 					{
 						OCT_ERRS << "Octree detected duplicate child center and gave up." << llendl;
 						return false;
@@ -503,18 +498,18 @@ public:
 	{
 #if LL_OCTREE_PARANOIA_CHECK
 
-		if (child->getSize() == getSize())
+		if (child->getSize().equals3(getSize()))
 		{
 			OCT_ERRS << "Child size is same as parent size!" << llendl;
 		}
 
 		for (U32 i = 0; i < getChildCount(); i++)
 		{
-			if(mChild[i]->getSize() != child->getSize()) 
+			if(!mChild[i]->getSize().equals3(child->getSize())) 
 			{
 				OCT_ERRS <<"Invalid octree child size." << llendl;
 			}
-			if (mChild[i]->getCenter() == child->getCenter())
+			if (mChild[i]->getCenter().equals3(child->getCenter()))
 			{
 				OCT_ERRS <<"Duplicate octree child position." << llendl;
 			}
@@ -605,11 +600,11 @@ protected:
 		MIN = 3
 	} eDName;
 
-	LLVector3d mCenter;
-	LLVector3d mSize;
-	LLVector3d mMax;
-	LLVector3d mMin;
-
+	LLVector4a mCenter;
+	LLVector4a mSize;
+	LLVector4a mMax;
+	LLVector4a mMin;
+	
 	oct_node* mParent;
 	U8 mOctant;
 
@@ -628,9 +623,9 @@ public:
 	typedef LLOctreeNode<T>	BaseType;
 	typedef LLOctreeNode<T>		oct_node;
 
-	LLOctreeRoot(	const LLVector3d &center, 
-					const LLVector3d &size, 
-					BaseType* parent)
+	LLOctreeRoot(const LLVector4a& center, 
+				 const LLVector4a& size, 
+				 BaseType* parent)
 	:	BaseType(center, size, parent)
 	{
 	}
@@ -683,18 +678,23 @@ public:
 			return false;
 		}
 		
-		const F64 MAX_MAG = 1024.0*1024.0;
+		LLVector4a MAX_MAG;
+		MAX_MAG.splat(1024.f*1024.f);
 
-		const LLVector3d& v = data->getPositionGroup();
-		if (!(fabs(v.mdV[0]-this->mCenter.mdV[0]) < MAX_MAG &&
-		      fabs(v.mdV[1]-this->mCenter.mdV[1]) < MAX_MAG &&
-		      fabs(v.mdV[2]-this->mCenter.mdV[2]) < MAX_MAG))
+		const LLVector4a& v = data->getPositionGroup();
+
+		LLVector4a val;
+		val.setSub(v, BaseType::mCenter);
+		val.setAbs(val);
+		S32 lt = val.lessThan(MAX_MAG).getGatheredBits() & 0x7;
+
+		if (lt != 0x7)
 		{
-			//OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl;
+			OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl;
 			return false;
 		}
 
-		if (this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))
+		if (this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))
 		{
 			//we got it, just act like a branch
 			oct_node* node = getNodeAt(data);
@@ -710,31 +710,34 @@ public:
 		else if (this->getChildCount() == 0)
 		{
 			//first object being added, just wrap it up
-			while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
+			while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
 			{
-				LLVector3d center, size;
+				LLVector4a center, size;
 				center = this->getCenter();
 				size = this->getSize();
 				LLOctreeNode<T>::pushCenter(center, size, data);
 				this->setCenter(center);
-				this->setSize(size*2);
+				size.mul(2.f);
+				this->setSize(size);
 				this->updateMinMax();
 			}
 			LLOctreeNode<T>::insert(data);
 		}
 		else
 		{
-			while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
+			while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
 			{
 				//the data is outside the root node, we need to grow
-				LLVector3d center(this->getCenter());
-				LLVector3d size(this->getSize());
+				LLVector4a center(this->getCenter());
+				LLVector4a size(this->getSize());
 
 				//expand this node
-				LLVector3d newcenter(center);
+				LLVector4a newcenter(center);
 				LLOctreeNode<T>::pushCenter(newcenter, size, data);
 				this->setCenter(newcenter);
-				this->setSize(size*2);
+				LLVector4a size2 = size;
+				size2.mul(2.f);
+				this->setSize(size2);
 				this->updateMinMax();
 
 				//copy our children to a new branch
diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h
index 8803d0774..32590c195 100644
--- a/indra/llmath/llplane.h
+++ b/indra/llmath/llplane.h
@@ -42,13 +42,17 @@
 // The plane normal = [A, B, C]
 // The closest approach = D / sqrt(A*A + B*B + C*C)
 
-class LLPlane : public LLVector4
+class LLPlane
 {
 public:
+	
+	// Constructors
 	LLPlane() {}; // no default constructor
 	LLPlane(const LLVector3 &p0, F32 d) { setVec(p0, d); }
 	LLPlane(const LLVector3 &p0, const LLVector3 &n) { setVec(p0, n); }
-	inline void setVec(const LLVector3 &p0, F32 d) { LLVector4::setVec(p0[0], p0[1], p0[2], d); }
+	inline void setVec(const LLVector3 &p0, F32 d) { mV.set(p0[0], p0[1], p0[2], d); }
+	
+	// Set
 	inline void setVec(const LLVector3 &p0, const LLVector3 &n)
 	{
 		F32 d = -(p0 * n);
@@ -64,39 +68,38 @@ public:
 		F32 d = -(w * p0);
 		setVec(w, d);
 	}
-
-	inline LLPlane& operator=(const LLVector4& v2) {  LLVector4::setVec(v2[0],v2[1],v2[2],v2[3]); return *this;}
-
-	inline void set(const LLPlane& p2) { LLVector4::setVec(p2); }
+	
+	inline LLPlane& operator=(const LLVector4& v2) {  mV.set(v2[0],v2[1],v2[2],v2[3]); return *this;}
+	
+	inline LLPlane& operator=(const LLVector4a& v2) { mV = v2; return *this; }	// whole-vector copy; avoids four single-float reads of v2
+	
+	inline void set(const LLPlane& p2) { mV = p2.mV; }
 	
 	// 
 	F32 dist(const LLVector3 &v2) const { return mV[0]*v2[0] + mV[1]*v2[1] + mV[2]*v2[2] + mV[3]; }
 	
+	inline LLSimdScalar dot3(const LLVector4a& b) const { return mV.dot3(b); }
+	
+	// Read-only access to a single float in this vector. Do not use in proximity to any function call that manipulates
+	// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead.	
+	inline F32 operator[](const S32 idx) const { return mV[idx]; }
+	
+	// preferable when index is known at compile time
+	template <int N> LL_FORCE_INLINE void getAt(LLSimdScalar& v) const { v = mV.getScalarAt<N>(); } 
+	
 	// reset the vector to 0, 0, 0, 1
-	inline void clear() { LLVector4::setVec(0, 0, 0, 1); }
+	inline void clear() { mV.set(0, 0, 0, 1); }
 	
 	inline void getVector3(LLVector3& vec) const { vec.set(mV[0], mV[1], mV[2]); }
-
-	// Retrieve the mask indicating which of the x, y, or z axis are greater or equal to zero.
-	inline U8 calcPlaneMask() const
-	{
-		U8 mask = 0;
 	
-		if (mV[0] >= 0)
-		{
-			mask |= 1;
-		}
-		if (mV[1] >= 0)
-		{
-			mask |= 2;
-		}
-		if (mV[2] >= 0)
-		{
-			mask |= 4;
-		}
-
-		return mask;
+	// Retrieve the mask indicating which of the x, y, and z components are greater than or equal to zero.
+	inline U8 calcPlaneMask() 
+	{ 
+		return mV.greaterEqual(LLVector4a::getZero()).getGatheredBits() & LLVector4Logical::MASK_XYZ;
 	}
+		
+private:
+	LLVector4a mV;
 };
 
 
diff --git a/indra/llmath/llquantize.h b/indra/llmath/llquantize.h
index 2192427f0..c043f7f75 100644
--- a/indra/llmath/llquantize.h
+++ b/indra/llmath/llquantize.h
@@ -35,10 +35,16 @@
 #define LL_LLQUANTIZE_H
 
 const U16 U16MAX = 65535;
+LL_ALIGN_16( const F32 F_U16MAX_4A[4] ) = { 65535.f, 65535.f, 65535.f, 65535.f };
+
 const F32 OOU16MAX = 1.f/(F32)(U16MAX);
+LL_ALIGN_16( const F32 F_OOU16MAX_4A[4] ) = { OOU16MAX, OOU16MAX, OOU16MAX, OOU16MAX };
 
 const U8 U8MAX = 255;
+LL_ALIGN_16( const F32 F_U8MAX_4A[4] ) = { 255.f, 255.f, 255.f, 255.f };
+
 const F32 OOU8MAX = 1.f/(F32)(U8MAX);
+LL_ALIGN_16( const F32 F_OOU8MAX_4A[4] ) = { OOU8MAX, OOU8MAX, OOU8MAX, OOU8MAX };
 
 const U8 FIRSTVALIDCHAR = 54;
 const U8 MAXSTRINGVAL = U8MAX - FIRSTVALIDCHAR; //we don't allow newline or null 
diff --git a/indra/llmath/v2math.cpp b/indra/llmath/v2math.cpp
index 220336e0c..2603127f7 100644
--- a/indra/llmath/v2math.cpp
+++ b/indra/llmath/v2math.cpp
@@ -92,7 +92,7 @@ F32	dist_vec(const LLVector2 &a, const LLVector2 &b)
 {
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
-	return fsqrtf( x*x + y*y );
+	return (F32) sqrt( x*x + y*y );
 }
 
 F32	dist_vec_squared(const LLVector2 &a, const LLVector2 &b)
diff --git a/indra/llmath/v2math.h b/indra/llmath/v2math.h
index ae26c85ce..35fd1b604 100644
--- a/indra/llmath/v2math.h
+++ b/indra/llmath/v2math.h
@@ -225,7 +225,7 @@ inline void	LLVector2::setVec(const F32 *vec)
 
 inline F32 LLVector2::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 }
 
 inline F32 LLVector2::lengthSquared(void) const
@@ -235,7 +235,7 @@ inline F32 LLVector2::lengthSquared(void) const
 
 inline F32		LLVector2::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -262,7 +262,7 @@ inline bool LLVector2::isFinite() const
 // deprecated
 inline F32		LLVector2::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 }
 
 // deprecated
@@ -274,7 +274,7 @@ inline F32		LLVector2::magVecSquared(void) const
 // deprecated
 inline F32		LLVector2::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
diff --git a/indra/llmath/v3color.h b/indra/llmath/v3color.h
index 1915d8050..95a3de8b6 100644
--- a/indra/llmath/v3color.h
+++ b/indra/llmath/v3color.h
@@ -284,7 +284,7 @@ inline F32		LLColor3::brightness(void) const
 
 inline F32		LLColor3::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 inline F32		LLColor3::lengthSquared(void) const
@@ -294,7 +294,7 @@ inline F32		LLColor3::lengthSquared(void) const
 
 inline F32		LLColor3::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag)
@@ -310,7 +310,7 @@ inline F32		LLColor3::normalize(void)
 // deprecated
 inline F32		LLColor3::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 // deprecated
@@ -322,7 +322,7 @@ inline F32		LLColor3::magVecSquared(void) const
 // deprecated
 inline F32		LLColor3::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag)
@@ -444,7 +444,7 @@ inline F32		distVec(const LLColor3 &a, const LLColor3 &b)
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
 	F32 z = a.mV[2] - b.mV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }
 
 inline F32		distVec_squared(const LLColor3 &a, const LLColor3 &b)
diff --git a/indra/llmath/v3dmath.h b/indra/llmath/v3dmath.h
index 96ae27a52..211b0b91a 100644
--- a/indra/llmath/v3dmath.h
+++ b/indra/llmath/v3dmath.h
@@ -240,7 +240,7 @@ inline const LLVector3d&	LLVector3d::setVec(const F64 *vec)
 
 inline F64 LLVector3d::normVec(void)
 {
-	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	F64 mag = sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);	// no F32 cast: keep the magnitude in full F64 precision
 	F64 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -262,7 +262,7 @@ inline F64 LLVector3d::normVec(void)
 
 inline F64 LLVector3d::normalize(void)
 {
-	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	F64 mag = sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);	// no F32 cast: keep the magnitude in full F64 precision
 	F64 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -286,7 +286,7 @@ inline F64 LLVector3d::normalize(void)
 
 inline F64	LLVector3d::magVec(void) const
 {
-	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	return sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);	// no F32 cast: the F64 return value keeps full precision
 }
 
 inline F64	LLVector3d::magVecSquared(void) const
@@ -296,7 +296,7 @@ inline F64	LLVector3d::magVecSquared(void) const
 
 inline F64	LLVector3d::length(void) const
 {
-	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	return sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);	// no F32 cast: the F64 return value keeps full precision
 }
 
 inline F64	LLVector3d::lengthSquared(void) const
@@ -406,7 +406,7 @@ inline F64	dist_vec(const LLVector3d &a, const LLVector3d &b)
 	F64 x = a.mdV[0] - b.mdV[0];
 	F64 y = a.mdV[1] - b.mdV[1];
 	F64 z = a.mdV[2] - b.mdV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return sqrt( x*x + y*y + z*z );	// no F32 cast: the F64 distance keeps full precision
 }
 
 inline F64	dist_vec_squared(const LLVector3d &a, const LLVector3d &b)
diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h
index 1cfa1edaa..b87db741b 100644
--- a/indra/llmath/v3math.h
+++ b/indra/llmath/v3math.h
@@ -285,7 +285,7 @@ inline void	LLVector3::setVec(const F32 *vec)
 
 inline F32 LLVector3::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -308,7 +308,7 @@ inline F32 LLVector3::normalize(void)
 // deprecated
 inline F32 LLVector3::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -332,7 +332,7 @@ inline F32 LLVector3::normVec(void)
 
 inline F32	LLVector3::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 inline F32	LLVector3::lengthSquared(void) const
@@ -342,7 +342,7 @@ inline F32	LLVector3::lengthSquared(void) const
 
 inline F32	LLVector3::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]); // square root of the summed squares of mV[0..2], cast to the F32 return type
 }
 
 inline F32	LLVector3::magVecSquared(void) const
@@ -476,7 +476,7 @@ inline F32	dist_vec(const LLVector3 &a, const LLVector3 &b)
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
 	F32 z = a.mV[2] - b.mV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }
 
 inline F32	dist_vec_squared(const LLVector3 &a, const LLVector3 &b)
diff --git a/indra/llmath/v4color.h b/indra/llmath/v4color.h
index f5a9adcfc..3a4f8475c 100644
--- a/indra/llmath/v4color.h
+++ b/indra/llmath/v4color.h
@@ -392,7 +392,7 @@ inline const LLColor4&	LLColor4::setAlpha(F32 a)
 
 inline F32		LLColor4::length(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); // only the VX, VY and VZ entries of mV are read; result cast to F32
 }
 
 inline F32		LLColor4::lengthSquared(void) const
@@ -402,7 +402,7 @@ inline F32		LLColor4::lengthSquared(void) const
 
 inline F32		LLColor4::normalize(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag)
@@ -418,7 +418,7 @@ inline F32		LLColor4::normalize(void)
 // deprecated
 inline F32		LLColor4::magVec(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); // only the VX, VY and VZ entries of mV are read; result cast to F32
 }
 
 // deprecated
@@ -430,7 +430,7 @@ inline F32		LLColor4::magVecSquared(void) const
 // deprecated
 inline F32		LLColor4::normVec(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag)
diff --git a/indra/llmath/v4coloru.h b/indra/llmath/v4coloru.h
index ae0d707f9..8b7df774e 100644
--- a/indra/llmath/v4coloru.h
+++ b/indra/llmath/v4coloru.h
@@ -300,7 +300,7 @@ inline const LLColor4U&	LLColor4U::setAlpha(U8 a)
 
 inline F32		LLColor4U::length(void) const
 {
-	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
+	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] ); // first factor of each product is cast to F32, so squares are formed and summed in floating point; only VX/VY/VZ entries are read
 }
 
 inline F32		LLColor4U::lengthSquared(void) const
@@ -311,7 +311,7 @@ inline F32		LLColor4U::lengthSquared(void) const
 // deprecated
 inline F32		LLColor4U::magVec(void) const
 {
-	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
+	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] ); // first factor of each product is cast to F32, so squares are formed and summed in floating point; only VX/VY/VZ entries are read
 }
 
 // deprecated
diff --git a/indra/llmath/v4math.h b/indra/llmath/v4math.h
index 4c82e6b62..72a477ed2 100644
--- a/indra/llmath/v4math.h
+++ b/indra/llmath/v4math.h
@@ -321,7 +321,7 @@ inline void	LLVector4::setVec(const F32 *vec)
 
 inline F32		LLVector4::length(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); // three-entry magnitude: only mV[VX], mV[VY], mV[VZ] are read; result cast to F32
 }
 
 inline F32		LLVector4::lengthSquared(void) const
@@ -331,7 +331,7 @@ inline F32		LLVector4::lengthSquared(void) const
 
 inline F32		LLVector4::magVec(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]); // three-entry magnitude: only mV[VX], mV[VY], mV[VZ] are read; result cast to F32
 }
 
 inline F32		LLVector4::magVecSquared(void) const
@@ -463,7 +463,7 @@ inline LLVector4 lerp(const LLVector4 &a, const LLVector4 &b, F32 u)
 
 inline F32		LLVector4::normalize(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -486,7 +486,7 @@ inline F32		LLVector4::normalize(void)
 // deprecated
 inline F32		LLVector4::normVec(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
diff --git a/indra/newview/llagent.cpp b/indra/newview/llagent.cpp
index 000f4067e..1b8192b36 100644
--- a/indra/newview/llagent.cpp
+++ b/indra/newview/llagent.cpp
@@ -1328,7 +1328,7 @@ void LLAgent::startAutoPilotGlobal(
 	else
 	{
 		// Guess at a reasonable stop distance.
-		mAutoPilotStopDistance = fsqrtf( distance );
+		mAutoPilotStopDistance = (F32) sqrt( distance );
 		if (mAutoPilotStopDistance < 0.5f) 
 		{
 			mAutoPilotStopDistance = 0.5f;
diff --git a/indra/newview/llphysicsmotion.cpp b/indra/newview/llphysicsmotion.cpp
index af775f59f..12e1408e0 100644
--- a/indra/newview/llphysicsmotion.cpp
+++ b/indra/newview/llphysicsmotion.cpp
@@ -756,7 +756,7 @@ BOOL LLPhysicsMotion::onUpdate(F32 time)
 		const F32 area_for_max_settings = 0.0;
 		const F32 area_for_min_settings = 1400.0;
 		const F32 area_for_this_setting = area_for_max_settings + (area_for_min_settings-area_for_max_settings)*(1.0-lod_factor);
-		const F32 pixel_area = fsqrtf(mCharacter->getPixelArea());
+		const F32 pixel_area = (F32) sqrt(mCharacter->getPixelArea());
         
 		const BOOL is_self = (dynamic_cast<LLVOAvatar *>(mCharacter) != NULL && ((LLVOAvatar*)mCharacter)->isSelf());
 		if ((pixel_area > area_for_this_setting) || is_self)
diff --git a/indra/newview/llselectmgr.cpp b/indra/newview/llselectmgr.cpp
index 6bd7637bf..a172dd0b5 100644
--- a/indra/newview/llselectmgr.cpp
+++ b/indra/newview/llselectmgr.cpp
@@ -3658,7 +3658,7 @@ void LLSelectMgr::deselectAllIfTooFar()
 		{
 			if (mDebugSelectMgr)
 			{
-				llinfos << "Selection manager: auto-deselecting, select_dist = " << fsqrtf(select_dist_sq) << llendl;
+				llinfos << "Selection manager: auto-deselecting, select_dist = " << (F32) sqrt(select_dist_sq) << llendl;
 				llinfos << "agent pos global = " << gAgent.getPositionGlobal() << llendl;
 				llinfos << "selection pos global = " << selectionCenter << llendl;
 			}
diff --git a/indra/newview/llviewerjoystick.cpp b/indra/newview/llviewerjoystick.cpp
index f810de5ad..722c31492 100644
--- a/indra/newview/llviewerjoystick.cpp
+++ b/indra/newview/llviewerjoystick.cpp
@@ -714,7 +714,7 @@ void LLViewerJoystick::moveAvatar(bool reset)
 	sDelta[RX_I] += (cur_delta[RX_I] - sDelta[RX_I]) * time * feather;
 	sDelta[RY_I] += (cur_delta[RY_I] - sDelta[RY_I]) * time * feather;
 	
-	handleRun(fsqrtf(sDelta[Z_I]*sDelta[Z_I] + sDelta[X_I]*sDelta[X_I]));
+	handleRun((F32) sqrt(sDelta[Z_I]*sDelta[Z_I] + sDelta[X_I]*sDelta[X_I]));
 	
 	// Allow forward/backward movement some priority
 	if (dom_axis == Z_I)
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index d1c3a40f2..687f746cd 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -1613,7 +1613,7 @@ F32 LLViewerFetchedTexture::calcDecodePriority()
 
 	S32 cur_discard = getCurrentDiscardLevelForFetching();
 	bool have_all_data = (cur_discard >= 0 && (cur_discard <= mDesiredDiscardLevel));
-	F32 pixel_priority = fsqrtf(mMaxVirtualSize);
+	F32 pixel_priority = (F32) sqrt(mMaxVirtualSize);
 
 	F32 priority = 0.f;
 
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index 16b04a5ff..49643b32e 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -5411,7 +5411,7 @@ void LLVOAvatar::updateTextures()
 
 	if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 	{
-		setDebugText(llformat("%4.0f:%4.0f", fsqrtf(mMinPixelArea),fsqrtf(mMaxPixelArea)));
+		setDebugText(llformat("%4.0f:%4.0f", (F32) sqrt(mMinPixelArea),(F32) sqrt(mMaxPixelArea)));
 	}	
 	
 	if( render_avatar )
diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp
index fa291580c..254952659 100644
--- a/indra/newview/llvograss.cpp
+++ b/indra/newview/llvograss.cpp
@@ -355,7 +355,7 @@ void LLVOGrass::updateTextures()
 	{
 		if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 		{
-			setDebugText(llformat("%4.0f", fsqrtf(mPixelArea)));
+			setDebugText(llformat("%4.0f", (F32) sqrt(mPixelArea)));
 		}
 		getTEImage(0)->addTextureStats(mPixelArea);
 	}
diff --git a/indra/newview/llvotree.cpp b/indra/newview/llvotree.cpp
index 548dab8b0..80c5b9728 100644
--- a/indra/newview/llvotree.cpp
+++ b/indra/newview/llvotree.cpp
@@ -496,7 +496,7 @@ void LLVOTree::updateTextures()
 	{
 		if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 		{
-			setDebugText(llformat("%4.0f", fsqrtf(mPixelArea)));
+			setDebugText(llformat("%4.0f", (F32) sqrt(mPixelArea)));
 		}
 		mTreeImagep->addTextureStats(mPixelArea);
 	}
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index bad1cfe1d..a08e4f00d 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -635,15 +635,15 @@ void LLVOVolume::updateTextureVirtualSize()
 
 	if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 	{
-		setDebugText(llformat("%.0f:%.0f", fsqrtf(min_vsize),fsqrtf(max_vsize)));
+		setDebugText(llformat("%.0f:%.0f", (F32) sqrt(min_vsize),(F32) sqrt(max_vsize)));
 	}
 // 	else if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_PRIORITY))
 // 	{
-// 		setDebugText(llformat("%.0f:%.0f", fsqrtf(min_vsize),fsqrtf(max_vsize)));
+// 		setDebugText(llformat("%.0f:%.0f", (F32) sqrt(min_vsize),(F32) sqrt(max_vsize)));
 // 	}
 	else if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_FACE_AREA))
 	{
-		setDebugText(llformat("%.0f:%.0f", fsqrtf(min_vsize),fsqrtf(max_vsize)));
+		setDebugText(llformat("%.0f:%.0f", (F32) sqrt(min_vsize),(F32) sqrt(max_vsize)));
 	}
 
 	if (mPixelArea == 0)
diff --git a/indra/newview/llworld.cpp b/indra/newview/llworld.cpp
index fbceaa9cc..b1e42c084 100644
--- a/indra/newview/llworld.cpp
+++ b/indra/newview/llworld.cpp
@@ -604,7 +604,7 @@ void LLWorld::updateVisibilities()
 		region_list_t::iterator curiter = iter++;
 		LLViewerRegion* regionp = *curiter;
 		F32 height = regionp->getLand().getMaxZ() - regionp->getLand().getMinZ();
-		F32 radius = 0.5f*fsqrtf(height * height + diagonal_squared);
+		F32 radius = 0.5f*(F32) sqrt(height * height + diagonal_squared);
 		if (!regionp->getLand().hasZData()
 			|| LLViewerCamera::getInstance()->sphereInFrustum(regionp->getCenterAgent(), radius))
 		{
@@ -625,7 +625,7 @@ void LLWorld::updateVisibilities()
 		}
 
 		F32 height = regionp->getLand().getMaxZ() - regionp->getLand().getMinZ();
-		F32 radius = 0.5f*fsqrtf(height * height + diagonal_squared);
+		F32 radius = 0.5f*(F32) sqrt(height * height + diagonal_squared);
 		if (LLViewerCamera::getInstance()->sphereInFrustum(regionp->getCenterAgent(), radius))
 		{
 			regionp->calculateCameraDistance();
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 6eb4b56b4..20cea49dd 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -4692,7 +4692,7 @@ static F32 calc_light_dist(LLVOVolume* light, const LLVector3& cam_pos, F32 max_
 	{
 		return max_dist;
 	}
-	F32 dist = fsqrtf(dist2);
+	F32 dist = (F32) sqrt(dist2);
 	dist *= 1.f / inten;
 	dist -= radius;
 	if (selected)
diff --git a/indra/test/v2math_tut.cpp b/indra/test/v2math_tut.cpp
index 08a091a79..8ec588ce5 100644
--- a/indra/test/v2math_tut.cpp
+++ b/indra/test/v2math_tut.cpp
@@ -92,7 +92,7 @@ namespace tut
 		F32 x = 2.2345f, y = 3.5678f ;
 		LLVector2 vec2(x,y);
 		ensure("magVecSquared:Fail ", is_approx_equal(vec2.magVecSquared(), (x*x + y*y)));
-		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), fsqrtf(x*x + y*y)));
+		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), (F32) sqrt(x*x + y*y)));
 	}
 
 	template<> template<>
@@ -414,7 +414,7 @@ namespace tut
 		ensure_equals("dist_vec_squared values are not equal",val2, val1);
 
 		val1 = 	dist_vec(vec2, vec3);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));
 		ensure_equals("dist_vec values are not equal",val2, val1);
 	}
 
@@ -438,7 +438,7 @@ namespace tut
 		LLVector2 vec2(x1, y1);
 
 		F32 vecMag = vec2.normVec();
-		F32 mag = fsqrtf(x1*x1 + y1*y1);
+		F32 mag = (F32) sqrt(x1*x1 + y1*y1);
 
 		F32 oomag = 1.f / mag;
 		val1 = x1 * oomag;
diff --git a/indra/test/v3color_tut.cpp b/indra/test/v3color_tut.cpp
index 26d30682a..e36301ace 100644
--- a/indra/test/v3color_tut.cpp
+++ b/indra/test/v3color_tut.cpp
@@ -99,7 +99,7 @@ namespace tut
 		F32 r = 2.3436212f, g = 1231.f, b = 4.7849321232f;
 		LLColor3 llcolor3(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor3.magVecSquared(), (r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}
 
 	template<> template<>
@@ -109,7 +109,7 @@ namespace tut
 		F32 val1, val2,val3;
 		LLColor3 llcolor3(r,g,b);
 		F32 vecMag = llcolor3.normVec();
-		F32 mag = fsqrtf(r*r + g*g + b*b);
+		F32 mag = (F32) sqrt(r*r + g*g + b*b);
 		F32 oomag = 1.f / mag;
 		val1 = r * oomag;
 		val2 = g * oomag;
@@ -292,7 +292,7 @@ namespace tut
 		F32 r1 =1.f, g1 = 2.f,b1 = 1.2f, r2 = -2.3f, g2 = 1.11f, b2 = 1234.234f;
 		LLColor3 llcolor3(r1,g1,b1),llcolor3a(r2,g2,b2);
 		F32 val = distVec(llcolor3,llcolor3a);
-		ensure("distVec failed ", is_approx_equal(fsqrtf((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));
+		ensure("distVec failed ", is_approx_equal((F32) sqrt((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));
 		
 		F32 val1 = distVec_squared(llcolor3,llcolor3a);
 		ensure("distVec_squared failed ", is_approx_equal(((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val1));
diff --git a/indra/test/v3dmath_tut.cpp b/indra/test/v3dmath_tut.cpp
index 598d72aab..2b06bb552 100644
--- a/indra/test/v3dmath_tut.cpp
+++ b/indra/test/v3dmath_tut.cpp
@@ -409,7 +409,7 @@ namespace tut
 		LLVector3d vec3D(x,y,z);
 		F64 res = (x*x + y*y + z*z) - vec3D.magVecSquared();
 		ensure("1:magVecSquared:Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));
-		res = fsqrtf(x*x + y*y + z*z) - vec3D.magVec();
+		res = (F32) sqrt(x*x + y*y + z*z) - vec3D.magVec();
 		ensure("2:magVec: Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));	
 	}
 
diff --git a/indra/test/v3math_tut.cpp b/indra/test/v3math_tut.cpp
index 7cbcb2304..0ab45a6a0 100644
--- a/indra/test/v3math_tut.cpp
+++ b/indra/test/v3math_tut.cpp
@@ -156,7 +156,7 @@ namespace tut
 		F32 x = 2.32f, y = 1.212f, z = -.12f;
 		LLVector3 vec3(x,y,z);		
 		ensure("1:magVecSquared:Fail ", is_approx_equal(vec3.magVecSquared(), (x*x + y*y + z*z)));
-		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), fsqrtf(x*x + y*y + z*z)));
+		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), (F32) sqrt(x*x + y*y + z*z)));
 	}
 
 	template<> template<>
@@ -516,7 +516,7 @@ namespace tut
 		F32 val1,val2;
 		LLVector3 vec3(x1,y1,z1),vec3a(x2,y2,z2);
 		val1 = dist_vec(vec3,vec3a);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
 		ensure_equals("1:dist_vec: Fail ",val2, val1);
 		val1 = dist_vec_squared(vec3,vec3a);
 		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
diff --git a/indra/test/v4color_tut.cpp b/indra/test/v4color_tut.cpp
index b9142e73f..ee95a1bae 100644
--- a/indra/test/v4color_tut.cpp
+++ b/indra/test/v4color_tut.cpp
@@ -161,7 +161,7 @@ namespace tut
 		F32 r = 0x20, g = 0xFFFF, b = 0xFF;
 		LLColor4 llcolor4(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4.magVecSquared(), (r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}
 
 	template<> template<>
@@ -170,7 +170,7 @@ namespace tut
 		F32 r = 0x20, g = 0xFFFF, b = 0xFF;
 		LLColor4 llcolor4(r,g,b);
 		F32 vecMag = llcolor4.normVec();
-		F32 mag = fsqrtf(r*r + g*g + b*b);
+		F32 mag = (F32) sqrt(r*r + g*g + b*b);
 		F32 oomag = 1.f / mag;
 		F32 val1 = r * oomag, val2 = g * oomag,	val3 = b * oomag;
 		ensure("1:normVec failed ", (is_approx_equal(val1, llcolor4.mV[0]) && is_approx_equal(val2, llcolor4.mV[1]) && is_approx_equal(val3, llcolor4.mV[2]) && is_approx_equal(vecMag, mag)));
diff --git a/indra/test/v4coloru_tut.cpp b/indra/test/v4coloru_tut.cpp
index 1630b4ede..4b8070e30 100644
--- a/indra/test/v4coloru_tut.cpp
+++ b/indra/test/v4coloru_tut.cpp
@@ -140,7 +140,7 @@ namespace tut
 		U8 r = 0x12, g = 0xFF, b = 0xAF;
 		LLColor4U llcolor4u(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4u.magVecSquared(), (F32)(r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}
 
 	template<> template<>
diff --git a/indra/test/v4math_tut.cpp b/indra/test/v4math_tut.cpp
index cc37be356..c529b17c2 100644
--- a/indra/test/v4math_tut.cpp
+++ b/indra/test/v4math_tut.cpp
@@ -102,7 +102,7 @@ namespace tut
 	{
 		F32 x = 10.f, y = -2.3f, z = -.023f;
 		LLVector4 vec4(x,y,z);
-		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), fsqrtf(x*x + y*y + z*z)));
+		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), (F32) sqrt(x*x + y*y + z*z)));
 		ensure("magVecSquared:Fail ", is_approx_equal(vec4.magVecSquared(), (x*x + y*y + z*z)));
 	}
 
@@ -343,7 +343,7 @@ namespace tut
 		F32 val1,val2;
 		LLVector4 vec4(x1,y1,z1),vec4a(x2,y2,z2);
 		val1 = dist_vec(vec4,vec4a);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
 		ensure_equals("dist_vec: Fail ",val2, val1);
 		val1 = dist_vec_squared(vec4,vec4a);
 		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));