Yay they updated llstring, who's ready for a full rebuild?

This commit is contained in:
Lirusaito
2019-04-10 10:04:49 -04:00
parent 5e1a102de4
commit ed88e55e04
2 changed files with 453 additions and 12 deletions

View File

@@ -233,7 +233,7 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
{
out += cur_char;
}
i++;
++i;
}
return out;
}
@@ -493,7 +493,7 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
std::string out;
out.reserve(len);
for (S32 i = 0; i < len; i++)
for (S32 i = 0; i < len; ++i)
{
S32 n = wchar_to_utf8chars(utf32str[i], tchars);
tchars[n] = 0;
@@ -576,6 +576,78 @@ std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
}
}
// [RLVa:KB] - Checked: RLVa-2.1.0
std::string utf8str_substr(const std::string& utf8str, const S32 index, const S32 max_len)
{
    // Return up to max_len BYTES of utf8str starting at byte offset index,
    // never splitting a multi-byte UTF-8 sequence at the tail end.
    //
    // Guard degenerate arguments: an empty request, a negative length, or an
    // out-of-range start offset all yield an empty string. (Previously the
    // unsigned subtraction `utf8str.length() - index` underflowed when
    // index > length(), which then indexed past the end of the string; and a
    // negative max_len accidentally returned the whole suffix.)
    if ((max_len <= 0) || (index < 0) ||
        ((std::string::size_type)index >= utf8str.length()))
    {
        return std::string();
    }
    if (utf8str.length() - index <= (std::string::size_type)max_len)
    {
        // Everything from index onward fits: take it all.
        return utf8str.substr(index, max_len);
    }
    S32 cur_char = max_len;
    // If the byte at the cut point is ASCII (<= 0x7f), it cannot belong to a
    // multibyte sequence, so the cut is already safe.
    if ((U8)utf8str[index + cur_char] > 0x7f)
    {
        // A byte of the form 10xxxxxx is a UTF-8 continuation byte; back up
        // until we reach the sequence's lead byte.
        while (0x80 == (0xc0 & utf8str[index + cur_char]))
        {
            cur_char--;
            // Make sure we don't trash memory if we've got a bogus string
            // consisting entirely of continuation bytes.
            if (cur_char == 0)
            {
                break;
            }
        }
    }
    // The byte at cur_char begins a character that won't fit, so copy only
    // the cur_char bytes before it.
    return utf8str.substr(index, cur_char);
}
void utf8str_split(std::list<std::string>& split_list, const std::string& utf8str, size_t maxlen, char split_token)
{
    // Break utf8str into pieces of at most maxlen bytes, preferring to cut at
    // the last split_token before the limit and otherwise cutting on a UTF-8
    // character boundary. Pieces are appended to split_list (cleared first);
    // the split_token between two pieces is dropped.
    split_list.clear();

    // A maxlen of 0 could never make forward progress below (each piece would
    // come back empty, looping forever); treat it as "unsplittable" and hand
    // back the input as a single piece.
    if (0 == maxlen)
    {
        if (!utf8str.empty())
            split_list.push_back(utf8str);
        return;
    }

    std::string::size_type lenMsg = utf8str.length(), lenIt = 0;
    const char* pstrIt = utf8str.c_str(); std::string strTemp;
    while (lenIt < lenMsg)
    {
        if (lenIt + maxlen < lenMsg)
        {
            // Find the last split character before the length limit
            const char* pstrTemp = pstrIt + maxlen;
            while ( (pstrTemp > pstrIt) && (*pstrTemp != split_token) )
                pstrTemp--;
            if (pstrTemp > pstrIt)
                strTemp = utf8str.substr(lenIt, pstrTemp - pstrIt);
            else
                // No token in range: cut on a UTF-8 character boundary instead
                strTemp = utf8str_substr(utf8str, lenIt, maxlen);
        }
        else
        {
            // The remainder fits in one final piece
            strTemp = utf8str.substr(lenIt, std::string::npos);
        }
        // Defensive: if the cut produced nothing (malformed UTF-8 made up of
        // continuation bytes), force one byte of progress rather than hanging.
        if (strTemp.empty())
            strTemp = utf8str.substr(lenIt, 1);
        split_list.push_back(strTemp);
        lenIt += strTemp.length();
        pstrIt = utf8str.c_str() + lenIt;
        // Swallow the token we split on so it doesn't begin the next piece
        if (*pstrIt == split_token)
            lenIt++;
    }
}
// [/RLVa:KB]
std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len)
{
if (0 == symbol_len)
@@ -668,6 +740,12 @@ bool LLStringOps::isHexString(const std::string& str)
}
#if LL_WINDOWS
// Convenience overload: convert a wide string to a std::string using the
// UTF-8 code page.
std::string ll_convert_wide_to_string(const wchar_t* in)
{
return ll_convert_wide_to_string(in, CP_UTF8);
}
std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
{
std::string out;
@@ -705,6 +783,11 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
return out;
}
// Convenience overload: convert a std::string to a wide string using the
// UTF-8 code page. Per the header comment on the two-argument overload, the
// returned buffer is allocated with new[] and the caller must delete[] it.
wchar_t* ll_convert_string_to_wide(const std::string& in)
{
return ll_convert_string_to_wide(in, CP_UTF8);
}
wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
{
// From review:
@@ -726,6 +809,67 @@ wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page
w_out[real_output_str_len] = 0;
return w_out;
return {&w_out[0]};
}
// Decode one UTF-16 code point starting at inchars into a single UTF-32
// llwchar stored through outchar. Returns the number of UTF-16 code units
// consumed: 1 for a BMP character, 2 for a surrogate pair.
S32 wchartchars_to_llwchar(const std::wstring::value_type* inchars, llwchar* outchar)
{
const std::wstring::value_type* base = inchars;
std::wstring::value_type cur_char = *inchars++;
llwchar char32 = cur_char;
if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
{
// Surrogates: combine the high/low pair into one supplementary code point.
// NOTE(review): this branch also fires for an unpaired LOW surrogate
// (0xDC00-0xDFFF) and unconditionally reads the next unit, so callers
// must supply well-formed UTF-16 -- confirm at call sites.
char32 = ((llwchar)(cur_char - 0xD800)) << 10;
cur_char = *inchars++;
char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
}
else
{
char32 = (llwchar)cur_char;
}
*outchar = char32;
// Units consumed = how far the read pointer advanced from the start.
return inchars - base;
}
LLWString ll_convert_wide_to_wstring(const std::wstring& in)
{
    // Convert a Windows wide (UTF-16) string into our UTF-32 LLWString,
    // combining surrogate pairs into single code points.
    LLWString wout;
    // (The former `len <= 0` test was redundant: size() is unsigned, so it
    // could only mean "empty", which in.empty() already covers.)
    if (in.empty()) return wout;
    const size_t len = in.size();
    // std::wstring::c_str() is guaranteed contiguous and NUL-terminated, so
    // the decoder may safely peek one unit ahead at the very end.
    const std::wstring::value_type* chars16 = in.c_str();
    size_t i = 0;
    while (i < len)
    {
        llwchar cur_char;
        // Advance by however many UTF-16 units the decoder consumed (1 or 2)
        i += wchartchars_to_llwchar(chars16 + i, &cur_char);
        wout += cur_char;
    }
    return wout;
}
std::wstring ll_convert_wstring_to_wide(const LLWString& in)
{
std::wstring out;
size_t i = 0;
while (i < in.size())
{
U32 cur_char = in[i];
if (cur_char > 0xFFFF)
{
out += (0xD7C0 + (cur_char >> 10));
out += (0xDC00 | (cur_char & 0x3FF));
}
else
{
out += cur_char;
}
i++;
}
return out;
}
std::string ll_convert_string_to_utf8_string(const std::string& in)
@@ -736,7 +880,108 @@ std::string ll_convert_string_to_utf8_string(const std::string& in)
return out_utf8;
}
#endif // LL_WINDOWS
namespace
{
// Deleter for buffers that FormatMessageW() allocated on the process heap
// (used with FORMAT_MESSAGE_ALLOCATE_BUFFER below).
void HeapFree_deleter(void* ptr)
{
// instead of LocalFree(), per https://stackoverflow.com/a/31541205
HeapFree(GetProcessHeap(), NULL, ptr);
}
} // anonymous namespace
// Render the Windows system message text for 'error' (a GetLastError() code)
// as a std::wstring. If FormatMessageW() itself fails, synthesize a fallback
// string naming both error codes instead.
template<>
std::wstring windows_message<std::wstring>(DWORD error)
{
// derived from https://stackoverflow.com/a/455533
wchar_t* rawptr = nullptr;
auto okay = FormatMessageW(
// use system message tables for GetLastError() codes
FORMAT_MESSAGE_FROM_SYSTEM |
// internally allocate buffer and return its pointer
FORMAT_MESSAGE_ALLOCATE_BUFFER |
// you cannot pass insertion parameters (thanks Gandalf)
FORMAT_MESSAGE_IGNORE_INSERTS |
// ignore line breaks in message definition text
FORMAT_MESSAGE_MAX_WIDTH_MASK,
NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM
error, // dwMessageId
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId
(LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t*
0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER
NULL); // Arguments, unused
// make a unique_ptr from rawptr so it gets cleaned up properly
// (freed via HeapFree_deleter whichever way we return)
std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter);
if (okay && bufferptr)
{
// got the message, return it ('okay' is length in characters)
return { bufferptr.get(), okay };
}
// did not get the message, synthesize one
auto format_message_error = GetLastError();
std::wostringstream out;
out << L"GetLastError() " << error << L" (FormatMessageW() failed with "
<< format_message_error << L")";
return out.str();
}
boost::optional<std::wstring> llstring_getoptenv(const std::string& key)
{
    // Look up environment variable 'key' (UTF-8) via the wide-char Windows
    // API. Returns an empty boost::optional when the variable isn't set.
    //
    // ll_convert_string_to_wide() returns a buffer allocated with new[]
    // (see its header comment); the previous code never released it, leaking
    // on every call. Copy it into a std::wstring and free it right away.
    wchar_t* rawkey = ll_convert_string_to_wide(key);
    std::wstring wkey(rawkey);
    delete [] rawkey;

    // Take a wild guess as to how big the buffer should be.
    std::vector<wchar_t> buffer(1024);
    auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], (DWORD)buffer.size());
    // If our initial guess was too short, n will indicate the size (in
    // wchar_t's) that buffer should have been, including the terminating nul.
    if (n > (buffer.size() - 1))
    {
        // make it big enough
        buffer.resize(n);
        // and try again
        n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], (DWORD)buffer.size());
    }
    // did that (ultimately) succeed?
    if (n)
    {
        // great, return populated boost::optional
        return boost::optional<std::wstring>(&buffer[0]);
    }
    // not successful
    auto last_error = GetLastError();
    // Don't bother warning for NOT_FOUND; that's an expected case
    if (last_error != ERROR_ENVVAR_NOT_FOUND)
    {
        LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
            << windows_message<std::string>(last_error) << LL_ENDL;
    }
    // return empty boost::optional
    return {};
}
#else // ! LL_WINDOWS
boost::optional<std::string> llstring_getoptenv(const std::string& key)
{
    // POSIX flavor: wrap getenv(), mapping "not set" to an empty optional.
    const char* value = getenv(key.c_str());
    if (!value)
    {
        // variable absent: empty boost::optional
        return {};
    }
    // variable present: populated boost::optional
    return boost::optional<std::string>(value);
}
#endif // ! LL_WINDOWS
// Static time-offset members of LLStringOps, zero until set elsewhere.
// NOTE(review): units (presumably seconds) are not established here -- see
// the setters/users of these members.
long LLStringOps::sPacificTimeOffset = 0;
long LLStringOps::sLocalTimeOffset = 0;

View File

@@ -27,6 +27,9 @@
#ifndef LL_LLSTRING_H
#define LL_LLSTRING_H
#include "llwin32headerslean.h"
#include <boost/optional/optional.hpp>
#include <string>
#include <cstdio>
//#include <locale>
@@ -34,8 +37,12 @@
#include <algorithm>
#include <vector>
#include <map>
#include "llsd.h"
#include "llfasttimer.h"
#include "llformat.h"
#include "llsd.h"
// [RLVa:KB] - Checked: RLVa-2.1.0
#include <list>
// [/RLVa:KB]
#if LL_LINUX || LL_SOLARIS
#include <wctype.h>
@@ -305,7 +312,7 @@ public:
static bool isValidIndex(const string_type& string, size_type i)
{
return !string.empty() && (i <= string.size());
return !string.empty() && (0 <= i) && (i <= string.size());
}
static bool contains(const string_type& string, T c, size_type i=0)
@@ -343,6 +350,19 @@ public:
const string_type& string,
const string_type& substr);
/**
* get environment string value with proper Unicode handling
* (key is always UTF-8)
* detect absence by return value == dflt
*/
static string_type getenv(const std::string& key, const string_type& dflt="");
/**
* get optional environment string value with proper Unicode handling
* (key is always UTF-8)
* detect absence by (! return value)
*/
static boost::optional<string_type> getoptenv(const std::string& key);
static void addCRLF(string_type& string);
static void removeCRLF(string_type& string);
static void removeWindowsCR(string_type& string);
@@ -503,6 +523,37 @@ LL_COMMON_API bool iswindividual(llwchar elem);
* Unicode support
*/
/// generic conversion aliases
// Customization point behind ll_convert<TO>(from): specialize this struct
// for each supported (TO, FROM) pair. The unused Enable parameter leaves
// room for enable_if-style partial specializations.
template<typename TO, typename FROM, typename Enable=void>
struct ll_convert_impl
{
// Don't even provide a generic implementation. We specialize for every
// combination we do support.
TO operator()(const FROM& in) const;
};
// Use a function template to get the nice ll_convert<TO>(from_value) API.
template<typename TO, typename FROM>
TO ll_convert(const FROM& in)
{
return ll_convert_impl<TO, FROM>()(in);
}
// degenerate case: converting a type to itself is the identity
template<typename T>
struct ll_convert_impl<T, T>
{
T operator()(const T& in) const { return in; }
};
// specialize ll_convert_impl<TO, FROM> to return EXPR
// (invoked at namespace scope; the invocation supplies the trailing
// semicolon, which is why the macro body deliberately omits one)
#define ll_convert_alias(TO, FROM, EXPR) \
template<> \
struct ll_convert_impl<TO, FROM> \
{ \
TO operator()(const FROM& in) const { return EXPR; } \
}
// Make the incoming string a utf8 string. Replaces any unknown glyph
// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
// of the data may not be recovered.
@@ -510,37 +561,91 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
//
// We should never use UTF16 except when communicating with Win32!
//
// https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t
// nat 2018-12-14: I consider the whole llutf16string thing a mistake, because
// the Windows APIs we want to call are all defined in terms of wchar_t*
// (or worse, LPCTSTR).
// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
// While there is no point coding for an ASCII-only world (! defined(UNICODE)),
// use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going
// forward, we should code in terms of wchar_t and std::wstring so as to
// support either setting of /Zc:wchar_t.
// The first link above states that char can be used to hold ASCII or any
// multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t
// (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base:
// * char and std::string always hold UTF-8 (of which ASCII is a subset). It
// is a BUG if they are used to pass strings in any other multi-byte
// encoding.
// * wchar_t and std::wstring should be our interface to Windows wide-string
// APIs, and therefore hold UTF-16LE.
// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do
// not introduce new uses of U16 or llutf16string for string data.
// * llwchar and LLWString hold UTF-32 strings.
// * Do not introduce char16_t or std::u16string.
// * Do not introduce char32_t or std::u32string.
//
// llutf16string holds UTF-16LE code units. On Windows with native wchar_t
// (/Zc:wchar_t, signaled by _NATIVE_WCHAR_T_DEFINED) base it on wchar_t;
// otherwise fall back to the U16 typedef.
#if _WIN32 && _NATIVE_WCHAR_T_DEFINED
typedef wchar_t utf16strtype;
#else
typedef U16 utf16strtype;
#endif
typedef std::basic_string<utf16strtype> llutf16string;
#if ! defined(LL_WCHAR_T_NATIVE)
// wchar_t is identical to U16, and std::wstring is identical to llutf16string.
// Defining an ll_convert alias involving llutf16string would collide with the
// comparable preferred alias involving std::wstring. (In this scenario, if
// you pass llutf16string, it will engage the std::wstring specialization.)
#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
#else // defined(LL_WCHAR_T_NATIVE)
// wchar_t is a distinct native type, so llutf16string is also a distinct
// type, and there IS a point to converting separately to/from llutf16string.
// (But why? Windows APIs are still defined in terms of wchar_t, and
// in this scenario llutf16string won't work for them!)
#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
#if LL_WINDOWS
// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact,
// wchar_t is native. Everywhere but Windows, we use it for llwchar (see
// stdtypes.h). That makes LLWString identical to std::wstring, so these
// aliases for std::wstring would collide with those for LLWString. Only
// define on Windows, where converting between std::wstring and llutf16string
// means copying chars.
ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end()));
ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end()));
#endif // LL_WINDOWS
#endif // defined(LL_WCHAR_T_NATIVE)
LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));
LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));
LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));
LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
// Same function, better name. JC
inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
// best name of all
ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));
//
LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));
LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
#if LL_WINDOWS
inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
@@ -575,6 +680,11 @@ LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wst
*/
LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len);
// [RLVa:KB] - Checked: RLVa-2.1.0
LL_COMMON_API std::string utf8str_substr(const std::string& utf8str, const S32 index, const S32 max_len);
LL_COMMON_API void utf8str_split(std::list<std::string>& split_list, const std::string& utf8str, size_t maxlen, char split_token);
// [/RLVa:KB]
LL_COMMON_API std::string utf8str_trim(const std::string& utf8str);
LL_COMMON_API S32 utf8str_compare_insensitive(
@@ -623,22 +733,77 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
* This replaces the unsafe W2A macro from ATL.
*/
LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
// Convenience overload: accept std::wstring with an explicit code page.
inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
{
return ll_convert_wide_to_string(in.c_str(), code_page);
}
// Convenience overload: accept std::wstring with the default code page (CP_UTF8).
inline std::string ll_convert_wide_to_string(const std::wstring& in)
{
return ll_convert_wide_to_string(in.c_str());
}
ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
/**
* Converts a string to wide string.
*
* It will allocate memory for result string with "new []". Don't forget to release it with "delete []".
*/
LL_COMMON_API wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page);
LL_COMMON_API wchar_t* ll_convert_string_to_wide(const std::string& in,
unsigned int code_page);
LL_COMMON_API wchar_t* ll_convert_string_to_wide(const std::string& in);
// default CP_UTF8
ll_convert_alias(wchar_t*, std::string, ll_convert_string_to_wide(in));
/**
* Converts incoming string into urf8 string
* Convert a Windows wide string to our LLWString
*/
LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
/**
* Convert LLWString to Windows wide string
*/
LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
/**
* Converts incoming string into utf8 string
*
*/
LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
/// Get Windows message string for passed GetLastError() code
// VS 2013 doesn't let us forward-declare this template, which is what we
// started with, so the implementation could reference the specialization we
// haven't yet declared. Somewhat weirdly, just stating the generic
// implementation in terms of the specialization works, even in this order...
// the general case is just a conversion from the sole implementation
// Microsoft says DWORD is a typedef for unsigned long
// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
// so rather than drag windows.h into everybody's include space...
// Generic case: obtain the message from the sole std::wstring implementation
// and convert it to the requested string type via ll_convert.
template<typename STRING>
STRING windows_message(unsigned long error)
{
return ll_convert<STRING>(windows_message<std::wstring>(error));
}
/// There's only one real implementation
template<>
LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error);
/// Get Windows message string, implicitly calling GetLastError()
template<typename STRING>
STRING windows_message() { return windows_message<STRING>(GetLastError()); }
//@}
#endif // LL_WINDOWS
LL_COMMON_API boost::optional<std::wstring> llstring_getoptenv(const std::string& key);
#else // ! LL_WINDOWS
LL_COMMON_API boost::optional<std::string> llstring_getoptenv(const std::string& key);
#endif // ! LL_WINDOWS
/**
* Many of the 'strip' and 'replace' methods of LLStringUtilBase need
@@ -1612,6 +1777,37 @@ bool LLStringUtilBase<T>::endsWith(
return (idx == (string.size() - substr.size()));
}
// static
template<class T>
auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional<string_type>
{
    // Fetch the platform-appropriate optional value, then convert it (when
    // present) to this specialization's string_type.
    if (auto found = llstring_getoptenv(key))
    {
        // populated boost::optional
        return { ll_convert<string_type>(*found) };
    }
    // variable not set: empty boost::optional
    return {};
}
// static
template<class T>
auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type
{
    // Like getoptenv(), but collapse "not set" to the supplied default.
    auto found(getoptenv(key));
    return found ? *found : dflt;
}
template<class T>
BOOL LLStringUtilBase<T>::convertToBOOL(const string_type& string, BOOL& value)