Files
DNF_DEV/source/squirrel/sqdbg/str.h
2025-10-23 15:21:33 +08:00

1483 lines
28 KiB
C++

//-----------------------------------------------------------------------
// github.com/samisalreadytaken/sqdbg
//-----------------------------------------------------------------------
//
#ifndef SQDBG_STRING_H
#define SQDBG_STRING_H
// Element count of an array minus one, i.e. the length of a string literal
// without its NUL terminator. Only meaningful for arrays, not for pointers.
#define STRLEN(s) (sizeof(s) - 1)
// The *_LEN constants below are the longest printed forms of the matching value,
// in characters; the trailing comments show the value that needs that many.
#define FMT_UINT32_LEN 10 // 4294967295
// Two hex digits per pointer byte plus 2 more characters
// (presumably the "0x" prefix - confirm against the callers)
#define FMT_PTR_LEN ( (int)sizeof(void*) * 2 + 2 )
// Integer widths and printf format, selected by _SQ64
#ifdef _SQ64
#define FMT_INT_LEN 20 // -9223372036854775808
#define FMT_OCT_LEN 23 // 01777777777777777777777
#if defined(_WIN32) || SQUIRREL_VERSION_NUMBER > 223
#define FMT_INT "%lld"
#else
#define FMT_INT "%ld"
#endif
#else
#define FMT_INT_LEN 11 // -2147483648
#define FMT_OCT_LEN 12 // 017777777777
#define FMT_INT "%d"
#endif
// printf formats for strings.
// FMT_STR / FMT_VSTR differ between builds ("%ls" vs "%s"), FMT_CSTR / FMT_VCSTR
// always name a narrow string. The V variants take a precision argument ("%.*").
#ifdef SQUNICODE
#define FMT_STR "%ls"
#define FMT_CSTR "%hs"
#define FMT_VCSTR "%.*hs"
#define FMT_VSTR "%.*ls"
#else
#define FMT_STR "%s"
#define FMT_CSTR "%s"
#define FMT_VCSTR "%.*s"
#define FMT_VSTR "%.*s"
#endif
// Float format and an upper bound on its printed width.
// NOTE(review): the terms look like sign + integer digits + '.' + fraction digits;
// both variants use FLT_DIG for the last term - confirm this is intended for double.
#ifdef SQUSEDOUBLE
#define FMT_FLT "%lf"
#define FMT_FLT_LEN ( 1 + DBL_MAX_10_EXP + 1 + 1 + FLT_DIG )
#else
#define FMT_FLT "%f"
#define FMT_FLT_LEN ( 1 + FLT_MAX_10_EXP + 1 + 1 + FLT_DIG )
#endif
// Forward declarations of the string types defined further down in this header.
struct string_t;
// Without SQUNICODE sqstring_t is just another name for string_t,
// with SQUNICODE it is a separate struct holding SQChar.
#ifndef SQUNICODE
typedef string_t sqstring_t;
#else
struct sqstring_t;
#endif
struct stringbufbase_t;
template < int BUFSIZE > struct stringbuf_t;
typedef stringbufbase_t stringbufext_t;
// Integer printers. They write at most `size` characters into `buf`,
// do not NUL terminate, and return the number of characters written.
template < typename C, typename I >
int printint( C *buf, int size, I value );
// padding:   left pad with '0' to two digits per byte of I
// prefix:    emit a leading "0x"
// uppercase: use 'A'-'F' instead of 'a'-'f'
template < bool padding = true, bool prefix = true, bool uppercase = true, typename C, typename I >
int printhex( C *buf, int size, I value );
template < typename C, typename I >
inline int printoct( C *buf, int size, I value );
// Integer parsers. They store the parsed value in *out and return true,
// or store 0 and return false when a character is not a valid digit.
template < typename I >
bool atoi( string_t str, I *out );
template < typename I >
bool atox( string_t str, I *out );
template < typename I >
bool atoo( string_t str, I *out );
// Maps an integer type to its unsigned counterpart.
// Only int (and int64_t under _SQ64) are specialised;
// every other type maps to itself.
template < typename I >
struct _as_unsigned { typedef I T; };
template <>
struct _as_unsigned< int > { typedef unsigned int T; };
#ifdef _SQ64
template <>
struct _as_unsigned< int64_t > { typedef uint64_t T; };
#endif
// Expands to a dependent type name (uses `typename`), so it is meant for use inside templates
#define as_unsigned_type( I ) typename _as_unsigned<I>::T
// Converts v to the unsigned counterpart of I
#define cast_unsigned( I, v ) (as_unsigned_type(I)(v))
// True when I cannot represent -1, i.e. (I)-1 wraps to a positive value
#define IS_UNSIGNED( I ) ((I)0 < (I)-1)
// ASCII-only character classification.
// These are built on IN_RANGE_CHAR, which converts its argument to unsigned char first:
// NOTE(review): a wider character whose low byte falls in the range also matches.
#define _isdigit( c ) \
IN_RANGE_CHAR( c, '0', '9' )
#define _isxdigit( c ) \
( IN_RANGE_CHAR( c, '0', '9' ) || \
IN_RANGE_CHAR( c, 'A', 'F' ) || \
IN_RANGE_CHAR( c, 'a', 'f' ) )
#define _isalpha( c ) \
( IN_RANGE_CHAR( c, 'A', 'Z' ) || IN_RANGE_CHAR( c, 'a', 'z' ) )
#define _isalnum( c ) \
( _isalpha(c) || _isdigit(c) )
// min <= c <= max done with a single unsigned comparison:
// values below min wrap around to a large number and fail the test.
#define IN_RANGE(c, min, max) \
((uint32_t)((uint32_t)(c) - (uint32_t)(min)) <= (uint32_t)((max)-(min)))
// Same as IN_RANGE, computed in unsigned char
#define IN_RANGE_CHAR(c, min, max) \
((unsigned char)((unsigned char)(c) - (unsigned char)(min)) <= (unsigned char)((max)-(min)))
// [0xFDD0, 0xFDEF]
#define UTF16_NONCHAR(cp) IN_RANGE(cp, 0xFDD0, 0xFDEF)
// Low 16 bits are 0xFFFE or 0xFFFF
#define UTF_NONCHAR(cp) ( ( (cp) & 0xFFFE ) == 0xFFFE )
// [0xD800, 0xDFFF]
#define UTF_SURROGATE(cp) ( ( (cp) & 0xFFFFF800 ) == 0x0000D800 )
// [0xD800, 0xDBFF]
#define UTF_SURROGATE_LEAD(cp) ( ( (cp) & 0xFFFFFC00 ) == 0x0000D800 )
// [0xDC00, 0xDFFF]
#define UTF_SURROGATE_TRAIL(cp) ( ( (cp) & 0xFFFFFC00 ) == 0x0000DC00 )
// 10xxxxxx
#define UTF8_TRAIL(c) ( ( (c) & 0xC0 ) == 0x80 )
// 110xxxxx 10xxxxxx
#define UTF8_2_LEAD(c) ( ( (c) & 0xE0 ) == 0xC0 )
// 1110xxxx 10xxxxxx 10xxxxxx
#define UTF8_3_LEAD(c) ( ( (c) & 0xF0 ) == 0xE0 )
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
#define UTF8_4_LEAD(c) ( ( (c) & 0xF8 ) == 0xF0 )
// UTF8_N( len, c0, src ):
// true when `src` has at least N units left (len) and the units after the lead c0
// are acceptable continuation bytes. The lead byte class itself is not checked here,
// callers test UTF8_N_LEAD first. c0 is unused in the 2 byte form.
#define UTF8_2(len, c0, src) \
( (len) > 1 && UTF8_TRAIL((src)[1]) )
#define UTF8_3(len, c0, src) \
( (len) > 2 && UTF8_3_ISVALID(c0, (src)[1]) && UTF8_TRAIL((src)[2]) )
#define UTF8_4(len, c0, src) \
( (len) > 3 && UTF8_4_ISVALID(c0, (src)[1]) && UTF8_TRAIL((src)[2]) && UTF8_TRAIL((src)[3]) )
// Restricts the second byte depending on the lead byte:
// 0xE0 only allows [0xA0, 0xBF] (smaller values would be an overlong encoding),
// 0xED only allows [0x80, 0x9F] (larger values would encode surrogates),
// the other 3 byte leads allow any trail byte.
// NOTE(review): c0 and c1 are not parenthesised, pass simple expressions only.
#define UTF8_3_ISVALID(c0, c1) \
( ( c0 == 0xE0 && IN_RANGE(c1, 0xA0, 0xBF) ) || \
( c0 == 0xED && IN_RANGE(c1, 0x80, 0x9F) ) || \
( UTF8_TRAIL(c1) && (IN_RANGE(c0, 0xE1, 0xEC) || IN_RANGE(c0, 0xEE, 0xEF)) ) )
// 0xF0 only allows [0x90, 0xBF] (smaller values would be an overlong encoding),
// 0xF4 only allows [0x80, 0x8F] (larger values would exceed 0x10FFFF),
// 0xF1-0xF3 allow any trail byte.
#define UTF8_4_ISVALID(c0, c1) \
( ( c0 == 0xF0 && IN_RANGE(c1, 0x90, 0xBF) ) || \
( c0 == 0xF4 && IN_RANGE(c1, 0x80, 0x8F) ) || \
( UTF8_TRAIL(c1) && IN_RANGE(c0, 0xF1, 0xF3) ) )
// Decoders: combine the payload bits of an already validated sequence into a code point
#define UTF32_FROM_UTF8_2(c0, c1) \
( ( ( (c0) & 0x1F ) << 6 ) | \
( (c1) & 0x3F ) )
#define UTF32_FROM_UTF8_3(c0, c1, c2) \
( ( ( (c0) & 0x0F ) << 12 ) | \
( ( (c1) & 0x3F ) << 6 ) | \
( (c2) & 0x3F ) )
#define UTF32_FROM_UTF8_4(c0, c1, c2, c3) \
( ( ( (c0) & 0x07 ) << 18 ) | \
( ( (c1) & 0x3F ) << 12 ) | \
( ( (c2) & 0x3F ) << 6 ) | \
( (c3) & 0x3F ) )
// 10 payload bits from each surrogate, offset by 0x10000
#define UTF32_FROM_UTF16_SURROGATE(lead, trail) \
( ( ( ( (lead) & 0x3FF ) << 10 ) | ( (trail) & 0x3FF ) ) + 0x10000 )
// Encoders: store the encoded form of code point `cp` into the array `dst` / `mbc`.
//
// Each macro expands to a sequence of assignment statements (not a single expression),
// so it has to be used as a statement inside a braced block.
// The caller is responsible for `cp` being in the range of the chosen form:
//   UTF16_SURROGATE_FROM_UTF32  [0x10000, 0x10FFFF]  writes 2 units
//   UTF8_2_FROM_UTF32           [0x80, 0x7FF]        writes 2 bytes
//   UTF8_3_FROM_UTF32           [0x800, 0xFFFF]      writes 3 bytes
//   UTF8_4_FROM_UTF32           [0x10000, 0x10FFFF]  writes 4 bytes
#define UTF16_SURROGATE_FROM_UTF32(dst, cp) \
    (dst)[0] = 0xD800 | ( ( (cp) - 0x10000 ) >> 10 ); \
    (dst)[1] = 0xDC00 | ( ( (cp) - 0x10000 ) & 0x3FF );

#define UTF8_2_FROM_UTF32(mbc, cp) \
    (mbc)[0] = 0xC0 | ( (cp) >> 6 ); \
    (mbc)[1] = 0x80 | ( (cp) & 0x3F );

// The last line must not end in a line continuation,
// otherwise the next #define is swallowed into this macro.
#define UTF8_3_FROM_UTF32(mbc, cp) \
    (mbc)[0] = 0xE0 | ( (cp) >> 12 ); \
    (mbc)[1] = 0x80 | ( ( (cp) >> 6 ) & 0x3F ); \
    (mbc)[2] = 0x80 | ( (cp) & 0x3F );

#define UTF8_4_FROM_UTF32(mbc, cp) \
    (mbc)[0] = 0xF0 | ( (cp) >> 18 ); \
    (mbc)[1] = 0x80 | ( ( (cp) >> 12 ) & 0x3F ); \
    (mbc)[2] = 0x80 | ( ( (cp) >> 6 ) & 0x3F ); \
    (mbc)[3] = 0x80 | ( (cp) & 0x3F );
// Escaping mode of SQUnicodeToUTF8 / UTF8Length, passed as their template argument
typedef enum
{
// Plain conversion, nothing is escaped
kUTFNoEscape = 0,
// Use 'x' as hex escape, escape invalid unicode
kUTFEscape = 1,
// Same as kUTFEscape, escape all backslashes as well, quote the whole input
kUTFEscapeQuoted,
// Use 'u' as hex escape
kUTFEscapeJSON,
} EUTFEscape;
// Returns the byte count of the UTF8 sequence at src, 0 if it is not accepted
inline int IsValidUTF8( unsigned char *src, unsigned int srclen );
#ifdef SQUNICODE
inline int IsValidUnicode( const SQChar *src, unsigned int srclen );
// Both converters return the amount written to dst.
// Passing dst == NULL makes them only compute that amount without writing anything.
template < bool undoEscape = false >
inline unsigned int UTF8ToSQUnicode( SQChar *dst, unsigned int destSize, const char *src, unsigned int srclen );
template < EUTFEscape escape = kUTFNoEscape >
inline unsigned int SQUnicodeToUTF8( char *dst, unsigned int destSize, const SQChar *src, unsigned int srclen );
// Returns code unit count
// (dry run of UTF8ToSQUnicode with a NULL destination)
template < bool undoEscape = false >
inline unsigned int SQUnicodeLength( const char *src, unsigned int srclen )
{
return UTF8ToSQUnicode< undoEscape >( NULL, 0, src, srclen );
}
// Returns byte length
// (dry run of SQUnicodeToUTF8 with a NULL destination)
template < EUTFEscape escape = kUTFNoEscape >
inline unsigned int UTF8Length( const SQChar *src, unsigned int srclen )
{
return SQUnicodeToUTF8< escape >( NULL, 0, src, srclen );
}
#endif
// Number of bytes the multibyte (UTF8) form of `src` takes.
// In non-unicode builds SQChar is already a byte, so this is just `srclen`.
inline unsigned int scstombslen( const SQChar *src, unsigned int srclen )
{
#ifndef SQUNICODE
    // one SQChar, one byte; the pointer is not needed
    (void)src;
    return srclen;
#else
    return UTF8Length( src, srclen );
#endif
}
// Writes the multibyte (UTF8) form of `src` into `dst`, limited to `destSize` bytes.
// Returns the number of bytes written. The output is not NUL terminated.
inline unsigned int scstombs( char *dst, unsigned int destSize, const SQChar *src, unsigned int srclen )
{
#ifndef SQUNICODE
    // already bytes: plain copy, truncated to what fits
    unsigned int copied = min( srclen, destSize );
    memcpy( dst, src, copied );
    return copied;
#else
    return SQUnicodeToUTF8( dst, destSize, src, srclen );
#endif
}
// Expands a string view into two arguments: length first, pointer second.
// (Presumably meant for the "%.*s" style FMT_VSTR / FMT_VCSTR formats - confirm at the call sites.)
// `s` is evaluated twice.
#define STR_EXPAND(s) (s).len, (s).ptr
// Non-owning view of `len` chars starting at `ptr`.
//
// The viewed memory is neither owned nor copied and is not required to be NUL terminated.
// A default constructed view is empty with a null pointer,
// every member below accepts such a view.
struct string_t
{
    char *ptr = nullptr;
    unsigned int len = 0;

    string_t() {}

    string_t( const char *src, unsigned int size ) :
        ptr((char*)src),
        len(size)
    {
    }

    // Views the used part of a string buffer, defined after stringbufbase_t
    string_t( const stringbufbase_t &src );

#ifndef SQUNICODE
    string_t( SQString *src ) :
        ptr(src->_val),
        len(src->_len)
    {
    }
#endif

    // String literal constructor, the terminating NUL is not part of the view
    template < int SIZE >
    string_t( const char (&src)[SIZE] ) :
        ptr((char*)src),
        len(SIZE-1)
    {
        // input wasn't a string literal,
        // call ( src, size ) constructor instead
        Assert( strlen(src) == len );
    }

    // An empty prefix matches every string
    template < int SIZE >
    bool StartsWith( const char (&other)[SIZE] ) const
    {
        if ( (unsigned int)( SIZE-1 ) > len )
            return false;

        return SameBytes( ptr, other, SIZE-1 );
    }

    bool StartsWith( const string_t &other ) const
    {
        if ( other.len > len )
            return false;

        return SameBytes( ptr, other.ptr, other.len );
    }

    // Two empty strings are equal, whatever their pointers are
    template < int SIZE >
    bool IsEqualTo( const char (&other)[SIZE] ) const
    {
        if ( (unsigned int)( SIZE-1 ) != len )
            return false;

        return SameBytes( ptr, other, SIZE-1 );
    }

    bool IsEqualTo( const char *other, unsigned int size ) const
    {
        if ( len != size )
            return false;

        return SameBytes( ptr, other, size );
    }

    bool IsEqualTo( const string_t &other ) const
    {
        if ( len != other.len )
            return false;

        return SameBytes( ptr, other.ptr, len );
    }

#ifdef SQUNICODE
    // ASCII only comparison against a wide string, defined after sqstring_t
    bool IsEqualTo( const sqstring_t &other ) const;
#else
    bool IsEqualTo( const SQString *other ) const
    {
        if ( (SQUnsignedInteger)len != (SQUnsignedInteger)other->_len )
            return false;

        return SameBytes( ptr, other->_val, sq_rsl(len) );
    }
#endif

    bool IsEmpty() const
    {
        return !len;
    }

    bool Contains( char ch ) const
    {
        // do not hand a null pointer to memchr
        return ( len != 0 && memchr( ptr, ch, len ) != NULL );
    }

    template < int SIZE >
    void Assign( const char (&src)[SIZE] )
    {
        ptr = (char*)src;
        len = SIZE - 1;
        Assert( strlen(src) == len );
    }

    void Assign( const char *src, unsigned int size )
    {
        ptr = (char*)src;
        len = size;
    }

#ifndef SQUNICODE
    void Assign( const SQString *src )
    {
        ptr = (SQChar*)src->_val;
        len = src->_len;
    }
#endif

private:
    // Compares `size` bytes. Nothing is dereferenced when size is 0,
    // so empty views with null or unrelated pointers compare equal.
    // The first byte is checked separately as a cheap early out.
    static bool SameBytes( const char *a, const char *b, unsigned int size )
    {
        if ( !size )
            return true;

        return ( *a == *b && !memcmp( a, b, size ) );
    }

    // Declared but not defined: implicit conversion to a C string and
    // assignment from one are disabled, the view is not NUL terminated.
    operator const char*();
    operator char*();
    string_t &operator=( const char *src );
};
// string_t that can only be built empty or from a char array (string literal)
struct conststring_t : string_t
{
    conststring_t()
    {
    }

    // Forwards to the string literal constructor of string_t
    template < int SIZE >
    conststring_t( const char (&src)[SIZE] ) :
        string_t(src)
    {
    }
};
#ifdef SQUNICODE
// Non-owning view of `len` SQChar code units starting at `ptr` (SQUNICODE builds).
//
// The viewed memory is neither owned nor copied.
// A default constructed view is empty with a null pointer, same as string_t.
struct sqstring_t
{
    SQChar *ptr = nullptr;
    unsigned int len = 0;

    sqstring_t() {}

    sqstring_t( const SQString *src ) :
        ptr((SQChar*)src->_val),
        len(src->_len)
    {
    }

    sqstring_t( const SQChar *src, unsigned int size ) :
        ptr((SQChar*)src),
        len(size)
    {
    }

    // String literal constructor, the terminating NUL is not part of the view
    template < int SIZE >
    sqstring_t( const SQChar (&src)[SIZE] ) :
        ptr((SQChar*)src),
        len(SIZE-1)
    {
        Assert( scstrlen(src) == len );
    }

    // ASCII only prefix test against a narrow string.
    // An empty prefix matches every string.
    bool StartsWith( const string_t &other ) const
    {
        if ( other.len > len )
            return false;

        for ( unsigned int i = 0; i < other.len; i++ )
        {
            if ( (SQUnsignedChar)ptr[i] > 0x7E || other.ptr[i] != (char)ptr[i] )
            {
                // > 0x7E can be reached through completions request
                // unicode identifiers are not supported, ignore them
                return false;
            }
        }

        return true;
    }

    // Two empty strings are equal, whatever their pointers are
    bool IsEqualTo( const sqstring_t &other ) const
    {
        if ( len != other.len )
            return false;

        if ( !len )
            return true;

        return ( *ptr == *other.ptr && !memcmp( ptr, other.ptr, sq_rsl(len) ) );
    }

    bool IsEqualTo( const SQString *other ) const
    {
        if ( (SQUnsignedInteger)len != (SQUnsignedInteger)other->_len )
            return false;

        if ( !len )
            return true;

        return ( *ptr == *other->_val && !memcmp( ptr, other->_val, sq_rsl(len) ) );
    }

    bool IsEmpty() const
    {
        return !len;
    }

    template < int SIZE >
    void Assign( const SQChar (&src)[SIZE] )
    {
        ptr = (SQChar*)src;
        len = SIZE - 1;
        Assert( scstrlen(src) == len );
    }

    void Assign( const SQChar *src, unsigned int size )
    {
        ptr = (SQChar*)src;
        len = size;
    }

    void Assign( const SQString *src )
    {
        ptr = (SQChar*)src->_val;
        len = src->_len;
    }
};
#endif
// Appending writer over a fixed size char buffer that it does not own.
//
//   ptr   start of the buffer
//   len   bytes used so far
//   size  capacity in bytes
//
// Every Put* member silently truncates or drops output that does not fit.
// Nothing is NUL terminated until Term() is called.
struct stringbufbase_t
{
    char *ptr;
    unsigned int len;
    const unsigned int size;

    stringbufbase_t( char *src, unsigned int nSize ) :
        ptr( src ),
        len( 0 ),
        size( nSize )
    {
    }

    // Copies share the same underlying buffer
    stringbufbase_t( stringbufbase_t &src ) :
        ptr( src.ptr ),
        len( src.len ),
        size( src.size )
    {
    }

    stringbufbase_t( const stringbufbase_t &src ) :
        ptr( src.ptr ),
        len( src.len ),
        size( src.size )
    {
    }

    // Declared only, assignment is not available
    stringbufbase_t &operator=( const stringbufbase_t & );

    int BufSize()
    {
        return size;
    }

    int BytesLeft()
    {
        return BufSize() - len;
    }

    // Appends a string literal without its terminator
    template < int SIZE >
    void Puts( const char (&psz)[SIZE] )
    {
        int count = min( BytesLeft(), (SIZE-1) );
        memcpy( ptr + len, psz, count );
        len += count;
    }

    void Puts( const string_t &str )
    {
        Assert( str.len < INT_MAX );

        int count = min( BytesLeft(), (int)str.len );
        memcpy( ptr + len, str.ptr, count );
        len += count;
    }

#ifdef SQUNICODE
    // Appends the UTF8 form of a wide string
    void Puts( const sqstring_t &str )
    {
        len += SQUnicodeToUTF8( ptr + len, BytesLeft(), str.ptr, str.len );
    }
#endif

    void Put( char ch )
    {
        Assert( len < INT_MAX );

        if ( (int)( len + 1 ) <= BufSize() )
            ptr[len++] = ch;
    }

    // NUL terminates the buffer, giving up the last character if the buffer is full
    void Term()
    {
        const int last = BufSize()-1;

        if ( (int)len > last )
            len = last;

        ptr[len] = 0;

        Assert( strlen(ptr) == len );
    }

    template < typename I >
    void PutInt( I value )
    {
        const int space = BytesLeft();

        if ( space >= 1 )
            len += printint( ptr + len, space, value );
    }

    // Appends "0x" followed by the hex digits of an unsigned value;
    // padding pads with zeros to the full width of I
    template < typename I >
    void PutHex( I value, bool padding = true )
    {
        STATIC_ASSERT( IS_UNSIGNED( I ) );

        const int space = BytesLeft();

        if ( space < 3 )
            return;

        len += padding ?
            printhex< true >( ptr + len, space, value ) :
            printhex< false >( ptr + len, space, value );
    }
};
// Views the used part (`len` bytes) of a string buffer.
// Defined out of class because stringbufbase_t is incomplete inside string_t;
// `inline` keeps this header usable from more than one translation unit.
inline string_t::string_t( const stringbufbase_t &src ) :
    ptr(src.ptr),
    len(src.len)
{
}
#ifdef SQUNICODE
// Compares this narrow string against a wide string, code unit by code unit.
// Only ASCII is supported: any wide character above 0x7E makes the strings unequal.
// Two empty strings are equal.
// `inline` keeps this header usable from more than one translation unit.
inline bool string_t::IsEqualTo( const sqstring_t &other ) const
{
    if ( len != other.len )
        return false;

    for ( unsigned int i = 0; i < len; i++ )
    {
        // Used for comparing against locals and outers,
        // implement unicode conversion if locals can have unicode characters
        if ( (SQUnsignedChar)other.ptr[i] > 0x7E || (char)other.ptr[i] != ptr[i] )
        {
            AssertMsg( (SQUnsignedChar)other.ptr[i] <= 0x7E, "not implemented" );
            return false;
        }
    }

    return true;
}
#endif
// String buffer with inline storage of BUFSIZE bytes.
// The array member intentionally has the same name as the pointer in the base,
// the base is handed the address of the array.
template < int BUFSIZE >
struct stringbuf_t : stringbufbase_t
{
    char ptr[BUFSIZE];

    stringbuf_t() :
        stringbufbase_t( &ptr[0], BUFSIZE )
    {
    }
};
// Number of digits needed to write `input` in base BASE, without a sign.
// 0 takes one digit.
template < int BASE = 10, typename I >
inline int countdigits( I input )
{
    int digits = 1;

    // every further non-zero quotient is one more digit
    for ( input /= BASE; input; input /= BASE )
        ++digits;

    return digits;
}
// Writes the decimal form of `value` into `buf`, without NUL terminator.
//
//   buf    destination, must not be NULL
//   size   capacity of buf in characters, expected to be > 0
//   value  any integer type
//
// Returns the number of characters written, never more than `size`.
// If the number does not fit, only the lowest digits that fit are written
// (after the '-' sign for negative values) - nothing is ever written outside of buf.
template < typename C, typename I >
inline int printint( C *buf, int size, I value )
{
    Assert( buf );
    Assert( size > 0 );

    if ( size <= 0 )
        return 0;

    if ( !value )
    {
        buf[0] = '0';
        return 1;
    }

    const bool negative = ( value < 0 );
    const int first = negative ? 1 : 0;

    int len = countdigits( value ) + first;

    if ( len > size )
        len = size;

    if ( negative )
        buf[0] = '-';

    // Digits are written back to front and stop at the start of the buffer.
    // The value is never negated, so the minimum value of I does not overflow:
    // the remainder of a negative value is <= 0 and is negated per digit instead.
    for ( int i = len - 1; i >= first; i-- )
    {
        const int digit = (int)( value % 10 );
        value /= 10;
        buf[i] = (C)( '0' + ( negative ? -digit : digit ) );
    }

    return len;
}
template < bool padding, bool prefix, bool uppercase, typename C, typename I >
inline int printhex( C *buf, int size, I value )
{
STATIC_ASSERT( IS_UNSIGNED( as_unsigned_type( I ) ) );
Assert( buf );
Assert( size > 0 );
int len = ( prefix ? 2 : 0 ) + ( padding ? sizeof(I) * 2 : countdigits<16>( cast_unsigned( I, value ) ) );
if ( len > size )
len = size;
int i = len - 1;
do
{
C c = cast_unsigned( I, value ) & 0xf;
*(as_unsigned_type(I)*)&value >>= 4;
buf[i--] = c + ( ( c < 10 ) ? '0' : ( ( uppercase ? 'A' : 'a' ) - 10 ) );
}
while ( value );
if ( padding )
{
while ( i >= ( prefix ? 2 : 0 ) )
buf[i--] = '0';
}
if ( prefix )
{
if ( i >= 0 )
{
buf[i--] = 'x';
if ( i == 0 )
buf[i--] = '0';
}
}
Assert( i == -1 );
return len;
}
// Writes the octal form of an unsigned `value` into `buf` with a leading '0',
// without NUL terminator.
//
//   buf   destination, must not be NULL
//   size  capacity of buf in characters, expected to be > 0
//
// Returns the number of characters written, never more than `size`.
// If the output does not fit, the front is cut off -
// nothing is ever written outside of buf.
template < typename C, typename I >
inline int printoct( C *buf, int size, I value )
{
    STATIC_ASSERT( IS_UNSIGNED( I ) );
    Assert( buf );
    Assert( size > 0 );

    if ( size <= 0 )
        return 0;

    // one extra character for the leading '0'
    int len = countdigits<8>( value ) + 1;

    if ( len > size )
        len = size;

    int i = len - 1;

    // digits, back to front, bounded by the start of the buffer
    do
    {
        C c = value & 0x7;
        value >>= 3;
        buf[i--] = '0' + c;
    }
    while ( value && i >= 0 );

    if ( i >= 0 )
        buf[i--] = '0';

    Assert( i == -1 );
    return len;
}
// Parses a decimal integer with an optional leading '-'.
//
// The whole of `str` has to consist of digits, it must not be empty.
// On success stores the value in *out and returns true,
// otherwise stores 0 and returns false. Overflow is not detected.
template < typename I >
inline bool atoi( string_t str, I *out )
{
    Assert( str.ptr && str.len > 0 );

    const char *cur = str.ptr;
    unsigned int remaining = str.len;

    const bool negative = ( *cur == '-' );

    if ( negative )
    {
        ++cur;
        --remaining;
    }

    I acc = 0;

    while ( remaining-- )
    {
        const unsigned char ch = *cur++;

        if ( !IN_RANGE_CHAR( ch, '0', '9' ) )
        {
            *out = 0;
            return false;
        }

        acc *= 10;
        acc += ch - '0';
    }

    if ( negative )
        *out = -acc;
    else
        *out = acc;

    return true;
}
// Parses a hexadecimal integer with an optional "0x" prefix.
//
// Both upper and lower case digits are accepted.
// On success stores the value in *out and returns true,
// otherwise stores 0 and returns false. Overflow is not detected.
template < typename I >
inline bool atox( string_t str, I *out )
{
    const char *cur = str.ptr;
    unsigned int remaining = str.len;

    if ( str.StartsWith("0x") )
    {
        cur += 2;
        remaining -= 2;
    }

    I acc = 0;

    for ( ; remaining--; cur++ )
    {
        const unsigned char ch = *cur;
        int nibble;

        if ( IN_RANGE_CHAR( ch, '0', '9' ) )
        {
            nibble = ch - '0';
        }
        else if ( IN_RANGE_CHAR( ch, 'A', 'F' ) )
        {
            nibble = ch - 'A' + 10;
        }
        else if ( IN_RANGE_CHAR( ch, 'a', 'f' ) )
        {
            nibble = ch - 'a' + 10;
        }
        else
        {
            *out = 0;
            return false;
        }

        acc <<= 4;
        acc += nibble;
    }

    *out = acc;
    return true;
}
// Parses an octal integer, every character has to be in ['0', '7'].
//
// On success stores the value in *out and returns true,
// otherwise stores 0 and returns false. Overflow is not detected.
template < typename I >
inline bool atoo( string_t str, I *out )
{
    const char *cur = str.ptr;
    unsigned int remaining = str.len;

    I acc = 0;

    while ( remaining-- )
    {
        const unsigned char ch = *cur++;

        if ( !IN_RANGE_CHAR( ch, '0', '7' ) )
        {
            *out = 0;
            return false;
        }

        acc <<= 3;
        acc += ch - '0';
    }

    *out = acc;
    return true;
}
// Parses an integer that is hexadecimal if it starts with "0x", decimal otherwise.
// Return value and *out are those of atox / atoi.
template < typename I >
inline bool strtoint( string_t str, I *out )
{
    if ( str.StartsWith("0x") )
        return atox( str, out );

    return atoi( str, out );
}
// Checks the UTF8 sequence starting at src[0], srclen is the number of bytes available.
//
// Returns the byte count (1-4) of an accepted sequence.
// Returns 0 for control characters ([0x00, 0x20) and 0x7F)
// Returns 0 for stray trail bytes, invalid lead bytes, truncated or ill-formed sequences
//
// NOTE(review): an earlier comment also promised 0 for noncharacters,
// no such check is visible in this function.
//
// Strings are always processed linearly from the start, so there is no look behind
// from a trail byte to its lead byte.
inline int IsValidUTF8( unsigned char *src, unsigned int srclen )
{
    const unsigned char lead = src[0];

    // ASCII
    if ( lead <= 0x7E )
        return ( lead >= 0x20 ) ? 1 : 0;

    // [0x7F, 0xC2) & (0xF4, 0xFF]
    if ( !IN_RANGE_CHAR( lead, 0xC2, 0xF4 ) )
        return 0;

    if ( UTF8_2_LEAD(lead) )
        return UTF8_2( srclen, lead, src ) ? 2 : 0;

    if ( UTF8_3_LEAD(lead) )
        return UTF8_3( srclen, lead, src ) ? 3 : 0;

    if ( UTF8_4_LEAD(lead) )
        return UTF8_4( srclen, lead, src ) ? 4 : 0;

    return 0;
}
#ifdef SQUNICODE
// Checks the character starting at src[0], srclen is the number of code units available.
//
// Returns code unit count for valid unicode
// Returns 0 for control characters ([0x00, 0x20) and [0x7F, 0xA0))
// Returns -1 if the invalid code unit is larger than 1 byte
// (lone or misordered surrogate, or a value above 0x10FFFF)
// Noncharacters and private use areas are valid
inline int IsValidUnicode( const SQChar *src, unsigned int srclen )
{
    const uint32_t cp = (uint32_t)src[0];

    if ( cp < 0x20 )
        return 0;

    if ( cp <= 0x7E )
        return 1;

    // DEL and C1 controls
    if ( cp < 0xA0 )
        return 0;

    if ( cp > 0x10FFFF )
        return -1;

    if ( cp > 0xFFFF )
        return 2;

    if ( !UTF_SURROGATE(cp) )
        return 1;

    // a surrogate is only valid as a lead directly followed by a trail
    const bool paired = ( srclen > 1 && UTF_SURROGATE_LEAD(cp) && UTF_SURROGATE_TRAIL(src[1]) );
    return paired ? 2 : -1;
}
// Converts `srclen` bytes of UTF8 at `src` into SQChar code units.
//
// dst        destination, or NULL to only count the code units that would be written
// destSize   capacity of dst in bytes (it is consumed in steps of sizeof(SQChar))
// undoEscape when true, backslash escape sequences in the input are decoded as well
//
// Returns the number of code units written (or counted). The output is not NUL terminated.
// Conversion stops early when dst runs out of space.
// Bytes that do not form a valid UTF8 sequence are passed through as one code unit each.
//
// NOTE(review): the `supplementary` label only exists when WCHAR_SIZE is 2 or 4.
template < bool undoEscape >
inline unsigned int UTF8ToSQUnicode( SQChar *dst, unsigned int destSize, const char *src, unsigned int srclen )
{
uint32_t cp;
const char *end = src + srclen;
unsigned int count = 0;
// src always points at the first byte of the current sequence here,
// the branches below advance it to the last byte that was consumed
for ( ; src < end; src++ )
{
cp = (uint32_t)((unsigned char*)src)[0];
if ( cp <= 0x7E )
{
if ( undoEscape )
{
if ( cp == '\\' && src + 1 < end )
{
// An unknown character after the backslash leaves both characters untouched
switch ( ((unsigned char*)src)[1] )
{
case '\\': src++; break;
case '\"': cp = '\"'; src++; break;
case '\'': cp = '\''; src++; break;
case 'a': cp = '\a'; src++; break;
case 'b': cp = '\b'; src++; break;
case 'f': cp = '\f'; src++; break;
case 'n': cp = '\n'; src++; break;
case 'r': cp = '\r'; src++; break;
case 't': cp = '\t'; src++; break;
case 'v': cp = '\v'; src++; break;
// "\x" is followed by two hex digits per byte of SQChar.
// The result of atox is not checked, it sets cp to 0 if a digit is invalid.
case 'x':
{
if ( src + sizeof(SQChar) * 2 + 1 < end )
{
atox( { src + 2, sizeof(SQChar) * 2 }, &cp );
src += sizeof(SQChar) * 2 + 1;
}
break;
}
// "\u" is followed by 4 hex digits
case 'u':
{
if ( src + sizeof(uint16_t) * 2 + 1 < end )
{
atox( { src + 2, sizeof(uint16_t) * 2 }, &cp );
src += sizeof(uint16_t) * 2 + 1;
}
break;
}
}
}
}
goto xffff;
}
else if ( IN_RANGE( cp, 0xC2, 0xF4 ) )
{
if ( UTF8_2_LEAD(cp) )
{
if ( UTF8_2( end - src, cp, (unsigned char*)src ) )
{
cp = UTF32_FROM_UTF8_2( cp, src[1] );
src += 1;
goto xffff;
}
}
else if ( UTF8_3_LEAD(cp) )
{
if ( UTF8_3( end - src, cp, (unsigned char*)src ) )
{
cp = UTF32_FROM_UTF8_3( cp, src[1], src[2] );
src += 2;
goto xffff;
}
}
else if ( UTF8_4_LEAD(cp) )
{
if ( UTF8_4( end - src, cp, (unsigned char*)src ) )
{
cp = UTF32_FROM_UTF8_4( cp, src[1], src[2], src[3] );
src += 3;
goto supplementary;
}
}
// ill-formed sequence, emit the lead byte on its own
goto xffff;
}
else // [0x7F, 0xC2) & (0xF4, 0xFF]
{
goto xffff;
}
// Emit cp as a single code unit
xffff:
#if WCHAR_SIZE == 4
// a 4 byte SQChar holds supplementary code points directly
supplementary:
#endif
if ( dst )
{
if ( sizeof(SQChar) <= destSize )
{
*dst++ = (SQChar)cp;
destSize -= sizeof(SQChar);
count += 1;
}
else
{
// out of space
break;
}
}
else
{
count += 1;
}
continue;
#if WCHAR_SIZE == 2
// Emit cp as a surrogate pair
supplementary:
if ( dst )
{
if ( sizeof(SQChar) * 2 <= destSize )
{
UTF16_SURROGATE_FROM_UTF32( dst, cp );
dst += 2;
destSize -= sizeof(SQChar) * 2;
count += 2;
}
else
{
// no room for the pair, try to emit a single U+FFFD instead
cp = 0xFFFD;
goto xffff;
}
}
else
{
count += 2;
}
continue;
#endif
}
return count;
}
// Converts `srclen` SQChar code units at `src` into UTF8, optionally escaping them.
// SQUnicode can be UTF16 or UTF32
//
// dst       destination, or NULL to only count the bytes that would be written
// destSize  capacity of dst in bytes
// escape    escaping mode, see EUTFEscape
//
// Returns the number of bytes written (or counted). The output is not NUL terminated.
// A character is either written completely or not at all,
// conversion stops at the first character that does not fit.
template < EUTFEscape escape >
inline unsigned int SQUnicodeToUTF8( char *dst, unsigned int destSize, const SQChar *src, unsigned int srclen )
{
uint32_t cp;
const SQChar *end = src + srclen;
// Staging buffer for one output character, sized for the longest form of the escape mode
unsigned char mbc[ escape != 0 ?
( escape == kUTFEscapeJSON ?
6 : // "\u0000"
( escape == kUTFEscapeQuoted ?
14 : // "\\uD800\\uDC00"
// kUTFEscape
12 ) ) : // "\uD800\uDC00"
4 ];
unsigned int count = 0;
unsigned int bytes;
// Quoted mode wraps the output in an escaped quote (backslash + '"'), this is the opening one
if ( escape == kUTFEscapeQuoted )
{
mbc[0] = '\\';
mbc[1] = '\"';
bytes = 2;
if ( dst )
{
if ( bytes <= destSize )
{
memcpy( dst, mbc, bytes );
dst += bytes;
destSize -= bytes;
count += bytes;
}
else
{
// out of space
return count;
}
}
else
{
count += bytes;
}
}
// Each iteration stages one character in mbc[0 .. bytes) and reaches `write`
for ( ; src < end; src++ )
{
cp = (uint32_t)src[0];
if ( cp <= 0xFF )
{
// NOTE(review): without escaping every value up to 0xFF,
// including [0x80, 0xFF], is emitted as a single raw byte (end of this branch)
if ( escape )
{
bytes = 0;
// Quoted mode doubles the backslash of every escape
switch ( cp )
{
case '\\':
case '\"':
if ( escape == kUTFEscapeQuoted )
{
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
}
mbc[bytes++] = '\\';
mbc[bytes++] = (unsigned char)cp;
goto write;
// JSON mode uses the hex escape for '\a' and '\v' instead of the named one
case '\a':
if ( escape == kUTFEscapeJSON )
goto doescape;
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'a';
goto write;
case '\b':
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'b';
goto write;
case '\f':
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'f';
goto write;
case '\n':
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'n';
goto write;
case '\r':
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'r';
goto write;
case '\t':
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 't';
goto write;
case '\v':
if ( escape == kUTFEscapeJSON )
goto doescape;
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'v';
goto write;
default:
if ( !IN_RANGE_CHAR( cp, 0x20, 0x7E ) )
{
// Convert UTF8 bytes in UTF16 by default with the assumption of
// most editors using UTF8 without BOM,
// and files being likely read plain (no conversion/ISO 8859-1)
// However, this will make certain distinct SQ strings (e.g. "\xC3\xBC", "\xFC")
// indistinguishable to the client
#ifndef SQDBG_DONT_CONVERT_UTF8_BYTES_IN_UTF16
// The code units are treated as raw bytes here: if they form a valid UTF8 sequence,
// that sequence is copied through unchanged
if ( IN_RANGE( cp, 0xC2, 0xF4 ) )
{
if ( UTF8_2_LEAD(cp) )
{
if ( UTF8_2( end - src, cp, src ) )
{
mbc[0] = (unsigned char)cp;
mbc[1] = (unsigned char)src[1];
bytes = 2;
src += 1;
goto write;
}
}
else if ( UTF8_3_LEAD(cp) )
{
if ( UTF8_3( end - src, cp, src ) )
{
mbc[0] = (unsigned char)cp;
mbc[1] = (unsigned char)src[1];
mbc[2] = (unsigned char)src[2];
bytes = 3;
src += 2;
goto write;
}
}
else if ( UTF8_4_LEAD(cp) )
{
if ( UTF8_4( end - src, cp, src ) )
{
mbc[0] = (unsigned char)cp;
mbc[1] = (unsigned char)src[1];
mbc[2] = (unsigned char)src[2];
mbc[3] = (unsigned char)src[3];
bytes = 4;
src += 3;
goto write;
}
}
}
#endif
// printable Latin-1 is encoded as a regular 2 byte sequence
if ( cp >= 0xA0 ) // [0xA0, 0xFF]
goto x7ff;
// Also the jump target of the '\a' and '\v' cases in JSON mode (bytes is 0 there)
doescape:
// [0x00, 0x20) & (0x7E, 0xA0)
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
if ( escape == kUTFEscapeJSON )
{
mbc[bytes++] = 'u';
bytes += printhex< true, false >( mbc + bytes, sizeof(mbc) - bytes, (uint16_t)cp );
}
else
{
mbc[bytes++] = 'x';
bytes += printhex< true, false >( mbc + bytes, sizeof(mbc) - bytes, (SQUnsignedChar)cp );
}
goto write;
}
}
}
// printable ASCII, or any value up to 0xFF when not escaping
mbc[0] = (unsigned char)cp;
bytes = 1;
}
else if ( cp <= 0x7FF )
{
x7ff:
UTF8_2_FROM_UTF32( mbc, cp );
bytes = 2;
}
else if ( cp <= 0xFFFF )
{
if ( UTF_SURROGATE(cp) )
{
// a lead followed by a trail is combined and encoded as 4 bytes
if ( src + 1 < end && UTF_SURROGATE_LEAD(cp) && UTF_SURROGATE_TRAIL(src[1]) )
{
cp = UTF32_FROM_UTF16_SURROGATE( cp, (uint32_t)src[1] );
src++;
goto supplementary;
}
// Unpaired surrogate: hex escaped in kUTFEscape / kUTFEscapeQuoted,
// encoded as 3 bytes like any other value otherwise
if ( escape && escape != kUTFEscapeJSON )
{
bytes = 0;
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'x';
bytes = bytes + printhex< true, false >( mbc + bytes, sizeof(mbc) - bytes, (SQUnsignedChar)cp );
goto write;
}
}
UTF8_3_FROM_UTF32( mbc, cp );
bytes = 3;
}
else
{
// cp > 0xFFFF
// NOTE(review): the escaped form is only taken for values above 0x10FFFF,
// which are outside of the input range of UTF16_SURROGATE_FROM_UTF32 - confirm the condition
if ( cp > 0x10FFFF && escape && escape != kUTFEscapeJSON )
{
// "\\uD800\\uDC00"
uint16_t s[2];
UTF16_SURROGATE_FROM_UTF32( s, cp );
bytes = 0;
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'u';
bytes += printhex< true, false >( mbc + bytes, sizeof(mbc) - bytes, s[0] );
if ( escape == kUTFEscapeQuoted )
mbc[bytes++] = '\\';
mbc[bytes++] = '\\';
mbc[bytes++] = 'u';
bytes += printhex< true, false >( mbc + bytes, sizeof(mbc) - bytes, s[1] );
goto write;
}
supplementary:
UTF8_4_FROM_UTF32( mbc, cp );
bytes = 4;
}
// Flush the staged character: copy it if it fits, or only count it when dst is NULL
write:
if ( dst )
{
if ( bytes <= destSize )
{
memcpy( dst, mbc, bytes );
dst += bytes;
destSize -= bytes;
count += bytes;
}
else
{
// out of space
break;
}
}
else
{
count += bytes;
}
}
// Closing quote of quoted mode.
// It is attempted even if the loop above stopped early, and dropped if it does not fit.
if ( escape == kUTFEscapeQuoted )
{
mbc[0] = '\\';
mbc[1] = '\"';
bytes = 2;
if ( dst )
{
if ( bytes <= destSize )
{
memcpy( dst, mbc, bytes );
dst += bytes;
destSize -= bytes;
count += bytes;
}
else
{
// out of space
return count;
}
}
else
{
count += bytes;
}
}
return count;
}
#endif
#if defined(SQUNICODE) && !defined(_WIN32)
// Do case insensitive comparison for ASCII characters, ignore the rest
inline int sqdbg_wcsicmp( const SQChar *s1, const SQChar *s2 )
{
for (;;)
{
SQChar c1 = *s1++;
SQChar c2 = *s2++;
if ( !c1 || !c2 )
return c1 - c2;
if ( c1 == c2 )
continue;
if ( c1 >= 'A' && c1 <= 'Z' )
c1 |= 0x20;
if ( c2 >= 'A' && c2 <= 'Z' )
c2 |= 0x20;
if ( c1 == c2 )
continue;
return c1 - c2;
}
}
#endif
#endif // SQDBG_STRING_H