RingRacers/src/core/string.cpp

1127 lines
21 KiB
C++

// DR. ROBOTNIK'S RING RACERS
//-----------------------------------------------------------------------------
// Copyright (C) 2025 by Ronald "Eidolon" Kinard
// Copyright (C) 2025 by Kart Krew
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
#include "string.h"
#include "fmt/format.h"
#include <algorithm>
#include <cstring>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <string_view>
namespace srb2
{
// Special member functions are compiler-generated; they are defined here,
// out of line, rather than in the header.
// Copy constructor.
String::String(const String&) = default;
// Move constructor (declared non-throwing).
String::String(String&&) noexcept = default;
// Destructor.
String::~String() = default;
// Copy assignment.
String& String::operator=(const String&) = default;
// Move assignment (declared non-throwing).
String& String::operator=(String&&) noexcept = default;
// Construct from a NUL-terminated C string; forwards to the string_view constructor.
String::String(const char* rhs) : String(std::string_view(rhs))
{
}

// Construct from the first `len` bytes starting at `rhs`.
String::String(const char* rhs, size_t len) : String(std::string_view(rhs, len))
{
}

// Construct from the bytes of a std::string.
String::String(const std::string& rhs) : String(std::string_view(rhs.data(), rhs.size()))
{
}
// Construct from a view: start from the default (empty) state, then insert
// the viewed bytes at the current end.
String::String(std::string_view view) : String()
{
	insert(size(), view);
}
// Conversion to std::string: copies the bytes exposed by the string_view conversion.
String::operator std::string() const
{
	return std::string(static_cast<std::string_view>(*this));
}
// Conversion to a non-owning view covering size() bytes starting at data().
String::operator std::string_view() const
{
	const char* bytes = reinterpret_cast<const char*>(data());
	const size_t length = size();
	return std::string_view(bytes, length);
}
// Number of bytes in the string. The storage holds one extra trailing byte
// whenever it is non-empty, so that byte is not counted.
uint32_t String::size() const noexcept
{
	return data_.empty() ? 0 : static_cast<uint32_t>(data_.size() - 1);
}
// Shared empty C string returned when there is no storage to point at.
static constexpr const char* kEmptyString = "";

// Pointer to the bytes as a C string; an empty storage yields kEmptyString.
const char* String::c_str() const
{
	return data_.empty() ? kEmptyString : reinterpret_cast<const char*>(data_.data());
}
// Reserve storage for `capacity` bytes plus the trailing terminator byte.
// A request for zero bytes reserves nothing extra.
void String::reserve(size_type capacity)
{
	if (capacity != 0)
	{
		data_.reserve(capacity + 1);
	}
	else
	{
		data_.reserve(0);
	}
}
// Pointer to the first byte, or nullptr when the storage is empty.
uint8_t* String::begin() noexcept
{
	return data_.empty() ? nullptr : data();
}

// Pointer one past the last byte (data() + size()), or nullptr when the storage is empty.
uint8_t* String::end() noexcept
{
	return data_.empty() ? nullptr : data() + size();
}
// Const pointer to the first byte, or nullptr when the storage is empty.
const uint8_t* String::cbegin() const noexcept
{
	return data_.empty() ? nullptr : data();
}

// Const pointer one past the last byte (data() + size()), or nullptr when the storage is empty.
const uint8_t* String::cend() const noexcept
{
	return data_.empty() ? nullptr : data() + size();
}
// Bounds-checked byte access.
// Throws std::out_of_range when i is not less than size().
uint8_t& String::at(size_type i)
{
	if (i < size())
	{
		return data_.at(i);
	}
	throw std::out_of_range("string byte index out of bounds");
}

// Const overload of the bounds-checked byte access.
// Throws std::out_of_range when i is not less than size().
const uint8_t& String::at(size_type i) const
{
	if (i < size())
	{
		return data_.at(i);
	}
	throw std::out_of_range("string byte index out of bounds");
}
// Insert `count` copies of `ch` before byte `index`.
// Throws std::out_of_range when index > size().
String& String::insert(size_type index, size_type count, uint8_t ch)
{
	if (index > size())
	{
		throw std::out_of_range("string byte index out of bounds");
	}
	if (count == 0)
	{
		return *this;
	}
	if (data_.empty())
	{
		// An empty string has no terminator yet; add it first so the inserted
		// bytes end up in front of it and size() stays correct.
		data_.push_back(0);
	}
	data_.insert(data_.begin() + index, count, ch);
	return *this;
}
// Insert a NUL-terminated C string before byte `index`; the length is taken with strlen.
String& String::insert(size_type index, const char* s)
{
	const size_type length = static_cast<size_type>(std::strlen(s));
	return insert(index, s, length);
}
// Insert `count` bytes starting at `s` before byte `index`.
// Throws std::out_of_range when index > size().
String& String::insert(size_type index, const char* s, size_type count)
{
	if (index > size())
	{
		throw std::out_of_range("string byte index out of bounds");
	}

	// Take the terminator off while splicing, then put it back afterwards.
	const bool had_content = !empty();
	if (had_content)
	{
		data_.pop_back();
	}

	const char* source_end = s + count;
	data_.insert(data_.begin() + index, s, source_end);

	if (!(data_.size() == 0))
	{
		data_.push_back(0);
	}
	return *this;
}
// Insert the bytes of `str` before byte `index`. An empty view is a no-op.
String& String::insert(size_type index, std::string_view str)
{
	if (!str.empty())
	{
		const size_type length = static_cast<size_type>(str.size());
		return insert(index, str.data(), length);
	}
	return *this;
}
// Insert at most `count` bytes of `str`, starting at `s_index`, before byte `index`.
// An empty `str` is a no-op.
// Throws std::out_of_range when s_index > str.size().
String& String::insert(size_type index, std::string_view str, size_t s_index, size_t count)
{
	if (str.empty()) return *this;
	if (s_index > str.size())
	{
		throw std::out_of_range("s_index > str.size()");
	}
	// substr clamps `count` to the bytes available after s_index, so a large
	// count (for example npos) means "the rest of str".
	return insert(index, str.substr(s_index, count));
}
// Insert a single byte before `pos` and return an iterator to it.
// Throws std::out_of_range when `pos` is outside [cbegin(), cend()].
String::iterator String::insert(const_iterator pos, uint8_t ch)
{
	if (pos < cbegin() || pos > cend())
	{
		throw std::out_of_range("insert iterator out of bounds");
	}
	// Work with an offset: the storage may reallocate below.
	const size_type offset = data_.empty() ? 0 : static_cast<size_type>(pos - cbegin());
	if (data_.empty())
	{
		// An empty string has no terminator yet; add it so the new byte
		// ends up in front of it.
		data_.push_back(0);
	}
	data_.insert(data_.begin() + offset, ch);
	return begin() + offset;
}
// Insert `count` copies of `ch` before `pos`.
// Returns an iterator to the first inserted byte, or `pos` when count is 0.
// Throws std::out_of_range when `pos` is outside [cbegin(), cend()].
String::iterator String::insert(const_iterator pos, size_type count, uint8_t ch)
{
	if (pos < cbegin() || pos > cend())
	{
		throw std::out_of_range("insert iterator out of bounds");
	}
	if (count == 0)
	{
		return const_cast<iterator>(pos);
	}
	// `pos` is invalidated as soon as the storage grows, so remember the
	// offset and do one bulk insert instead of repeated single inserts.
	const size_type offset = data_.empty() ? 0 : static_cast<size_type>(pos - cbegin());
	if (data_.empty())
	{
		// Add the terminator first so the new bytes end up in front of it.
		data_.push_back(0);
	}
	data_.insert(data_.begin() + offset, count, ch);
	return begin() + offset;
}
// Remove up to `count` bytes starting at `index`. `count` is clamped to the
// bytes available, so a large value (for example npos) erases to the end.
// Throws std::out_of_range when index > size().
String& String::erase(size_type index, size_type count)
{
	if (index > size())
	{
		throw std::out_of_range("string byte index out of bounds");
	}
	const size_type removed = std::min(count, static_cast<size_type>(size() - index));
	if (removed == 0)
	{
		return *this;
	}
	const_iterator first = begin() + index;
	const_iterator last = first + removed;
	data_.erase(first, last);
	if (data_.size() == 1)
	{
		// Only the terminator is left: drop it so the storage is empty again.
		data_.pop_back();
	}
	return *this;
}
// Remove the byte at `position`.
// Returns an iterator to the byte that followed it, or end() when the string
// became empty.
String::iterator String::erase(const_iterator position)
{
	const size_type offset = static_cast<size_type>(position - cbegin());
	data_.erase(position);
	if (data_.size() == 1)
	{
		// Only the terminator is left: drop it, as erase(index, count) and
		// pop_back() do, so the storage is empty again.
		data_.pop_back();
		return end();
	}
	return begin() + offset;
}
// Remove the bytes in [first, last).
// Returns an iterator to the byte that followed the range, or end() when the
// string became empty.
String::iterator String::erase(const_iterator first, const_iterator last)
{
	if (first == last)
	{
		return const_cast<iterator>(first);
	}
	const size_type offset = static_cast<size_type>(first - cbegin());
	data_.erase(first, last);
	if (data_.size() == 1)
	{
		// Only the terminator is left: drop it, as erase(index, count) and
		// pop_back() do, so the storage is empty again.
		data_.pop_back();
		return end();
	}
	return begin() + offset;
}
// Append one byte, keeping a zero byte at the end of the storage.
void String::push_back(uint8_t v)
{
	if (!data_.empty())
	{
		// Overwrite the current terminator with the new byte.
		data_[data_.size() - 1] = v;
	}
	else
	{
		data_.push_back(v);
	}
	data_.push_back(0);
}
// Remove the last byte of the string.
void String::pop_back()
{
	// Drop the terminator first.
	data_.pop_back();
	if (data_.size() != 1)
	{
		// Turn the former last byte into the new terminator.
		data_[data_.size() - 1] = 0;
		return;
	}
	// That was the only byte: leave the storage completely empty.
	data_.pop_back();
}
// Append `count` copies of `ch`. A count of zero leaves the string untouched.
String& String::append(size_type count, uint8_t ch)
{
	if (count != 0)
	{
		// Remove the terminator, add the bytes, then terminate again.
		if (!data_.empty())
		{
			data_.pop_back();
		}
		for (size_type remaining = count; remaining > 0; --remaining)
		{
			data_.push_back(ch);
		}
		data_.push_back(0);
	}
	return *this;
}
// The append overloads below all insert at the current end of the string.

// Append `count` bytes starting at `s`.
String& String::append(const char* s, size_type count)
{
	return insert(size(), s, count);
}

// Append a NUL-terminated C string.
String& String::append(const char* s)
{
	return insert(size(), s);
}

// Append the bytes of a view.
String& String::append(std::string_view str)
{
	return insert(size(), str);
}

// Append a sub-range of a view, described by `pos` and `count`.
String& String::append(std::string_view str, size_type pos, size_type count)
{
	return insert(size(), str, pos, count);
}
// Append the bytes of a view.
String& String::operator+=(std::string_view r)
{
	return insert(size(), r);
}

// Append a NUL-terminated C string.
String& String::operator+=(const char* r)
{
	return insert(size(), r);
}

// Append a single byte.
String& String::operator+=(uint8_t r)
{
	this->push_back(r);
	return *this;
}

// Append every byte of an initializer list.
String& String::operator+=(std::initializer_list<uint8_t> r)
{
	this->append(r.begin(), r.end());
	return *this;
}
// Replace `count` bytes at `pos` with all of `str`.
String& String::replace(size_type pos, size_type count, std::string_view str)
{
	const size_t whole_length = str.size();
	return replace(pos, count, str, 0, whole_length);
}
// Replace the bytes in [first, last) with `str`. The replacement may be
// shorter or longer than the range it replaces.
// Throws std::out_of_range when the range is not inside [begin(), end()] or
// is reversed.
String& String::replace(const_iterator first, const_iterator last, std::string_view str)
{
	if (first < begin() || last > end() || first > last)
	{
		throw std::out_of_range("string replacement range out of bounds");
	}
	// Pointer arithmetic only: `first` may equal end() and must not be dereferenced.
	const size_type index = static_cast<size_type>(first - begin());
	const size_type count = static_cast<size_type>(last - first);
	return replace(index, count, str);
}
// Replace up to `count` bytes at `pos` with up to `count2` bytes of `str`
// starting at `pos2`. Both counts are clamped to what is available, and an
// empty replacement simply removes the range.
// Throws std::out_of_range when pos > size() or pos2 > str.size().
String& String::replace(size_type pos, size_type count, std::string_view str, size_t pos2, size_t count2)
{
	if (pos > size())
	{
		throw std::out_of_range("string replacement range out of bounds");
	}
	if (pos2 > str.size())
	{
		throw std::out_of_range("string replacement string_view range out of bounds");
	}
	const size_type removed = std::min(count, static_cast<size_type>(size() - pos));
	if (removed > 0)
	{
		erase(pos, removed);
	}
	// substr clamps count2 to the bytes available after pos2.
	insert(pos, str.substr(pos2, count2));
	return *this;
}
// Replace `count` bytes at `pos` with the first `count2` bytes at `cstr`.
// `cstr` is treated as a byte range of length `count2`; it does not need to
// be NUL-terminated.
String& String::replace(size_type pos, size_type count, const char* cstr, size_type count2)
{
	return replace(pos, count, std::string_view(cstr, count2));
}
// Iterator form: converts [first, last) to an index and a length relative to
// the start of the storage, then forwards to the index-based overload.
String& String::replace(const_iterator first, const_iterator last, const char* cstr, size_type count2)
{
	const size_type start = static_cast<size_type>(first - data_.data());
	const size_type length = static_cast<size_type>(last - first);
	return replace(start, length, cstr, count2);
}
// Replace `count` bytes at `pos` with a NUL-terminated C string.
String& String::replace(size_type pos, size_type count, const char* cstr)
{
	const std::string_view replacement(cstr, std::strlen(cstr));
	return replace(pos, count, replacement);
}

// Iterator form: converts [first, last) to an index and a length relative to
// the start of the storage, then forwards to the index-based overload.
String& String::replace(const_iterator first, const_iterator last, const char* cstr)
{
	const size_type start = static_cast<size_type>(first - data_.data());
	const size_type length = static_cast<size_type>(last - first);
	return replace(start, length, cstr);
}
// Overwrite every byte in [first, last) with `ch`; the length does not change.
// Throws std::out_of_range when the range is not inside [begin(), end()].
String& String::replace(const_iterator first, const_iterator last, uint8_t ch)
{
	if (first < begin() || last > end())
	{
		throw std::out_of_range("string iterators out of range");
	}
	iterator cursor = const_cast<iterator>(first);
	while (cursor != last)
	{
		*cursor = ch;
		++cursor;
	}
	return *this;
}
// Replace [first, last) with the bytes of an initializer list by forwarding
// the list's begin/end pair to the iterator-pair overload.
String& String::replace(const_iterator first, const_iterator last, std::initializer_list<uint8_t> ilist)
{
	auto source_begin = ilist.begin();
	auto source_end = ilist.end();
	return replace(first, last, source_begin, source_end);
}
// Copy up to `count` bytes starting at `pos` into `dest`; no terminator is written.
// Returns the number of bytes copied.
// Throws std::out_of_range when pos > size().
String::size_type String::copy(uint8_t* dest, size_type count, size_type pos) const
{
	if (pos > size())
	{
		throw std::out_of_range("string byte index out of bounds");
	}
	size_type copied = 0;
	while (copied < count && (copied + pos) < size())
	{
		dest[copied] = data_[copied + pos];
		++copied;
	}
	return copied;
}

// Same as above, writing into a char buffer.
String::size_type String::copy(char* dest, size_type count, size_type pos) const
{
	if (pos > size())
	{
		throw std::out_of_range("string byte index out of bounds");
	}
	size_type copied = 0;
	while (copied < count && (copied + pos) < size())
	{
		dest[copied] = data_[copied + pos];
		++copied;
	}
	return copied;
}
// Resize to `count` bytes. Zero clears the storage entirely; otherwise the
// storage holds count + 1 bytes with a zero byte at index `count`.
void String::resize(size_type count)
{
	if (count != 0)
	{
		data_.resize(count + 1);
		data_[count] = 0;
	}
	else
	{
		data_.clear();
	}
}
// Resize to `count` bytes, filling any newly added bytes with `ch`.
// Zero clears the storage entirely.
void String::resize(size_type count, uint8_t ch)
{
	if (count == 0)
	{
		data_.clear();
		return;
	}
	const size_type old_size = size();
	data_.resize(count + 1, ch);
	if (count > old_size)
	{
		// When growing, index old_size held the previous terminator (or is
		// already `ch` if the string was empty); it is now part of the
		// content and must carry the fill byte.
		data_[old_size] = ch;
	}
	data_[count] = 0;
}
// Exchange the byte storage of the two strings.
void String::swap(String& other) noexcept
{
	std::swap(data_, other.data_);
}
// Search for another String by viewing its bytes and forwarding to the
// string_view overload.
String::size_type String::find(const String& str, size_type pos) const
{
	const std::string_view needle = static_cast<std::string_view>(str);
	return find(needle, pos);
}
// Find the first occurrence of `str` starting at or after byte `pos`.
// Returns the starting index, or npos when there is no match or this string
// is empty.
String::size_type String::find(std::string_view str, size_type pos) const
{
	const size_type length = size();
	if (length == 0)
	{
		return npos;
	}
	for (size_type i = pos; i < length; i++)
	{
		bool found = true;
		for (size_t j = 0; j < str.size() && found; j++)
		{
			// Compare as unsigned bytes: where char is signed, a raw
			// uint8_t/char comparison never matches bytes >= 0x80.
			if (i + j >= length || data_[i + j] != static_cast<uint8_t>(str[j]))
			{
				found = false;
			}
		}
		if (found)
		{
			return i;
		}
	}
	return npos;
}
// Search for the first `count` bytes at `s`, starting at byte `pos`.
String::size_type String::find(const char* s, size_type pos, size_t count) const
{
	const std::string_view needle(s, count);
	return find(needle, pos);
}

// Search for a NUL-terminated C string, starting at byte `pos`.
String::size_type String::find(const char* s, size_type pos) const
{
	const std::string_view needle(s, std::strlen(s));
	return find(needle, pos);
}
// Find the first byte equal to `ch` at or after index `pos`; npos when absent.
String::size_type String::find(uint8_t ch, size_type pos) const
{
	size_type index = pos;
	while (index < size())
	{
		if (data_[index] == ch)
		{
			return index;
		}
		++index;
	}
	return npos;
}
// Reverse search for another String by viewing its bytes and forwarding to
// the string_view overload.
String::size_type String::rfind(const String& str, size_type pos) const
{
	const std::string_view needle = static_cast<std::string_view>(str);
	return rfind(needle, pos);
}
// Find the last occurrence of `str` that starts at or before byte `pos`.
// An empty `str` matches at min(pos, size()). Returns npos when absent.
String::size_type String::rfind(std::string_view str, size_type pos) const
{
	// Delegate to string_view: a hand-written countdown with an unsigned
	// index and an `i >= 0` test never terminates when there is no match.
	const std::string_view self = *this;
	const size_t found = self.rfind(str, pos);
	if (found == std::string_view::npos)
	{
		return npos;
	}
	return static_cast<size_type>(found);
}
// Reverse search for the first `count` bytes at `s`.
String::size_type String::rfind(const char* s, size_type pos, size_type count) const
{
	const std::string_view needle(s, count);
	return rfind(needle, pos);
}

// Reverse search for a NUL-terminated C string.
String::size_type String::rfind(const char* s, size_type pos) const
{
	const std::string_view needle(s, std::strlen(s));
	return rfind(needle, pos);
}
// Find the last byte equal to `ch` at or before index `pos`; npos when absent.
// Only the size() content bytes are searched, not the terminator.
String::size_type String::rfind(uint8_t ch, size_type pos) const
{
	// Delegate to string_view: a hand-written countdown with an unsigned
	// index and an `i >= 0` test never terminates when there is no match.
	const std::string_view self = *this;
	const size_t found = self.rfind(static_cast<char>(ch), pos);
	if (found == std::string_view::npos)
	{
		return npos;
	}
	return static_cast<size_type>(found);
}
// Three-way comparison against a view, using std::string_view::compare.
int String::compare(std::string_view str) const noexcept
{
	return static_cast<std::string_view>(*this).compare(str);
}

// Three-way comparison against a NUL-terminated C string.
int String::compare(const char* s) const
{
	const std::string_view other(s, std::strlen(s));
	return static_cast<std::string_view>(*this).compare(other);
}
// Return a copy of up to `count` bytes starting at `pos`. `count` is clamped
// to the bytes available, so a large value (for example npos) means "to the end".
// Throws std::out_of_range when pos > size().
String String::substr(size_type pos, size_type count) const
{
	if (pos > size())
	{
		throw std::out_of_range("string byte index invalid");
	}
	// Clamp the length rather than an end index: pos + count can overflow.
	const size_type length = std::min(count, static_cast<size_type>(size() - pos));
	String ret;
	ret.reserve(length);
	for (size_type i = 0; i < length; i++)
	{
		ret.push_back(data_[pos + i]);
	}
	return ret;
}
// True when every decoded position between decode_begin() and decode_end()
// reports itself as valid.
bool String::valid_utf8() const noexcept
{
	auto cursor = decode_begin();
	while (cursor != decode_end())
	{
		if (!cursor.valid())
		{
			return false;
		}
		cursor++;
	}
	return true;
}
// Convert this string to UTF-16 code units via the free function of the same name.
Vector<uint16_t> String::to_utf16() const
{
	const std::string_view utf8 = static_cast<std::string_view>(*this);
	return ::srb2::to_utf16(utf8);
}
// Decode `utf8` and re-encode it as UTF-16 code units. Code points below
// 0x10000 become one unit; everything else becomes a surrogate pair.
Vector<uint16_t> to_utf16(std::string_view utf8)
{
	Vector<uint16_t> units;
	for (auto itr = Utf8Iter::begin(utf8); itr != Utf8Iter::end(utf8); itr++)
	{
		const uint32_t codepoint = *itr;
		if (codepoint >= 0x10000)
		{
			const uint32_t offset = codepoint - 0x10000;
			// High surrogate carries the upper 10 bits of the 20-bit offset.
			units.push_back(static_cast<uint16_t>(((offset & 0xFFC00) >> 10) + 0xD800));
			// Low surrogate carries the lower 10 bits.
			units.push_back(static_cast<uint16_t>((offset & 0x3FF) + 0xDC00));
		}
		else
		{
			units.push_back(static_cast<uint16_t>(codepoint));
		}
	}
	return units;
}
// Decode `utf8` into a sequence of code points, one per decoded position.
Vector<uint32_t> to_utf32(std::string_view utf8)
{
	Vector<uint32_t> codepoints;
	auto cursor = Utf8Iter::begin(utf8);
	while (cursor != Utf8Iter::end(utf8))
	{
		codepoints.push_back(cursor.codepoint());
		cursor++;
	}
	return codepoints;
}
// Encode one code point as 1 to 4 UTF-8 bytes. Values of 0x110000 and above
// are encoded as U+FFFD (the replacement character).
StaticVec<uint8_t, 4> to_utf8(uint32_t codepoint)
{
	if (codepoint >= 0x110000)
	{
		// replacement char due to invalid codepoint
		return to_utf8(0xFFFD);
	}

	StaticVec<uint8_t, 4> bytes;
	if (codepoint < 0x80)
	{
		// 1 byte: 0xxxxxxx
		bytes.push_back(static_cast<uint8_t>(codepoint));
	}
	else if (codepoint < 0x800)
	{
		// 2 bytes: 110xxxxx 10xxxxxx
		bytes.push_back(((codepoint >> 6) & 0x1F) + 0xC0);
		bytes.push_back((codepoint & 0x3F) + 0x80);
	}
	else if (codepoint < 0x10000)
	{
		// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		bytes.push_back(((codepoint >> 12) & 0x0F) + 0xE0);
		bytes.push_back(((codepoint >> 6) & 0x3F) + 0x80);
		bytes.push_back((codepoint & 0x3F) + 0x80);
	}
	else
	{
		// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		bytes.push_back(((codepoint >> 18) & 0x07) + 0xF0);
		bytes.push_back(((codepoint >> 12) & 0x3F) + 0x80);
		bytes.push_back(((codepoint >> 6) & 0x3F) + 0x80);
		bytes.push_back((codepoint & 0x3F) + 0x80);
	}
	return bytes;
}
// Encode a whole UTF-32 view by forwarding its begin/end pair to the
// iterator-pair overload of to_utf8.
String to_utf8(std::u32string_view utf32view)
{
	auto first = utf32view.begin();
	auto last = utf32view.end();
	return to_utf8(first, last);
}
// Concatenation operators: each builds a fresh String from `lhs` followed by `rhs`.

String operator+(const String& lhs, const String& rhs)
{
	String result;
	result.append(lhs).append(rhs);
	return result;
}

String operator+(const String& lhs, const char* rhs)
{
	const std::string_view tail(rhs, std::strlen(rhs));
	String result;
	result.append(lhs).append(tail);
	return result;
}

String operator+(const String& lhs, uint8_t rhs)
{
	String result;
	result.append(lhs);
	result.push_back(rhs);
	return result;
}

String operator+(const String& lhs, std::string_view view)
{
	String result;
	result.append(lhs).append(view);
	return result;
}
// Equality: true when compare() reports no difference.
bool operator==(const String& lhs, const String& rhs)
{
	const int order = lhs.compare(rhs);
	return order == 0;
}

bool operator==(const String& lhs, const char* rhs)
{
	const int order = lhs.compare(rhs);
	return order == 0;
}
// bool operator==(const String& lhs, std::string_view rhs)
// {
// return lhs.compare(rhs) == 0;
// }
// Inequality: the negation of operator==.
bool operator!=(const String& lhs, const String& rhs)
{
	const bool equal = (lhs == rhs);
	return !equal;
}

bool operator!=(const String& lhs, const char* rhs)
{
	const bool equal = (lhs == rhs);
	return !equal;
}
// bool operator!=(const String& lhs, std::string_view rhs)
// {
// return !(lhs == rhs);
// }
// Less-than: true when compare() returns a negative value.
bool operator<(const String& lhs, const String& rhs)
{
	const int order = lhs.compare(rhs);
	return order < 0;
}

bool operator<(const String& lhs, const char* rhs)
{
	const int order = lhs.compare(rhs);
	return order < 0;
}
// bool operator<(const String& lhs, std::string_view rhs)
// {
// return lhs.compare(rhs) < 0;
// }
// Less-or-equal: true when compare() returns zero or a negative value.
bool operator<=(const String& lhs, const String& rhs)
{
	const int order = lhs.compare(rhs);
	return order <= 0;
}

bool operator<=(const String& lhs, const char* rhs)
{
	const int order = lhs.compare(rhs);
	return order <= 0;
}
// bool operator<=(const String& lhs, std::string_view rhs)
// {
// return lhs.compare(rhs) <= 0;
// }
// Greater-than: true when compare() returns a positive value.
bool operator>(const String& lhs, const String& rhs)
{
	const int order = lhs.compare(rhs);
	return order > 0;
}

bool operator>(const String& lhs, const char* rhs)
{
	const int order = lhs.compare(rhs);
	return order > 0;
}
// bool operator>(const String& lhs, std::string_view rhs)
// {
// return lhs.compare(rhs) > 0;
// }
// Greater-or-equal: true when compare() returns zero or a positive value.
bool operator>=(const String& lhs, const String& rhs)
{
	const int order = lhs.compare(rhs);
	return order >= 0;
}

bool operator>=(const String& lhs, const char* rhs)
{
	const int order = lhs.compare(rhs);
	return order >= 0;
}
// bool operator>=(const String& lhs, std::string_view rhs)
// {
// return lhs.compare(rhs) >= 0;
// }
// True unless `b` is 0xC0, 0xC1, or 0xF5 and above.
static constexpr bool is_utf8_byte(uint8_t b)
{
	return !(b == 0xC0 || b == 0xC1 || b >= 0xF5);
}
// True for bytes of the form 10xxxxxx, i.e. 0x80 through 0xBF.
static constexpr bool is_utf8_continuation(uint8_t b)
{
	return (b & 0xC0) == 0x80;
}
uint32_t Utf8Iter::do_codepoint() const
{
uint8_t b[4];
uint8_t s;
bool v = true;
b[0] = s_[i_];
if (b[0] < 0x80) s = 1;
else if (b[0] >= 0x80 && b[0] < 0xC0)
{
// invalid, first byte continuation
s = 1;
v = false;
}
else if (b[0] >= 0xC0 && b[0] < 0xE0)
{
// 2 byte
if (s_.size() - i_ < 2)
{
// invalid, truncated
s = 1;
v = false;
goto decode;
}
b[1] = s_[i_ + 1];
if (!is_utf8_continuation(b[1]))
{
// invalid, not a continuation
s = 1;
v = false;
goto decode;
}
s = 2;
}
else if (b[0] >= 0xE0 && b[0] < 0xF0)
{
// 3 byte
if (s_.size() - i_ < 2)
{
// invalid, truncated
s = 1;
v = false;
goto decode;
}
if (s_.size() - i_ < 3)
{
// invalid, truncated
s = 2;
v = false;
goto decode;
}
b[1] = s_[i_ + 1];
b[2] = s_[i_ + 2];
if (!is_utf8_continuation(b[1]))
{
// invalid, not a continuation
s = 1;
v = false;
goto decode;
}
if (!is_utf8_continuation(b[2]))
{
// invalid, not a continuation
s = 2;
v = false;
goto decode;
}
s = 3;
}
else if (b[0] >= 0xF0 && b[0] < 0xF5)
{
// 4 byte
if (s_.size() - i_ < 2)
{
// invalid, truncated
s = 1;
v = false;
goto decode;
}
if (s_.size() - i_ < 3)
{
// invalid, truncated
s = 2;
v = false;
goto decode;
}
if (s_.size() - i_ < 4)
{
// invalid, truncated
s = 3;
v = false;
goto decode;
}
b[1] = s_[i_ + 1];
b[2] = s_[i_ + 2];
b[3] = s_[i_ + 3];
if (!is_utf8_continuation(b[1]))
{
// invalid, not a continuation
s = 1;
v = false;
goto decode;
}
if (!is_utf8_continuation(b[2]))
{
// invalid, not a continuation
s = 2;
v = false;
goto decode;
}
if (!is_utf8_continuation(b[3]))
{
// invalid, not a continuation
s = 3;
v = false;
goto decode;
}
s = 4;
}
else
{
// invalid
s = 1;
v = false;
}
decode:
// bit 29 indicates unparseable (immediately invalid, replacement char U+FFFD)
// bit 30-31 indicates byte size (0-3)
if (v == false) return 0xFFFD + ((s - 1) << 30) + (1 << 29);
switch (s)
{
default:
case 1: return b[0] & 0x7f;
case 2: return (b[1] & 0x3f) + ((b[0] & 0x1f) << 6) + (1 << 30);
case 3: return (b[2] & 0x3f) + ((b[1] & 0x3f) << 6) + ((b[0] & 0x0f) << 12) + (2 << 30);
case 4: return (b[3] & 0x3f) + ((b[2] & 0x3f) << 6) + ((b[1] & 0x3f) << 12) + ((b[2] & 0x7) << 18) + (3 << 30);
}
}
// Code point at the current position. Returns U+FFFD when the sequence is
// unparseable (bit 29 of do_codepoint()), when it is an overlong encoding, or
// when the decoded value is above U+10FFFF.
uint32_t Utf8Iter::codepoint() const
{
	const uint32_t c = do_codepoint();
	if ((c >> 29) & 1)
	{
		return 0xFFFD;
	}
	const uint32_t ret = c & 0x001fffff;
	// An encoding is overlong when the value would have fit in fewer bytes,
	// i.e. it is below the minimum for its length.
	switch (c >> 30)
	{
	case 0: return ret;
	case 1: return ret < 0x80 ? 0xFFFD : ret;
	case 2: return ret < 0x800 ? 0xFFFD : ret;
	default: return (ret < 0x10000 || ret > 0x10FFFF) ? 0xFFFD : ret;
	}
}
// True when the sequence at the current position is parseable, is not an
// overlong encoding, and does not decode to a value above U+10FFFF.
bool Utf8Iter::valid() const
{
	const uint32_t c = do_codepoint();
	if ((c >> 29) & 1) return false;
	const uint32_t ret = c & 0x001fffff;
	// An encoding is overlong when the value would have fit in fewer bytes,
	// i.e. it is below the minimum for its length.
	switch (c >> 30)
	{
	case 0: return true;
	case 1: return ret >= 0x80;
	case 2: return ret >= 0x800;
	default: return ret >= 0x10000 && ret <= 0x10FFFF;
	}
}
// Number of bytes the current sequence occupies: the top two bits of
// do_codepoint(), plus one.
uint8_t Utf8Iter::size() const
{
	const uint8_t size_minus_one = static_cast<uint8_t>(do_codepoint() >> 30);
	return size_minus_one + 1;
}
// True when `word` is a UTF-16 low (trailing) surrogate: 0xDC00 through
// 0xDFFF, both ends included.
static constexpr bool utf16_is_low_surrogate(uint16_t word)
{
	return word >= 0xDC00 && word <= 0xDFFF;
}
// True when `word` is a UTF-16 high (leading) surrogate: 0xD800 through
// 0xDBFF, both ends included.
static constexpr bool utf16_is_high_surrogate(uint16_t word)
{
	return word >= 0xD800 && word <= 0xDBFF;
}
// True when `word` is either kind of surrogate.
static constexpr bool utf16_is_surrogate(uint16_t word)
{
	return utf16_is_high_surrogate(word) ? true : utf16_is_low_surrogate(word);
}
uint32_t Utf16Iter::do_codepoint() const
{
uint16_t words[2];
words[0] = s_[i_];
if (!utf16_is_high_surrogate(words[0]))
{
// unpaired low surrogates allowed as-is for windows compatibility
return words[0];
}
if (s_.size() - i_ < 2)
{
// unpaired high surrogates allowed as-is for windows compatibility
return words[0];
}
words[1] = s_[i_ + 1];
return ((words[1] - 0xDC00) & 0x3FF)
+ ((words[0] - 0xD800) & 0x3FF)
+ 0x10000;
}
// Code point at the current position: the low 21 bits of do_codepoint().
uint32_t Utf16Iter::codepoint() const
{
	return do_codepoint() & 0x001fffff;
}
// Number of 16-bit units at the current position: two when the decoded value
// is 0x10000 or above, otherwise one.
uint8_t Utf16Iter::size() const
{
	const uint32_t value = do_codepoint() & 0x001fffff;
	if (value < 0x10000)
	{
		return 1;
	}
	return 2;
}
// fmtlib
// Format `args` according to `fmt` into a temporary fmt memory buffer and
// return the buffer's contents as a String.
String vformat(fmt::string_view fmt, fmt::format_args args)
{
	fmt::memory_buffer buffer;
	vformat_to(buffer, fmt, args);
	return String(buffer.data(), buffer.size());
}
} // namespace srb2
// Hash a String by hashing a string_view over its bytes.
size_t std::hash<srb2::String>::operator()(const srb2::String& v)
{
	const std::hash<std::string_view> hasher;
	return hasher(static_cast<std::string_view>(v));
}
// C functions
// C entry point: returns 1 when the NUL-terminated string `str` is valid
// UTF-8 according to Utf8Iter::valid(), 0 otherwise. An empty string is valid.
int Str_IsValidUTF8(const char* str)
{
	const size_t len = std::strlen(str);
	if (len == 0)
	{
		return 1;
	}
	// Begin and end must be built from the same view so they describe the
	// same range.
	const std::string_view view(str, len);
	for (auto itr = srb2::Utf8Iter::begin(view); itr != srb2::Utf8Iter::end(view); ++itr)
	{
		if (!itr.valid())
		{
			return 0;
		}
	}
	return 1;
}
// C entry point: decode the code point at *itr and advance *itr past it.
// At the terminating NUL it returns 0 and leaves *itr where it is, so a
// caller can never be moved beyond the end of the string.
uint32_t Str_NextCodepointFromUTF8(const char** itr)
{
	const char* cursor = *itr;
	if (*cursor == '\0')
	{
		return 0;
	}
	// A sequence is at most 4 bytes long, so look at no more than that
	// instead of measuring the whole remaining string on every call.
	size_t available = 1;
	while (available < 4 && cursor[available] != '\0')
	{
		++available;
	}
	auto i = srb2::Utf8Iter::begin(std::string_view(cursor, available));
	const uint32_t ret = i.codepoint();
	*itr = cursor + i.size();
	return ret;
}