// DR. ROBOTNIK'S RING RACERS //----------------------------------------------------------------------------- // Copyright (C) 2025 by Ronald "Eidolon" Kinard // Copyright (C) 2025 by Kart Krew // // This program is free software distributed under the // terms of the GNU General Public License, version 2. // See the 'LICENSE' file for more details. //----------------------------------------------------------------------------- #include "string.h" #include "fmt/format.h" #include #include #include #include #include namespace srb2 { String::String(const String&) = default; String::String(String&&) noexcept = default; String::~String() = default; String& String::operator=(const String&) = default; String& String::operator=(String&&) noexcept = default; String::String(const char* rhs) : String(std::string_view { rhs }) {} String::String(const char* rhs, size_t len) : String(std::string_view { rhs, len }) {} String::String(const std::string& rhs) : String(std::string_view { rhs }) {} String::String(std::string_view view) : String() { append(view); } String::operator std::string() const { std::string_view view = *this; return std::string(view); } String::operator std::string_view() const { return std::string_view((const char*)data(), size()); } uint32_t String::size() const noexcept { if (data_.empty()) { return 0; } return data_.size() - 1; } static const char* kEmptyString = ""; const char* String::c_str() const { if (data_.empty()) { return kEmptyString; } return reinterpret_cast(data_.data()); } void String::reserve(size_type capacity) { if (capacity == 0) { data_.reserve(0); return; } data_.reserve(capacity + 1); } uint8_t* String::begin() noexcept { if (data_.empty()) { return nullptr; } return data(); } uint8_t* String::end() noexcept { if (data_.empty()) { return nullptr; } return data() + size(); } const uint8_t* String::cbegin() const noexcept { if (data_.empty()) { return nullptr; } return data(); } const uint8_t* String::cend() const noexcept { if (data_.empty()) { return nullptr; } return data() + size(); } uint8_t& String::at(size_type i) { if (i >= size()) { throw std::out_of_range("string byte index out of bounds"); } return data_.at(i); } const uint8_t& String::at(size_type i) const { if (i >= size()) { throw std::out_of_range("string byte index out of bounds"); } return data_.at(i); } String& String::insert(size_type index, size_type count, uint8_t ch) { if (index > size()) { throw std::out_of_range("string byte index out of bounds"); } data_.insert(data_.begin() + index, count, ch); return *this; } String& String::insert(size_type index, const char* s) { return insert(index, s, (size_type)std::strlen(s)); } String& String::insert(size_type index, const char* s, size_type count) { if (index > size()) { throw std::out_of_range("string byte index out of bounds"); } if (!empty()) { // remove null byte data_.pop_back(); } data_.insert(data_.begin() + index, s, s + count); if (data_.size() > 0) { data_.push_back(0); } return *this; } String& String::insert(size_type index, std::string_view str) { return insert(index, str.begin(), (size_type)str.size()); } String& String::insert(size_type index, std::string_view str, size_t s_index, size_t count) { if (s_index > str.size()) { throw std::out_of_range("s_index > str.size()"); } return insert(index, str.substr(s_index, std::max(str.size() - s_index, count))); } String::iterator String::insert(const_iterator pos, uint8_t ch) { if (pos < cbegin() || pos > cend()) { throw std::out_of_range("insert iterator out of bounds"); } return data_.insert(pos, ch); } String::iterator String::insert(const_iterator pos, size_type count, uint8_t ch) { if (pos < cbegin() || pos > cend()) { throw std::out_of_range("insert iterator out of bounds"); } if (!empty()) { data_.pop_back(); } for (size_type i = 0; i < count; i++) { data_.insert(pos, ch); } if (data_.size() > 0) { data_.push_back(0); } return const_cast(pos); } String& String::erase(size_type index, size_type count) { if (index + count >= size()) { throw std::out_of_range("string byte index out of bounds"); } const_iterator first = begin() + index; const_iterator last = first + count; data_.erase(first, last); if (data_.size() == 1) { data_.pop_back(); } return *this; } String::iterator String::erase(const_iterator position) { return data_.erase(position); } String::iterator String::erase(const_iterator first, const_iterator last) { return data_.erase(first, last); } void String::push_back(uint8_t v) { if (data_.empty()) { data_.push_back(v); data_.push_back(0); return; } data_[data_.size() - 1] = v; data_.push_back(0); } void String::pop_back() { data_.pop_back(); if (data_.size() == 1) { data_.pop_back(); } else { data_[data_.size() - 1] = 0; } } String& String::append(size_type count, uint8_t ch) { if (count == 0) { return *this; } if (!data_.empty()) { data_.pop_back(); } for (size_type i = 0; i < count; i++) { data_.push_back(ch); } data_.push_back(0); return *this; } String& String::append(const char* s, size_type count) { insert(size(), s, count); return *this; } String& String::append(const char* s) { insert(size(), s); return *this; } String& String::append(std::string_view str) { insert(size(), str); return *this; } String& String::append(std::string_view str, size_type pos, size_type count) { insert(size(), str, pos, count); return *this; } String& String::operator+=(std::string_view r) { insert(size(), r); return *this; } String& String::operator+=(const char* r) { insert(size(), r); return *this; } String& String::operator+=(uint8_t r) { push_back(r); return *this; } String& String::operator+=(std::initializer_list r) { append(r.begin(), r.end()); return *this; } String& String::replace(size_type pos, size_type count, std::string_view str) { return replace(pos, count, str, 0, str.size()); } String& String::replace(const_iterator first, const_iterator last, std::string_view str) { if (first < begin() || last > end() || first + str.size() > end()) { throw std::out_of_range("string replacement range out of bounds"); } size_type index = first - data_.data(); size_type count = last - first; return replace(index, count, str); } String& String::replace(size_type pos, size_type count, std::string_view str, size_t pos2, size_t count2) { if (pos >= size()) { throw std::out_of_range("string replacement range out of bounds"); } if (pos2 >= str.size()) { throw std::out_of_range("string replacement string_view range out of bounds"); } erase(pos, count); insert(pos, str, pos2, count2); return *this; } String& String::replace(size_type pos, size_type count, const char* cstr, size_type count2) { size_t len = std::strlen(cstr); return replace(pos, count, std::string_view(cstr, len), count2); } String& String::replace(const_iterator first, const_iterator last, const char* cstr, size_type count2) { size_type index = first - data_.data(); size_type count = last - first; return replace(index, count, cstr, count2); } String& String::replace(size_type pos, size_type count, const char* cstr) { size_t len = std::strlen(cstr); return replace(pos, count, std::string_view(cstr, len)); } String& String::replace(const_iterator first, const_iterator last, const char* cstr) { size_type index = first - data_.data(); size_type count = last - first; return replace(index, count, cstr); } String& String::replace(const_iterator first, const_iterator last, uint8_t ch) { if (first < begin() || last > end()) { throw std::out_of_range("string iterators out of range"); } for (; first != last; first++) { *const_cast(first) = ch; } return *this; } String& String::replace(const_iterator first, const_iterator last, std::initializer_list ilist) { return replace(first, last, ilist.begin(), ilist.end()); } String::size_type String::copy(uint8_t* dest, size_type count, size_type pos) const { if (pos > size()) { throw std::out_of_range("string byte index out of bounds"); } size_type copied = 0; for (size_type i = 0; i < count && (i + pos) < size(); i++) { dest[i] = data_[i + pos]; copied += 1; } return copied; } String::size_type String::copy(char* dest, size_type count, size_type pos) const { if (pos > size()) { throw std::out_of_range("string byte index out of bounds"); } size_type copied = 0; for (size_type i = 0; i < count && (i + pos) < size(); i++) { dest[i] = data_[i + pos]; copied += 1; } return copied; } void String::resize(size_type count) { if (count == 0) { data_.clear(); return; } data_.resize(count + 1); data_[count] = 0; } void String::resize(size_type count, uint8_t ch) { if (count == 0) { data_.clear(); return; } data_.resize(count + 1, ch); data_[count] = 0; } void String::swap(String& other) noexcept { std::swap(this->data_, other.data_); } String::size_type String::find(const String& str, size_type pos) const { return find(static_cast(str), pos); } String::size_type String::find(std::string_view str, size_type pos) const { if (size() == 0) { return npos; } for (size_type i = pos; i < size(); i++) { bool found = true; for (size_t j = 0; j < str.size() && found; j++) { if (i + j >= size() || data_[i + j] != str[j]) { found = false; } } if (found) { return i; } } return npos; } String::size_type String::find(const char* s, size_type pos, size_t count) const { return find(std::string_view(s, count), pos); } String::size_type String::find(const char* s, size_type pos) const { size_t len = std::strlen(s); return find(std::string_view(s, len), pos); } String::size_type String::find(uint8_t ch, size_type pos) const { for (size_type i = pos; i < size(); i++) { if (data_[i] == ch) { return i; } } return npos; } String::size_type String::rfind(const String& str, size_type pos) const { return rfind(static_cast(str), pos); } String::size_type String::rfind(std::string_view str, size_type pos) const { if (str.empty()) { return std::min(pos, size()); } if (size() == 0) { return npos; } for (size_type i = std::min(pos, size()); i >= 0; i--) { bool found = true; for (size_t j = 0; j < str.size() && found; j++) { if (i + j >= size() || data_[i + j] != str[j]) { found = false; } } if (found) { return i; } } return npos; } String::size_type String::rfind(const char* s, size_type pos, size_type count) const { return rfind(std::string_view(s, count), pos); } String::size_type String::rfind(const char* s, size_type pos) const { size_t len = std::strlen(s); return rfind(std::string_view(s, len), pos); } String::size_type String::rfind(uint8_t ch, size_type pos) const { if (empty()) { return npos; } for (size_type i = std::min(pos, size()); i >= 0; i--) { if (data_[i] == ch) { return i; } } return npos; } int String::compare(std::string_view str) const noexcept { std::string_view self = *this; return self.compare(str); } int String::compare(const char* s) const { std::string_view self = *this; std::string_view that { s, std::strlen(s) }; return self.compare(that); } String String::substr(size_type pos, size_type count) const { String ret; if (pos >= size()) { throw std::out_of_range("string byte index invalid"); } size_type start = pos; size_type end = std::min(pos + count, size() - pos); ret.reserve(end - start); for (size_type i = start; i < end; i++) { ret.push_back(data_[i]); } return ret; } bool String::valid_utf8() const noexcept { for (auto itr = decode_begin(); itr != decode_end(); itr++) { if (!itr.valid()) { return false; } } return true; } Vector String::to_utf16() const { return ::srb2::to_utf16(static_cast(*this)); } Vector to_utf16(std::string_view utf8) { Vector ret; for (auto itr = Utf8Iter::begin(utf8); itr != Utf8Iter::end(utf8); itr++) { uint32_t codepoint = *itr; if (codepoint < 0x10000) { ret.push_back(static_cast(codepoint)); continue; } // high surrogate ret.push_back(static_cast( (((codepoint - 0x10000) & 0b11111111110000000000) >> 10) + 0xD800 )); // low surrogate ret.push_back(static_cast( (((codepoint - 0x10000) & 0b1111111111)) + 0xDC00 )); } return ret; } Vector to_utf32(std::string_view utf8) { Vector ret; for (auto itr = Utf8Iter::begin(utf8); itr != Utf8Iter::end(utf8); itr++) { ret.push_back(itr.codepoint()); } return ret; } StaticVec to_utf8(uint32_t codepoint) { StaticVec enc; if (codepoint < 0x80) { enc.push_back(static_cast(codepoint)); } else if (codepoint >= 0x80 && codepoint < 0x800) { enc.push_back(((codepoint >> 6) & 0b11111) + 0xC0); enc.push_back((codepoint & 0b111111) + 0x80); } else if (codepoint >= 0x800 && codepoint < 0x10000) { enc.push_back(((codepoint >> 12) & 0b1111) + 0xE0); enc.push_back(((codepoint >> 6) & 0b111111) + 0x80); enc.push_back((codepoint & 0b111111) + 0x80); } else if (codepoint >= 0x10000 && codepoint < 0x110000) { enc.push_back(((codepoint >> 18) & 0b111) + 0xF0); enc.push_back(((codepoint >> 12) & 0b111111) + 0x80); enc.push_back(((codepoint >> 6) & 0b111111) + 0x80); enc.push_back((codepoint & 0b111111) + 0x80); } else { // replacement char due to invalid codepoint enc = to_utf8(0xFFFD); } return enc; } String to_utf8(std::u32string_view utf32view) { return to_utf8(utf32view.begin(), utf32view.end()); } String operator+(const String& lhs, const String& rhs) { String ret; ret.append(lhs); ret.append(rhs); return ret; } String operator+(const String& lhs, const char* rhs) { String ret; size_t len = std::strlen(rhs); ret.append(lhs); ret.append(std::string_view(rhs, len)); return ret; } String operator+(const String& lhs, uint8_t rhs) { String ret; ret.append(lhs); ret.push_back(rhs); return ret; } String operator+(const String& lhs, std::string_view view) { String ret; ret.append(lhs); ret.append(view); return ret; } bool operator==(const String& lhs, const String& rhs) { return lhs.compare(rhs) == 0; } bool operator==(const String& lhs, const char* rhs) { return lhs.compare(rhs) == 0; } // bool operator==(const String& lhs, std::string_view rhs) // { // return lhs.compare(rhs) == 0; // } bool operator!=(const String& lhs, const String& rhs) { return !(lhs == rhs); } bool operator!=(const String& lhs, const char* rhs) { return !(lhs == rhs); } // bool operator!=(const String& lhs, std::string_view rhs) // { // return !(lhs == rhs); // } bool operator<(const String& lhs, const String& rhs) { return lhs.compare(rhs) < 0; } bool operator<(const String& lhs, const char* rhs) { return lhs.compare(rhs) < 0; } // bool operator<(const String& lhs, std::string_view rhs) // { // return lhs.compare(rhs) < 0; // } bool operator<=(const String& lhs, const String& rhs) { return lhs.compare(rhs) <= 0; } bool operator<=(const String& lhs, const char* rhs) { return lhs.compare(rhs) <= 0; } // bool operator<=(const String& lhs, std::string_view rhs) // { // return lhs.compare(rhs) <= 0; // } bool operator>(const String& lhs, const String& rhs) { return lhs.compare(rhs) > 0; } bool operator>(const String& lhs, const char* rhs) { return lhs.compare(rhs) > 0; } // bool operator>(const String& lhs, std::string_view rhs) // { // return lhs.compare(rhs) > 0; // } bool operator>=(const String& lhs, const String& rhs) { return lhs.compare(rhs) >= 0; } bool operator>=(const String& lhs, const char* rhs) { return lhs.compare(rhs) >= 0; } // bool operator>=(const String& lhs, std::string_view rhs) // { // return lhs.compare(rhs) >= 0; // } static constexpr bool is_utf8_byte(uint8_t b) { return b != 0xC0 && b != 0xC1 && b < 0xF5; } static constexpr bool is_utf8_continuation(uint8_t b) { return b >= 0x80 && b < 0xC0; } uint32_t Utf8Iter::do_codepoint() const { uint8_t b[4]; uint8_t s; bool v = true; b[0] = s_[i_]; if (b[0] < 0x80) s = 1; else if (b[0] >= 0x80 && b[0] < 0xC0) { // invalid, first byte continuation s = 1; v = false; } else if (b[0] >= 0xC0 && b[0] < 0xE0) { // 2 byte if (s_.size() - i_ < 2) { // invalid, truncated s = 1; v = false; goto decode; } b[1] = s_[i_ + 1]; if (!is_utf8_continuation(b[1])) { // invalid, not a continuation s = 1; v = false; goto decode; } s = 2; } else if (b[0] >= 0xE0 && b[0] < 0xF0) { // 3 byte if (s_.size() - i_ < 2) { // invalid, truncated s = 1; v = false; goto decode; } if (s_.size() - i_ < 3) { // invalid, truncated s = 2; v = false; goto decode; } b[1] = s_[i_ + 1]; b[2] = s_[i_ + 2]; if (!is_utf8_continuation(b[1])) { // invalid, not a continuation s = 1; v = false; goto decode; } if (!is_utf8_continuation(b[2])) { // invalid, not a continuation s = 2; v = false; goto decode; } s = 3; } else if (b[0] >= 0xF0 && b[0] < 0xF5) { // 4 byte if (s_.size() - i_ < 2) { // invalid, truncated s = 1; v = false; goto decode; } if (s_.size() - i_ < 3) { // invalid, truncated s = 2; v = false; goto decode; } if (s_.size() - i_ < 4) { // invalid, truncated s = 3; v = false; goto decode; } b[1] = s_[i_ + 1]; b[2] = s_[i_ + 2]; b[3] = s_[i_ + 3]; if (!is_utf8_continuation(b[1])) { // invalid, not a continuation s = 1; v = false; goto decode; } if (!is_utf8_continuation(b[2])) { // invalid, not a continuation s = 2; v = false; goto decode; } if (!is_utf8_continuation(b[3])) { // invalid, not a continuation s = 3; v = false; goto decode; } s = 4; } else { // invalid s = 1; v = false; } decode: // bit 29 indicates unparseable (immediately invalid, replacement char U+FFFD) // bit 30-31 indicates byte size (0-3) if (v == false) return 0xFFFD + ((s - 1) << 30) + (1 << 29); switch (s) { default: case 1: return b[0] & 0x7f; case 2: return (b[1] & 0x3f) + ((b[0] & 0x1f) << 6) + (1 << 30); case 3: return (b[2] & 0x3f) + ((b[1] & 0x3f) << 6) + ((b[0] & 0x0f) << 12) + (2 << 30); case 4: return (b[3] & 0x3f) + ((b[2] & 0x3f) << 6) + ((b[1] & 0x3f) << 12) + ((b[2] & 0x7) << 18) + (3 << 30); } } uint32_t Utf8Iter::codepoint() const { uint32_t c = do_codepoint(); uint32_t ret = c & 0x001fffff; uint8_t s = c >> 30; // overlong encodings are still invalid and should be replaced, // even if bit 29 is unset switch (s) { default: case 0: return ret >= (2 << 8) ? 0xFFFD : ret; case 1: return ret >= (2 << 12) ? 0xFFFD : ret; case 2: return ret >= (2 << 17) ? 0xFFFD : ret; case 3: return ret; } } bool Utf8Iter::valid() const { uint32_t c = do_codepoint(); uint32_t ret = c & 0x001fffff; if ((c >> 29) & 1) return false; uint8_t s = c >> 30; switch (s) { default: case 0: return ret >= (2 << 8) ? false : true; case 1: return ret >= (2 << 12) ? false : true; case 2: return ret >= (2 << 17) ? false : true; case 3: return true; } } uint8_t Utf8Iter::size() const { uint32_t c = do_codepoint(); uint8_t s = (c >> 30); return s + 1; } static constexpr bool utf16_is_low_surrogate(uint16_t word) { return word >= 0xDC00 && word < 0xDFFF; } static constexpr bool utf16_is_high_surrogate(uint16_t word) { return word >= 0xD800 && word < 0xDBFF; } static constexpr bool utf16_is_surrogate(uint16_t word) { return utf16_is_high_surrogate(word) || utf16_is_low_surrogate(word); } uint32_t Utf16Iter::do_codepoint() const { uint16_t words[2]; words[0] = s_[i_]; if (!utf16_is_high_surrogate(words[0])) { // unpaired low surrogates allowed as-is for windows compatibility return words[0]; } if (s_.size() - i_ < 2) { // unpaired high surrogates allowed as-is for windows compatibility return words[0]; } words[1] = s_[i_ + 1]; return ((words[1] - 0xDC00) & 0x3FF) + ((words[0] - 0xD800) & 0x3FF) + 0x10000; } uint32_t Utf16Iter::codepoint() const { uint32_t c = do_codepoint(); uint32_t ret = c & 0x001fffff; return ret; } uint8_t Utf16Iter::size() const { uint32_t c = do_codepoint() & 0x001fffff; return c >= 0x10000 ? 2 : 1; } // fmtlib String vformat(fmt::string_view fmt, fmt::format_args args) { auto buf = fmt::memory_buffer(); vformat_to(buf, fmt, args); return { buf.data(), buf.size() }; } } // namespace srb2 size_t std::hash::operator()(const srb2::String& v) { std::string_view str = v; return std::hash()(str); } // C functions int Str_IsValidUTF8(const char* str) { size_t len = std::strlen(str); if (len == 0) { return 1; } for (auto itr = srb2::Utf8Iter::begin(str); itr != srb2::Utf8Iter::end(str + len - 1); ++itr) { if (!itr.valid()) { return false; } } return true; } uint32_t Str_NextCodepointFromUTF8(const char** itr) { auto i = srb2::Utf8Iter::begin(*itr); uint32_t ret = i.codepoint(); uint8_t s = i.size(); *itr += s; return ret; }