12 , primary_language_(primary_lang)
33 , primary_language_(
language::Code::Unknown)
36 if (!runes_.empty()) {
37 std::map<language::Code, size_t> language_counts;
38 for (const auto& rune : runes_) {
39 if (rune.language() != language::Code::Unknown) {
40 ++language_counts[rune.language()];
44 if (!language_counts.empty()) {
45 primary_language_ = std::max_element(language_counts.begin(), language_counts.end(),
46 [](const auto& a, const auto& b) { return a.second < b.second; })->first;
55 result.reserve(
size() * 2);
57 for (
const auto& rune : runes_) {
58 result += rune.to_utf8();
65 std::u16string result;
66 result.reserve(
size() * 2);
68 for (
const auto& rune : runes_) {
69 auto utf16_char = rune.to_utf16();
77 std::u32string result;
78 result.reserve(
size());
80 for (
const auto& rune : runes_) {
81 result.push_back(rune.codepoint());
90 runes_.insert(runes_.end(), other.runes_.begin(), other.runes_.end());
95 runes_.push_back(rune);
105 if (start >=
size()) {
112 result.runes_.assign(runes_.begin() + start, runes_.begin() + start + actual_length);
124 auto it = std::find(runes_.begin() + pos, runes_.end(), rune);
125 return (it != runes_.end()) ? std::distance(runes_.begin(), it) :
npos;
133 if (pos >=
size() || str.size() >
size() - pos) {
137 auto it = std::search(runes_.begin() + pos, runes_.end(),
138 str.runes_.begin(), str.runes_.end());
140 return (it != runes_.end()) ? std::distance(runes_.begin(), it) :
npos;
156 if (utf8_text.empty()) {
161 result.runes_.reserve(utf8_text.size());
163 std::map<language::Code, size_t> language_counts;
166 while (i < utf8_text.size()) {
168 size_t char_length = 1;
169 unsigned char first_byte =
static_cast<unsigned char>(utf8_text[i]);
171 if (first_byte <= 0x7F) {
173 }
else if ((first_byte & 0xE0) == 0xC0) {
175 }
else if ((first_byte & 0xF0) == 0xE0) {
177 }
else if ((first_byte & 0xF8) == 0xF0) {
185 if (i + char_length > utf8_text.size()) {
190 std::string_view char_view = utf8_text.substr(i, char_length);
192 result.runes_.push_back(rune);
198 }
catch (
const std::invalid_argument&) {
206 if (!language_counts.empty()) {
207 result.primary_language_ = std::max_element(language_counts.begin(), language_counts.end(),
208 [](
const auto& a,
const auto& b) { return a.second < b.second; })->first;
219 for (
auto& rune : result.runes_) {
221 rune.set_language(lang);
231 if (utf16_text.empty()) {
235 result.runes_.reserve(utf16_text.size());
236 std::map<language::Code, size_t> language_counts;
239 while (i < utf16_text.size()) {
241 char16_t first = utf16_text[i];
243 if (first >= 0xD800 && first <= 0xDBFF) {
245 if (i + 1 >= utf16_text.size()) {
248 std::u16string_view char_view = utf16_text.substr(i, 2);
250 result.runes_.push_back(rune);
257 }
else if (first >= 0xDC00 && first <= 0xDFFF) {
262 std::u16string_view char_view = utf16_text.substr(i, 1);
264 result.runes_.push_back(rune);
272 }
catch (
const std::invalid_argument&) {
279 if (!language_counts.empty()) {
280 result.primary_language_ = std::max_element(language_counts.begin(), language_counts.end(),
281 [](
const auto& a,
const auto& b) { return a.second < b.second; })->first;
290 if (utf32_text.empty()) {
294 result.runes_.reserve(utf32_text.size());
295 std::map<language::Code, size_t> language_counts;
297 for (
char32_t codepoint : utf32_text) {
299 Rune rune(codepoint);
300 result.runes_.push_back(rune);
305 }
catch (
const std::invalid_argument&) {
311 if (!language_counts.empty()) {
312 result.primary_language_ = std::max_element(language_counts.begin(), language_counts.end(),
313 [](
const auto& a,
const auto& b) { return a.second < b.second; })->first;
bool contains(const Rune &rune) const noexcept
Check if the string contains a Rune.
static RuneString from_utf16(std::u16string_view utf16_text)
Create a RuneString from UTF-16 text.
RuneString() noexcept=default
Default constructor.
static RuneString from_utf8(std::string_view utf8_text)
Create a RuneString from UTF-8 text.
static constexpr size_type npos
void set_primary_language(language::Code lang) noexcept
Set the primary language of the sequence.
size_type find(const Rune &rune, size_type pos=0) const noexcept
Find first occurrence of a Rune.
static RuneString from_utf32(std::u32string_view utf32_text)
Create a RuneString from UTF-32 text.
std::u16string to_utf16() const
Convert the sequence to UTF-16 string.
size_type size() const noexcept
RuneString & append(const RuneString &other)
Append another RuneString.
std::u32string to_utf32() const
Convert the sequence to UTF-32 string.
RuneString substr(size_type start, size_type length=npos) const
Create a substring.
std::string to_utf8() const
Convert the sequence to UTF-8 string.
size_type length() const noexcept
Represents a single textual unit with Unicode and linguistic properties.
static Rune from_utf8(std::string_view utf8_char)
Create a Rune from a UTF-8 character.
constexpr language::Code language() const noexcept
Get the language code.
static Rune from_utf16(std::u16string_view utf16_char)
Create a Rune from a UTF-16 character.
Language detection and identification functionality.
Code
Enumeration of supported language codes.
@ Unknown
Unknown or undetected language.