30 constexpr Rune()
noexcept;
56 constexpr
Rune& operator=(const
Rune&) = default;
57 constexpr
Rune& operator=(
Rune&&) noexcept = default;
66 [[nodiscard]] constexpr
char32_t codepoint() const noexcept {
return codepoint_; }
84 [[nodiscard]] constexpr
unicode::Script
script() const noexcept;
90 [[nodiscard]] const std::string& phoneme() const noexcept { return phoneme_; }
112 [[nodiscard]]
constexpr bool is_vowel() const noexcept;
118 [[nodiscard]] constexpr
bool is_consonant() const noexcept;
124 [[nodiscard]] constexpr
bool is_letter() const noexcept;
130 [[nodiscard]] constexpr
bool is_digit() const noexcept;
150 [[nodiscard]] constexpr
bool is_ascii() const noexcept;
156 [[nodiscard]] constexpr
bool is_latin() const noexcept;
162 [[nodiscard]] constexpr
bool is_hangul() const noexcept;
168 [[nodiscard]] constexpr
bool is_hiragana() const noexcept;
174 [[nodiscard]] constexpr
bool is_katakana() const noexcept;
180 [[nodiscard]] constexpr
bool is_kanji() const noexcept;
186 [[nodiscard]] constexpr
bool is_emoji() const noexcept;
206 [[nodiscard]] constexpr
std::u32string
to_utf32() const;
240 [[nodiscard]] constexpr auto operator<=>(const
Rune& other) const noexcept = default;
245 [[nodiscard]] constexpr
bool operator==(const
Rune& other) const noexcept = default;
250 std::string phoneme_;
257 static constexpr
language::Code detect_language(
char32_t cp) noexcept;
269 constexpr Rune operator""_rune(char32_t cp) noexcept {
274 inline constexpr Rune::Rune() noexcept : codepoint_(U'\0'), language_(language::Code::Unknown), phoneme_() {}
285 inline constexpr language::Code Rune::detect_language(char32_t cp) noexcept {
287 if ((cp >= 0x1100 && cp <= 0x11FF) ||
288 (cp >= 0x3130 && cp <= 0x318F) ||
289 (cp >= 0xAC00 && cp <= 0xD7AF)) {
294 if ((cp >= 0x3040 && cp <= 0x309F) ||
295 (cp >= 0x30A0 && cp <= 0x30FF) ||
296 (cp >= 0x31F0 && cp <= 0x31FF)) {
301 if ((cp >= 0x4E00 && cp <= 0x9FFF) ||
302 (cp >= 0x3400 && cp <= 0x4DBF) ||
303 (cp >= 0x20000 && cp <= 0x2A6DF)) {
308 if ((cp <= 0x024F) ||
309 (cp >= 0x1E00 && cp <= 0x1EFF)) {
316 char32_t c = codepoint_;
319 if (c == U'a' || c == U'e' || c == U'i' || c == U'o' || c == U'u' ||
320 c == U'A' || c == U'E' || c == U'I' || c == U'O' || c == U'U') {
325 if (c >= U'ㅏ' && c <= U'ㅣ') {
330 if (c == U'あ' || c == U'い' || c == U'う' || c == U'え' || c == U'お') {
335 if (c == U'ア' || c == U'イ' || c == U'ウ' || c == U'エ' || c == U'オ') {
Represents a single textual unit with Unicode and linguistic properties.
constexpr bool is_emoji() const noexcept
Check if this is an emoji character.
constexpr Rune(const Rune &)=default
constexpr bool is_kanji() const noexcept
Check if this is a Kanji character.
constexpr bool is_hangul() const noexcept
Check if this is a Hangul (Korean) character.
constexpr bool is_letter() const noexcept
Check if this rune is a letter.
constexpr Rune(Rune &&) noexcept=default
static Rune from_utf8(std::string_view utf8_char)
Create a Rune from a UTF-8 character.
constexpr Rune() noexcept
Default constructor (creates null character)
constexpr bool is_consonant() const noexcept
Check if this rune represents a consonant.
constexpr bool is_whitespace() const noexcept
Check if this rune is whitespace.
constexpr bool is_katakana() const noexcept
Check if this is a Katakana character.
constexpr bool is_hiragana() const noexcept
Check if this is a Hiragana character.
constexpr language::Code language() const noexcept
Get the language code.
constexpr bool is_latin() const noexcept
Check if this is a Latin script character.
constexpr unicode::Category category() const noexcept
Get the Unicode category.
void set_phoneme(std::string phoneme)
Set the phonetic representation.
const std::string & phoneme() const noexcept
Get the phonetic representation.
constexpr bool is_ascii() const noexcept
Check if this is an ASCII character.
constexpr void set_language(language::Code lang) noexcept
Set the language code.
std::string to_utf8() const
Convert to UTF-8 string.
constexpr bool is_punctuation() const noexcept
Check if this rune is punctuation.
static Rune from_utf16(std::u16string_view utf16_char)
Create a Rune from a UTF-16 character.
constexpr unicode::Script script() const noexcept
Get the Unicode script.
constexpr bool is_digit() const noexcept
Check if this rune is a digit.
constexpr std::u32string to_utf32() const
Convert to UTF-32 string.
std::u16string to_utf16() const
Convert to UTF-16 string.
constexpr bool is_vowel() const noexcept
Check if this rune represents a vowel.
constexpr char32_t codepoint() const noexcept
Get the Unicode codepoint.
Language identification and localization support.
Language detection and identification functionality.
Code
Enumeration of supported language codes.
@ Chinese
中文 (zh-CN) - Chinese (Simplified)
@ English
English (en-US) - English.
@ Unknown
Unknown or undetected language.
@ Korean
한국어 (ko-KR) - Korean
@ Japanese
日本語 (ja-JP) - Japanese
constexpr bool is_whitespace(char32_t cp) noexcept
constexpr bool is_letter(char32_t cp) noexcept
constexpr bool is_punctuation(char32_t cp) noexcept
constexpr bool is_digit(char32_t cp) noexcept