5#include <unordered_map>
11LanguageDetector::LanguageDetector() =
default;
18 auto detected_language = detect_language_from_script(input);
22 result.set_language_hint(detected_language);
28 return "Detects the primary language of text based on script analysis";
36 std::unordered_map<unicode::Script, int> script_counts;
39 for (
const auto& rune : text) {
40 auto script = rune.script();
41 script_counts[script]++;
45 auto most_common = std::max_element(
46 script_counts.begin(),
48 [](
const auto& a,
const auto& b) {
49 return a.second < b.second;
53 if (most_common == script_counts.end()) {
58 switch (most_common->first) {
80 case unicode::Script::Devanagari:
std::string description() const override
Get the spell's description.
RuneSequence operator()(const RuneSequence &input) const override
Apply language detection to the input sequence.
Code
Enumeration of supported language codes.
@ Arabic
العربية (ar) - Arabic.
@ Hindi
हिन्दी (hi) - Hindi.
@ English
English (en-US) - English.
@ Unknown
Unknown or undetected language.
@ Korean
한국어 (ko-KR) - Korean.
@ Russian
Русский (ru) - Russian.
@ Japanese
日本語 (ja-JP) - Japanese.
RuneString RuneSequence
Backward compatibility alias for RuneString.