Rune Caster 1.0.0
Modern C++ Text Processing Framework
Loading...
Searching...
No Matches
spell_factory.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "spell_core.hpp"
4#include "spell_filter.hpp"
6#include "unicode.hpp"
7#include "spell_token.hpp"
8
9namespace rune_caster {
10namespace spell {
11namespace factory {
12
20
21// === Whitespace normalization factories ===
22
31inline auto whitespace() {
33}
34
/// Create a whitespace normalizer with explicit settings.
///
/// Both flags are forwarded unchanged to core::WhitespaceNormalizer.
/// @param collapse_multiple first constructor argument of the normalizer
/// @param trim_edges        second constructor argument; defaults to true
/// @return a core::WhitespaceNormalizer built from the two flags
42inline auto whitespace(bool collapse_multiple, bool trim_edges = true) {
43 return core::WhitespaceNormalizer{collapse_multiple, trim_edges};
44}
45
46// === Case conversion factories ===
47
55
63
71
72// === Unicode normalization factories ===
73
81
89
97
105
106// === Punctuation filter factories ===
107
/// Create a punctuation filter.
///
/// @param remove passed straight to filter::PunctuationFilter; defaults to true
/// @return a filter::PunctuationFilter constructed from `remove`
112inline auto punctuation(bool remove = true) {
113 return filter::PunctuationFilter{remove};
114}
115
116// === Combined operation factories ===
117
/// Create a text normalizer that applies common normalizations.
///
/// When CaseType is void the result is compose(whitespace(), unicode_nfc());
/// otherwise the composed pipeline ends with core::CaseConverter{case_type}.
///
/// NOTE(review): with CaseType defaulted to void, the by-value parameter
/// `CaseType case_type` would have type void, which is not a valid parameter
/// type in a template instantiation. A zero-argument call therefore looks
/// like it fails substitution instead of reaching the is_void_v branch --
/// confirm, and consider a separate no-argument overload.
127template<typename CaseType = void>
128auto text_normalizer(CaseType case_type = {}) {
129 if constexpr (std::is_void_v<CaseType>) {
130 return compose(whitespace(), unicode_nfc());
131 } else {
132 return compose(
134 core::CaseConverter{case_type}
135 );
136 }
137}
138
149inline auto search_preprocessor() {
150 return compose(
152 lowercase()
153 );
154}
155
166inline auto display_formatter() {
167 return compose(
169 titlecase()
170 );
171}
172
178template<bool RemovePunctuation = true>
180 if constexpr (RemovePunctuation) {
181 return compose(compose(whitespace(), unicode_nfc()), punctuation(true));
182 } else {
183 return compose(whitespace(), unicode_nfc());
184 }
185}
186
187// === Trim factory ===
/// Create a default-constructed core::TrimEdges spell (described in this file's
/// documentation as trimming leading and trailing whitespace).
188inline auto trim() { return core::TrimEdges{}; }
189
190// === Tokenizer factory ===
192
193} // namespace factory
194
195// Import factory functions into the main spell namespace for convenience
196using namespace factory;
197
198} // namespace spell
199} // namespace rune_caster
Case conversion spell using C++20 concepts.
@ Title
Convert to titlecase (first letter of each word)
Trim leading and trailing whitespace.
Unicode normalization spell using C++20 concepts.
Normalize whitespace characters using C++20 concepts.
Simple whitespace tokenizer. Splits the input RuneSequence into tokens separated by Unicode whitespace.
Remove punctuation characters using C++20 concepts.
auto uppercase()
Create an uppercase converter.
auto titlecase()
Create a titlecase converter.
auto display_formatter()
Create a display-optimized text formatter.
auto search_preprocessor()
Create a search-optimized text preprocessor.
auto whitespace()
Create a whitespace normalizer (one of the factory functions for creating commonly used spells).
auto lowercase()
Create a lowercase converter.
auto punctuation(bool remove=true)
Create a punctuation filter.
auto unicode_nfc()
Create an NFC (Canonical Decomposition followed by Canonical Composition) normalizer.
auto unicode_nfd()
Create an NFD (Canonical Decomposition) normalizer.
auto text_normalizer(CaseType case_type={})
Create a text normalizer that applies common normalizations.
auto text_normalizer_with_punctuation()
Text normalizer with optional punctuation removal.
auto unicode_nfkd()
Create an NFKD (Compatibility Decomposition) normalizer.
auto unicode_nfkc()
Create an NFKC (Compatibility Decomposition followed by Canonical Composition) normalizer.
auto compose(FirstSpell &&first, SecondSpell &&second)
Compose two spells into a single spell pipeline.