blob: cba4fd2f7e8e6801be41f24c81b6b0ae47deaec7 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
// file : libbutl/unicode.ixx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
namespace butl
{
// Bitwise-AND y into x (both viewed through their std::uint16_t
// representation), store the result back in x, and return the new value of
// x.
//
// Note that the result is returned by value, not by reference.
//
inline codepoint_types
operator&= (codepoint_types& x, codepoint_types y)
{
  const std::uint16_t lhs (static_cast<std::uint16_t> (x));
  const std::uint16_t rhs (static_cast<std::uint16_t> (y));

  x = static_cast<codepoint_types> (lhs & rhs);
  return x;
}
// Bitwise-OR y into x (both viewed through their std::uint16_t
// representation), store the result back in x, and return the new value of
// x.
//
// Note that the result is returned by value, not by reference.
//
inline codepoint_types
operator|= (codepoint_types& x, codepoint_types y)
{
  const std::uint16_t lhs (static_cast<std::uint16_t> (x));
  const std::uint16_t rhs (static_cast<std::uint16_t> (y));

  x = static_cast<codepoint_types> (lhs | rhs);
  return x;
}
// Return the bitwise AND of x and y without modifying the caller's values.
//
// Implemented in terms of the compound assignment applied to the by-value
// copy of the left operand.
//
inline codepoint_types
operator& (codepoint_types x, codepoint_types y)
{
  x &= y;
  return x;
}
// Return the bitwise OR of x and y without modifying the caller's values.
//
// Implemented in terms of the compound assignment applied to the by-value
// copy of the left operand.
//
inline codepoint_types
operator| (codepoint_types x, codepoint_types y)
{
  x |= y;
  return x;
}
// Out-of-line codepoint type lookup. Only declared here.
//
// This is the fallback that codepoint_type() calls for codepoints which it
// does not classify inline, that is, those that are not printable ASCII, not
// invalid (greater than 0x10FFFF or in the 0xD800-0xDFFF range), and whose
// low 16 bits are not 0xFFFE or 0xFFFF.
//
LIBBUTL_SYMEXPORT codepoint_types
codepoint_type_lookup (char32_t);
// Return the type of the specified codepoint.
//
// The cheap cases are classified inline, in this order: printable ASCII
// (graphic), invalid values (none), and the codepoints whose low 16 bits are
// 0xFFFE or 0xFFFF (non_character). Everything else is delegated to
// codepoint_type_lookup().
//
inline codepoint_types
codepoint_type (char32_t c)
{
  // Fast path for the common case: printable ASCII characters.
  //
  if (0x20 <= c && c <= 0x7E)
    return codepoint_types::graphic;

  // Invalid: beyond the last Unicode codepoint or inside the UTF-16
  // surrogate range.
  //
  const bool beyond_last (c > 0x10FFFF);
  const bool surrogate   (c >= 0xD800 && c <= 0xDFFF);

  if (beyond_last || surrogate)
    return codepoint_types::none;

  // Non-characters that are not range-based: xxFFFE and xxFFFF.
  //
  if ((c & 0xFFFF) >= 0xFFFE)
    return codepoint_types::non_character;

  return codepoint_type_lookup (c);
}
// Return the name of a single codepoint type or the empty string for
// none, any, and any combination of types.
//
// The names follow the Unicode standard terminology: "private-use" rather
// than "private use" and "noncharacter" (no dash) rather than
// "non-character".
//
inline std::string
to_string (codepoint_types t)
{
  // Stays empty for none, any, and types combinations (which match no case
  // label below).
  //
  const char* name ("");

  switch (t)
  {
  case codepoint_types::graphic:       name = "graphic";      break;
  case codepoint_types::format:        name = "format";       break;
  case codepoint_types::control:       name = "control";      break;
  case codepoint_types::private_use:   name = "private-use";  break;
  case codepoint_types::non_character: name = "noncharacter"; break;
  case codepoint_types::reserved:      name = "reserved";     break;
  case codepoint_types::none:
  case codepoint_types::any:                                  break;
  }

  return name;
}
}
|