1 : /* -*- mode: C -*-
2 : *
3 : * File: pdf-text-ucd-wordbreak.c
4 : * Date: Sun Mar 09 12:59:01 2008
5 : *
6 : * GNU PDF Library - Unicode Character Database - WordBreak Property
7 : *
8 : * WARNING! The contents of the arrays in this file are automatically
9 : * generated from the WordBreakProperty.txt file by the
10 : * `pdf_text_generate_ucd' utility.
11 : *
12 : */
13 :
14 : /* Copyright (C) 2008 Free Software Foundation, Inc. */
15 :
16 : /* This program is free software: you can redistribute it and/or modify
17 : * it under the terms of the GNU General Public License as published by
18 : * the Free Software Foundation, either version 3 of the License, or
19 : * (at your option) any later version.
20 : *
21 : * This program is distributed in the hope that it will be useful,
22 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 : * GNU General Public License for more details.
25 : *
26 : * You should have received a copy of the GNU General Public License
27 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
28 : */
29 :
30 : #include <config.h>
31 :
32 : #include <pdf-text-ucd-wordbreak.h>
33 :
/* One row of the WordBreak lookup table: a contiguous range of Unicode
 * code points together with the word-break property shared by every
 * code point in that range.  Rows in the table are written as positional
 * initializers, so the order of the fields below must not change. */
34 : typedef struct _unicode_wordbreak_info_s {
35 : pdf_u32_t interval_start; /* First code point of the interval */
36 : pdf_u32_t interval_stop; /* Last code point of the interval (inclusive; equals interval_start for a single code point) */
37 : enum pdf_text_ucd_wb_property_e wb_property; /* Word-break property assigned to the whole interval */
38 : } unicode_wordbreak_info_t;
39 :
40 : /*************** START OF SELF-GENERATED DATA *********************************/
41 :
/*
 * Index ranges into unicode_wordbreak_info[].
 *
 * The table keeps all intervals that share a word-break property in one
 * consecutive run.  For each property, UCD_WB_<PROPERTY>_F is the array
 * index of the first interval of that run and UCD_WB_<PROPERTY>_L is the
 * index of the last one; both bounds are inclusive, so a property with a
 * single interval has _F == _L.  The runs are laid out back to back: each
 * _F is the previous property's _L plus one.
 *
 * These values are produced together with the table and must stay in
 * step with it.
 */
42 : /* Index of first 'CR' interval */
43 : #define UCD_WB_CR_F 0
44 : /* Index of last 'CR' interval */
45 : #define UCD_WB_CR_L 0
46 : /* Index of first 'LF' interval */
47 : #define UCD_WB_LF_F 1
48 : /* Index of last 'LF' interval */
49 : #define UCD_WB_LF_L 1
50 : /* Index of first 'NEWLINE' interval */
51 : #define UCD_WB_NEWLINE_F 2
52 : /* Index of last 'NEWLINE' interval */
53 : #define UCD_WB_NEWLINE_L 5
54 : /* Index of first 'EXTEND' interval */
55 : #define UCD_WB_EXTEND_F 6
56 : /* Index of last 'EXTEND' interval */
57 : #define UCD_WB_EXTEND_L 257
58 : /* Index of first 'FORMAT' interval */
59 : #define UCD_WB_FORMAT_F 258
60 : /* Index of last 'FORMAT' interval */
61 : #define UCD_WB_FORMAT_L 272
62 : /* Index of first 'KATAKANA' interval */
63 : #define UCD_WB_KATAKANA_F 273
64 : /* Index of last 'KATAKANA' interval */
65 : #define UCD_WB_KATAKANA_L 284
66 : /* Index of first 'ALETTER' interval */
67 : #define UCD_WB_ALETTER_F 285
68 : /* Index of last 'ALETTER' interval */
69 : #define UCD_WB_ALETTER_L 668
70 : /* Index of first 'MIDLETTER' interval */
71 : #define UCD_WB_MIDLETTER_F 669
72 : /* Index of last 'MIDLETTER' interval */
73 : #define UCD_WB_MIDLETTER_L 676
74 : /* Index of first 'MIDNUM' interval */
75 : #define UCD_WB_MIDNUM_F 677
76 : /* Index of last 'MIDNUM' interval */
77 : #define UCD_WB_MIDNUM_L 690
78 : /* Index of first 'MIDNUMLET' interval */
79 : #define UCD_WB_MIDNUMLET_F 691
80 : /* Index of last 'MIDNUMLET' interval */
81 : #define UCD_WB_MIDNUMLET_L 698
82 : /* Index of first 'NUMERIC' interval */
83 : #define UCD_WB_NUMERIC_F 699
84 : /* Index of last 'NUMERIC' interval */
85 : #define UCD_WB_NUMERIC_L 731
86 : /* Index of first 'EXTENDNUMLET' interval */
87 : #define UCD_WB_EXTENDNUMLET_F 732
88 : /* Index of last 'EXTENDNUMLET' interval */
89 : #define UCD_WB_EXTENDNUMLET_L 737
90 : #define UCD_WB_INFO_N 738 /* Number of elements in unicode_wordbreak_info[] (highest interval index above, plus one) */
91 :
92 :
93 : static unicode_wordbreak_info_t unicode_wordbreak_info[UCD_WB_INFO_N] = {
94 : { 0x000D, 0x000D, PDF_TEXT_UCD_WBP_CR }, /* 0 */
95 : { 0x000A, 0x000A, PDF_TEXT_UCD_WBP_LF }, /* 1 */
96 : { 0x000B, 0x000C, PDF_TEXT_UCD_WBP_Newline }, /* 2 */
97 : { 0x0085, 0x0085, PDF_TEXT_UCD_WBP_Newline }, /* 3 */
98 : { 0x2028, 0x2028, PDF_TEXT_UCD_WBP_Newline }, /* 4 */
99 : { 0x2029, 0x2029, PDF_TEXT_UCD_WBP_Newline }, /* 5 */
100 : { 0x0300, 0x036F, PDF_TEXT_UCD_WBP_Extend }, /* 6 */
101 : { 0x0483, 0x0487, PDF_TEXT_UCD_WBP_Extend }, /* 7 */
102 : { 0x0488, 0x0489, PDF_TEXT_UCD_WBP_Extend }, /* 8 */
103 : { 0x0591, 0x05BD, PDF_TEXT_UCD_WBP_Extend }, /* 9 */
104 : { 0x05BF, 0x05BF, PDF_TEXT_UCD_WBP_Extend }, /* 10 */
105 : { 0x05C1, 0x05C2, PDF_TEXT_UCD_WBP_Extend }, /* 11 */
106 : { 0x05C4, 0x05C5, PDF_TEXT_UCD_WBP_Extend }, /* 12 */
107 : { 0x05C7, 0x05C7, PDF_TEXT_UCD_WBP_Extend }, /* 13 */
108 : { 0x0610, 0x061A, PDF_TEXT_UCD_WBP_Extend }, /* 14 */
109 : { 0x064B, 0x065E, PDF_TEXT_UCD_WBP_Extend }, /* 15 */
110 : { 0x0670, 0x0670, PDF_TEXT_UCD_WBP_Extend }, /* 16 */
111 : { 0x06D6, 0x06DC, PDF_TEXT_UCD_WBP_Extend }, /* 17 */
112 : { 0x06DE, 0x06DE, PDF_TEXT_UCD_WBP_Extend }, /* 18 */
113 : { 0x06DF, 0x06E4, PDF_TEXT_UCD_WBP_Extend }, /* 19 */
114 : { 0x06E7, 0x06E8, PDF_TEXT_UCD_WBP_Extend }, /* 20 */
115 : { 0x06EA, 0x06ED, PDF_TEXT_UCD_WBP_Extend }, /* 21 */
116 : { 0x0711, 0x0711, PDF_TEXT_UCD_WBP_Extend }, /* 22 */
117 : { 0x0730, 0x074A, PDF_TEXT_UCD_WBP_Extend }, /* 23 */
118 : { 0x07A6, 0x07B0, PDF_TEXT_UCD_WBP_Extend }, /* 24 */
119 : { 0x07EB, 0x07F3, PDF_TEXT_UCD_WBP_Extend }, /* 25 */
120 : { 0x0901, 0x0902, PDF_TEXT_UCD_WBP_Extend }, /* 26 */
121 : { 0x0903, 0x0903, PDF_TEXT_UCD_WBP_Extend }, /* 27 */
122 : { 0x093C, 0x093C, PDF_TEXT_UCD_WBP_Extend }, /* 28 */
123 : { 0x093E, 0x0940, PDF_TEXT_UCD_WBP_Extend }, /* 29 */
124 : { 0x0941, 0x0948, PDF_TEXT_UCD_WBP_Extend }, /* 30 */
125 : { 0x0949, 0x094C, PDF_TEXT_UCD_WBP_Extend }, /* 31 */
126 : { 0x094D, 0x094D, PDF_TEXT_UCD_WBP_Extend }, /* 32 */
127 : { 0x0951, 0x0954, PDF_TEXT_UCD_WBP_Extend }, /* 33 */
128 : { 0x0962, 0x0963, PDF_TEXT_UCD_WBP_Extend }, /* 34 */
129 : { 0x0981, 0x0981, PDF_TEXT_UCD_WBP_Extend }, /* 35 */
130 : { 0x0982, 0x0983, PDF_TEXT_UCD_WBP_Extend }, /* 36 */
131 : { 0x09BC, 0x09BC, PDF_TEXT_UCD_WBP_Extend }, /* 37 */
132 : { 0x09BE, 0x09C0, PDF_TEXT_UCD_WBP_Extend }, /* 38 */
133 : { 0x09C1, 0x09C4, PDF_TEXT_UCD_WBP_Extend }, /* 39 */
134 : { 0x09C7, 0x09C8, PDF_TEXT_UCD_WBP_Extend }, /* 40 */
135 : { 0x09CB, 0x09CC, PDF_TEXT_UCD_WBP_Extend }, /* 41 */
136 : { 0x09CD, 0x09CD, PDF_TEXT_UCD_WBP_Extend }, /* 42 */
137 : { 0x09D7, 0x09D7, PDF_TEXT_UCD_WBP_Extend }, /* 43 */
138 : { 0x09E2, 0x09E3, PDF_TEXT_UCD_WBP_Extend }, /* 44 */
139 : { 0x0A01, 0x0A02, PDF_TEXT_UCD_WBP_Extend }, /* 45 */
140 : { 0x0A03, 0x0A03, PDF_TEXT_UCD_WBP_Extend }, /* 46 */
141 : { 0x0A3C, 0x0A3C, PDF_TEXT_UCD_WBP_Extend }, /* 47 */
142 : { 0x0A3E, 0x0A40, PDF_TEXT_UCD_WBP_Extend }, /* 48 */
143 : { 0x0A41, 0x0A42, PDF_TEXT_UCD_WBP_Extend }, /* 49 */
144 : { 0x0A47, 0x0A48, PDF_TEXT_UCD_WBP_Extend }, /* 50 */
145 : { 0x0A4B, 0x0A4D, PDF_TEXT_UCD_WBP_Extend }, /* 51 */
146 : { 0x0A51, 0x0A51, PDF_TEXT_UCD_WBP_Extend }, /* 52 */
147 : { 0x0A70, 0x0A71, PDF_TEXT_UCD_WBP_Extend }, /* 53 */
148 : { 0x0A75, 0x0A75, PDF_TEXT_UCD_WBP_Extend }, /* 54 */
149 : { 0x0A81, 0x0A82, PDF_TEXT_UCD_WBP_Extend }, /* 55 */
150 : { 0x0A83, 0x0A83, PDF_TEXT_UCD_WBP_Extend }, /* 56 */
151 : { 0x0ABC, 0x0ABC, PDF_TEXT_UCD_WBP_Extend }, /* 57 */
152 : { 0x0ABE, 0x0AC0, PDF_TEXT_UCD_WBP_Extend }, /* 58 */
153 : { 0x0AC1, 0x0AC5, PDF_TEXT_UCD_WBP_Extend }, /* 59 */
154 : { 0x0AC7, 0x0AC8, PDF_TEXT_UCD_WBP_Extend }, /* 60 */
155 : { 0x0AC9, 0x0AC9, PDF_TEXT_UCD_WBP_Extend }, /* 61 */
156 : { 0x0ACB, 0x0ACC, PDF_TEXT_UCD_WBP_Extend }, /* 62 */
157 : { 0x0ACD, 0x0ACD, PDF_TEXT_UCD_WBP_Extend }, /* 63 */
158 : { 0x0AE2, 0x0AE3, PDF_TEXT_UCD_WBP_Extend }, /* 64 */
159 : { 0x0B01, 0x0B01, PDF_TEXT_UCD_WBP_Extend }, /* 65 */
160 : { 0x0B02, 0x0B03, PDF_TEXT_UCD_WBP_Extend }, /* 66 */
161 : { 0x0B3C, 0x0B3C, PDF_TEXT_UCD_WBP_Extend }, /* 67 */
162 : { 0x0B3E, 0x0B3E, PDF_TEXT_UCD_WBP_Extend }, /* 68 */
163 : { 0x0B3F, 0x0B3F, PDF_TEXT_UCD_WBP_Extend }, /* 69 */
164 : { 0x0B40, 0x0B40, PDF_TEXT_UCD_WBP_Extend }, /* 70 */
165 : { 0x0B41, 0x0B44, PDF_TEXT_UCD_WBP_Extend }, /* 71 */
166 : { 0x0B47, 0x0B48, PDF_TEXT_UCD_WBP_Extend }, /* 72 */
167 : { 0x0B4B, 0x0B4C, PDF_TEXT_UCD_WBP_Extend }, /* 73 */
168 : { 0x0B4D, 0x0B4D, PDF_TEXT_UCD_WBP_Extend }, /* 74 */
169 : { 0x0B56, 0x0B56, PDF_TEXT_UCD_WBP_Extend }, /* 75 */
170 : { 0x0B57, 0x0B57, PDF_TEXT_UCD_WBP_Extend }, /* 76 */
171 : { 0x0B62, 0x0B63, PDF_TEXT_UCD_WBP_Extend }, /* 77 */
172 : { 0x0B82, 0x0B82, PDF_TEXT_UCD_WBP_Extend }, /* 78 */
173 : { 0x0BBE, 0x0BBF, PDF_TEXT_UCD_WBP_Extend }, /* 79 */
174 : { 0x0BC0, 0x0BC0, PDF_TEXT_UCD_WBP_Extend }, /* 80 */
175 : { 0x0BC1, 0x0BC2, PDF_TEXT_UCD_WBP_Extend }, /* 81 */
176 : { 0x0BC6, 0x0BC8, PDF_TEXT_UCD_WBP_Extend }, /* 82 */
177 : { 0x0BCA, 0x0BCC, PDF_TEXT_UCD_WBP_Extend }, /* 83 */
178 : { 0x0BCD, 0x0BCD, PDF_TEXT_UCD_WBP_Extend }, /* 84 */
179 : { 0x0BD7, 0x0BD7, PDF_TEXT_UCD_WBP_Extend }, /* 85 */
180 : { 0x0C01, 0x0C03, PDF_TEXT_UCD_WBP_Extend }, /* 86 */
181 : { 0x0C3E, 0x0C40, PDF_TEXT_UCD_WBP_Extend }, /* 87 */
182 : { 0x0C41, 0x0C44, PDF_TEXT_UCD_WBP_Extend }, /* 88 */
183 : { 0x0C46, 0x0C48, PDF_TEXT_UCD_WBP_Extend }, /* 89 */
184 : { 0x0C4A, 0x0C4D, PDF_TEXT_UCD_WBP_Extend }, /* 90 */
185 : { 0x0C55, 0x0C56, PDF_TEXT_UCD_WBP_Extend }, /* 91 */
186 : { 0x0C62, 0x0C63, PDF_TEXT_UCD_WBP_Extend }, /* 92 */
187 : { 0x0C82, 0x0C83, PDF_TEXT_UCD_WBP_Extend }, /* 93 */
188 : { 0x0CBC, 0x0CBC, PDF_TEXT_UCD_WBP_Extend }, /* 94 */
189 : { 0x0CBE, 0x0CBE, PDF_TEXT_UCD_WBP_Extend }, /* 95 */
190 : { 0x0CBF, 0x0CBF, PDF_TEXT_UCD_WBP_Extend }, /* 96 */
191 : { 0x0CC0, 0x0CC4, PDF_TEXT_UCD_WBP_Extend }, /* 97 */
192 : { 0x0CC6, 0x0CC6, PDF_TEXT_UCD_WBP_Extend }, /* 98 */
193 : { 0x0CC7, 0x0CC8, PDF_TEXT_UCD_WBP_Extend }, /* 99 */
194 : { 0x0CCA, 0x0CCB, PDF_TEXT_UCD_WBP_Extend }, /* 100 */
195 : { 0x0CCC, 0x0CCD, PDF_TEXT_UCD_WBP_Extend }, /* 101 */
196 : { 0x0CD5, 0x0CD6, PDF_TEXT_UCD_WBP_Extend }, /* 102 */
197 : { 0x0CE2, 0x0CE3, PDF_TEXT_UCD_WBP_Extend }, /* 103 */
198 : { 0x0D02, 0x0D03, PDF_TEXT_UCD_WBP_Extend }, /* 104 */
199 : { 0x0D3E, 0x0D40, PDF_TEXT_UCD_WBP_Extend }, /* 105 */
200 : { 0x0D41, 0x0D44, PDF_TEXT_UCD_WBP_Extend }, /* 106 */
201 : { 0x0D46, 0x0D48, PDF_TEXT_UCD_WBP_Extend }, /* 107 */
202 : { 0x0D4A, 0x0D4C, PDF_TEXT_UCD_WBP_Extend }, /* 108 */
203 : { 0x0D4D, 0x0D4D, PDF_TEXT_UCD_WBP_Extend }, /* 109 */
204 : { 0x0D57, 0x0D57, PDF_TEXT_UCD_WBP_Extend }, /* 110 */
205 : { 0x0D62, 0x0D63, PDF_TEXT_UCD_WBP_Extend }, /* 111 */
206 : { 0x0D82, 0x0D83, PDF_TEXT_UCD_WBP_Extend }, /* 112 */
207 : { 0x0DCA, 0x0DCA, PDF_TEXT_UCD_WBP_Extend }, /* 113 */
208 : { 0x0DCF, 0x0DD1, PDF_TEXT_UCD_WBP_Extend }, /* 114 */
209 : { 0x0DD2, 0x0DD4, PDF_TEXT_UCD_WBP_Extend }, /* 115 */
210 : { 0x0DD6, 0x0DD6, PDF_TEXT_UCD_WBP_Extend }, /* 116 */
211 : { 0x0DD8, 0x0DDF, PDF_TEXT_UCD_WBP_Extend }, /* 117 */
212 : { 0x0DF2, 0x0DF3, PDF_TEXT_UCD_WBP_Extend }, /* 118 */
213 : { 0x0E31, 0x0E31, PDF_TEXT_UCD_WBP_Extend }, /* 119 */
214 : { 0x0E34, 0x0E3A, PDF_TEXT_UCD_WBP_Extend }, /* 120 */
215 : { 0x0E47, 0x0E4E, PDF_TEXT_UCD_WBP_Extend }, /* 121 */
216 : { 0x0EB1, 0x0EB1, PDF_TEXT_UCD_WBP_Extend }, /* 122 */
217 : { 0x0EB4, 0x0EB9, PDF_TEXT_UCD_WBP_Extend }, /* 123 */
218 : { 0x0EBB, 0x0EBC, PDF_TEXT_UCD_WBP_Extend }, /* 124 */
219 : { 0x0EC8, 0x0ECD, PDF_TEXT_UCD_WBP_Extend }, /* 125 */
220 : { 0x0F18, 0x0F19, PDF_TEXT_UCD_WBP_Extend }, /* 126 */
221 : { 0x0F35, 0x0F35, PDF_TEXT_UCD_WBP_Extend }, /* 127 */
222 : { 0x0F37, 0x0F37, PDF_TEXT_UCD_WBP_Extend }, /* 128 */
223 : { 0x0F39, 0x0F39, PDF_TEXT_UCD_WBP_Extend }, /* 129 */
224 : { 0x0F3E, 0x0F3F, PDF_TEXT_UCD_WBP_Extend }, /* 130 */
225 : { 0x0F71, 0x0F7E, PDF_TEXT_UCD_WBP_Extend }, /* 131 */
226 : { 0x0F7F, 0x0F7F, PDF_TEXT_UCD_WBP_Extend }, /* 132 */
227 : { 0x0F80, 0x0F84, PDF_TEXT_UCD_WBP_Extend }, /* 133 */
228 : { 0x0F86, 0x0F87, PDF_TEXT_UCD_WBP_Extend }, /* 134 */
229 : { 0x0F90, 0x0F97, PDF_TEXT_UCD_WBP_Extend }, /* 135 */
230 : { 0x0F99, 0x0FBC, PDF_TEXT_UCD_WBP_Extend }, /* 136 */
231 : { 0x0FC6, 0x0FC6, PDF_TEXT_UCD_WBP_Extend }, /* 137 */
232 : { 0x102B, 0x102C, PDF_TEXT_UCD_WBP_Extend }, /* 138 */
233 : { 0x102D, 0x1030, PDF_TEXT_UCD_WBP_Extend }, /* 139 */
234 : { 0x1031, 0x1031, PDF_TEXT_UCD_WBP_Extend }, /* 140 */
235 : { 0x1032, 0x1037, PDF_TEXT_UCD_WBP_Extend }, /* 141 */
236 : { 0x1038, 0x1038, PDF_TEXT_UCD_WBP_Extend }, /* 142 */
237 : { 0x1039, 0x103A, PDF_TEXT_UCD_WBP_Extend }, /* 143 */
238 : { 0x103B, 0x103C, PDF_TEXT_UCD_WBP_Extend }, /* 144 */
239 : { 0x103D, 0x103E, PDF_TEXT_UCD_WBP_Extend }, /* 145 */
240 : { 0x1056, 0x1057, PDF_TEXT_UCD_WBP_Extend }, /* 146 */
241 : { 0x1058, 0x1059, PDF_TEXT_UCD_WBP_Extend }, /* 147 */
242 : { 0x105E, 0x1060, PDF_TEXT_UCD_WBP_Extend }, /* 148 */
243 : { 0x1062, 0x1064, PDF_TEXT_UCD_WBP_Extend }, /* 149 */
244 : { 0x1067, 0x106D, PDF_TEXT_UCD_WBP_Extend }, /* 150 */
245 : { 0x1071, 0x1074, PDF_TEXT_UCD_WBP_Extend }, /* 151 */
246 : { 0x1082, 0x1082, PDF_TEXT_UCD_WBP_Extend }, /* 152 */
247 : { 0x1083, 0x1084, PDF_TEXT_UCD_WBP_Extend }, /* 153 */
248 : { 0x1085, 0x1086, PDF_TEXT_UCD_WBP_Extend }, /* 154 */
249 : { 0x1087, 0x108C, PDF_TEXT_UCD_WBP_Extend }, /* 155 */
250 : { 0x108D, 0x108D, PDF_TEXT_UCD_WBP_Extend }, /* 156 */
251 : { 0x108F, 0x108F, PDF_TEXT_UCD_WBP_Extend }, /* 157 */
252 : { 0x135F, 0x135F, PDF_TEXT_UCD_WBP_Extend }, /* 158 */
253 : { 0x1712, 0x1714, PDF_TEXT_UCD_WBP_Extend }, /* 159 */
254 : { 0x1732, 0x1734, PDF_TEXT_UCD_WBP_Extend }, /* 160 */
255 : { 0x1752, 0x1753, PDF_TEXT_UCD_WBP_Extend }, /* 161 */
256 : { 0x1772, 0x1773, PDF_TEXT_UCD_WBP_Extend }, /* 162 */
257 : { 0x17B6, 0x17B6, PDF_TEXT_UCD_WBP_Extend }, /* 163 */
258 : { 0x17B7, 0x17BD, PDF_TEXT_UCD_WBP_Extend }, /* 164 */
259 : { 0x17BE, 0x17C5, PDF_TEXT_UCD_WBP_Extend }, /* 165 */
260 : { 0x17C6, 0x17C6, PDF_TEXT_UCD_WBP_Extend }, /* 166 */
261 : { 0x17C7, 0x17C8, PDF_TEXT_UCD_WBP_Extend }, /* 167 */
262 : { 0x17C9, 0x17D3, PDF_TEXT_UCD_WBP_Extend }, /* 168 */
263 : { 0x17DD, 0x17DD, PDF_TEXT_UCD_WBP_Extend }, /* 169 */
264 : { 0x180B, 0x180D, PDF_TEXT_UCD_WBP_Extend }, /* 170 */
265 : { 0x18A9, 0x18A9, PDF_TEXT_UCD_WBP_Extend }, /* 171 */
266 : { 0x1920, 0x1922, PDF_TEXT_UCD_WBP_Extend }, /* 172 */
267 : { 0x1923, 0x1926, PDF_TEXT_UCD_WBP_Extend }, /* 173 */
268 : { 0x1927, 0x1928, PDF_TEXT_UCD_WBP_Extend }, /* 174 */
269 : { 0x1929, 0x192B, PDF_TEXT_UCD_WBP_Extend }, /* 175 */
270 : { 0x1930, 0x1931, PDF_TEXT_UCD_WBP_Extend }, /* 176 */
271 : { 0x1932, 0x1932, PDF_TEXT_UCD_WBP_Extend }, /* 177 */
272 : { 0x1933, 0x1938, PDF_TEXT_UCD_WBP_Extend }, /* 178 */
273 : { 0x1939, 0x193B, PDF_TEXT_UCD_WBP_Extend }, /* 179 */
274 : { 0x19B0, 0x19C0, PDF_TEXT_UCD_WBP_Extend }, /* 180 */
275 : { 0x19C8, 0x19C9, PDF_TEXT_UCD_WBP_Extend }, /* 181 */
276 : { 0x1A17, 0x1A18, PDF_TEXT_UCD_WBP_Extend }, /* 182 */
277 : { 0x1A19, 0x1A1B, PDF_TEXT_UCD_WBP_Extend }, /* 183 */
278 : { 0x1B00, 0x1B03, PDF_TEXT_UCD_WBP_Extend }, /* 184 */
279 : { 0x1B04, 0x1B04, PDF_TEXT_UCD_WBP_Extend }, /* 185 */
280 : { 0x1B34, 0x1B34, PDF_TEXT_UCD_WBP_Extend }, /* 186 */
281 : { 0x1B35, 0x1B35, PDF_TEXT_UCD_WBP_Extend }, /* 187 */
282 : { 0x1B36, 0x1B3A, PDF_TEXT_UCD_WBP_Extend }, /* 188 */
283 : { 0x1B3B, 0x1B3B, PDF_TEXT_UCD_WBP_Extend }, /* 189 */
284 : { 0x1B3C, 0x1B3C, PDF_TEXT_UCD_WBP_Extend }, /* 190 */
285 : { 0x1B3D, 0x1B41, PDF_TEXT_UCD_WBP_Extend }, /* 191 */
286 : { 0x1B42, 0x1B42, PDF_TEXT_UCD_WBP_Extend }, /* 192 */
287 : { 0x1B43, 0x1B44, PDF_TEXT_UCD_WBP_Extend }, /* 193 */
288 : { 0x1B6B, 0x1B73, PDF_TEXT_UCD_WBP_Extend }, /* 194 */
289 : { 0x1B80, 0x1B81, PDF_TEXT_UCD_WBP_Extend }, /* 195 */
290 : { 0x1B82, 0x1B82, PDF_TEXT_UCD_WBP_Extend }, /* 196 */
291 : { 0x1BA1, 0x1BA1, PDF_TEXT_UCD_WBP_Extend }, /* 197 */
292 : { 0x1BA2, 0x1BA5, PDF_TEXT_UCD_WBP_Extend }, /* 198 */
293 : { 0x1BA6, 0x1BA7, PDF_TEXT_UCD_WBP_Extend }, /* 199 */
294 : { 0x1BA8, 0x1BA9, PDF_TEXT_UCD_WBP_Extend }, /* 200 */
295 : { 0x1BAA, 0x1BAA, PDF_TEXT_UCD_WBP_Extend }, /* 201 */
296 : { 0x1C24, 0x1C2B, PDF_TEXT_UCD_WBP_Extend }, /* 202 */
297 : { 0x1C2C, 0x1C33, PDF_TEXT_UCD_WBP_Extend }, /* 203 */
298 : { 0x1C34, 0x1C35, PDF_TEXT_UCD_WBP_Extend }, /* 204 */
299 : { 0x1C36, 0x1C37, PDF_TEXT_UCD_WBP_Extend }, /* 205 */
300 : { 0x1DC0, 0x1DE6, PDF_TEXT_UCD_WBP_Extend }, /* 206 */
301 : { 0x1DFE, 0x1DFF, PDF_TEXT_UCD_WBP_Extend }, /* 207 */
302 : { 0x200C, 0x200D, PDF_TEXT_UCD_WBP_Extend }, /* 208 */
303 : { 0x20D0, 0x20DC, PDF_TEXT_UCD_WBP_Extend }, /* 209 */
304 : { 0x20DD, 0x20E0, PDF_TEXT_UCD_WBP_Extend }, /* 210 */
305 : { 0x20E1, 0x20E1, PDF_TEXT_UCD_WBP_Extend }, /* 211 */
306 : { 0x20E2, 0x20E4, PDF_TEXT_UCD_WBP_Extend }, /* 212 */
307 : { 0x20E5, 0x20F0, PDF_TEXT_UCD_WBP_Extend }, /* 213 */
308 : { 0x2DE0, 0x2DFF, PDF_TEXT_UCD_WBP_Extend }, /* 214 */
309 : { 0x302A, 0x302F, PDF_TEXT_UCD_WBP_Extend }, /* 215 */
310 : { 0x3099, 0x309A, PDF_TEXT_UCD_WBP_Extend }, /* 216 */
311 : { 0xA66F, 0xA66F, PDF_TEXT_UCD_WBP_Extend }, /* 217 */
312 : { 0xA670, 0xA672, PDF_TEXT_UCD_WBP_Extend }, /* 218 */
313 : { 0xA67C, 0xA67D, PDF_TEXT_UCD_WBP_Extend }, /* 219 */
314 : { 0xA802, 0xA802, PDF_TEXT_UCD_WBP_Extend }, /* 220 */
315 : { 0xA806, 0xA806, PDF_TEXT_UCD_WBP_Extend }, /* 221 */
316 : { 0xA80B, 0xA80B, PDF_TEXT_UCD_WBP_Extend }, /* 222 */
317 : { 0xA823, 0xA824, PDF_TEXT_UCD_WBP_Extend }, /* 223 */
318 : { 0xA825, 0xA826, PDF_TEXT_UCD_WBP_Extend }, /* 224 */
319 : { 0xA827, 0xA827, PDF_TEXT_UCD_WBP_Extend }, /* 225 */
320 : { 0xA880, 0xA881, PDF_TEXT_UCD_WBP_Extend }, /* 226 */
321 : { 0xA8B4, 0xA8C3, PDF_TEXT_UCD_WBP_Extend }, /* 227 */
322 : { 0xA8C4, 0xA8C4, PDF_TEXT_UCD_WBP_Extend }, /* 228 */
323 : { 0xA926, 0xA92D, PDF_TEXT_UCD_WBP_Extend }, /* 229 */
324 : { 0xA947, 0xA951, PDF_TEXT_UCD_WBP_Extend }, /* 230 */
325 : { 0xA952, 0xA953, PDF_TEXT_UCD_WBP_Extend }, /* 231 */
326 : { 0xAA29, 0xAA2E, PDF_TEXT_UCD_WBP_Extend }, /* 232 */
327 : { 0xAA2F, 0xAA30, PDF_TEXT_UCD_WBP_Extend }, /* 233 */
328 : { 0xAA31, 0xAA32, PDF_TEXT_UCD_WBP_Extend }, /* 234 */
329 : { 0xAA33, 0xAA34, PDF_TEXT_UCD_WBP_Extend }, /* 235 */
330 : { 0xAA35, 0xAA36, PDF_TEXT_UCD_WBP_Extend }, /* 236 */
331 : { 0xAA43, 0xAA43, PDF_TEXT_UCD_WBP_Extend }, /* 237 */
332 : { 0xAA4C, 0xAA4C, PDF_TEXT_UCD_WBP_Extend }, /* 238 */
333 : { 0xAA4D, 0xAA4D, PDF_TEXT_UCD_WBP_Extend }, /* 239 */
334 : { 0xFB1E, 0xFB1E, PDF_TEXT_UCD_WBP_Extend }, /* 240 */
335 : { 0xFE00, 0xFE0F, PDF_TEXT_UCD_WBP_Extend }, /* 241 */
336 : { 0xFE20, 0xFE26, PDF_TEXT_UCD_WBP_Extend }, /* 242 */
337 : { 0xFF9E, 0xFF9F, PDF_TEXT_UCD_WBP_Extend }, /* 243 */
338 : { 0x101FD, 0x101FD, PDF_TEXT_UCD_WBP_Extend }, /* 244 */
339 : { 0x10A01, 0x10A03, PDF_TEXT_UCD_WBP_Extend }, /* 245 */
340 : { 0x10A05, 0x10A06, PDF_TEXT_UCD_WBP_Extend }, /* 246 */
341 : { 0x10A0C, 0x10A0F, PDF_TEXT_UCD_WBP_Extend }, /* 247 */
342 : { 0x10A38, 0x10A3A, PDF_TEXT_UCD_WBP_Extend }, /* 248 */
343 : { 0x10A3F, 0x10A3F, PDF_TEXT_UCD_WBP_Extend }, /* 249 */
344 : { 0x1D165, 0x1D166, PDF_TEXT_UCD_WBP_Extend }, /* 250 */
345 : { 0x1D167, 0x1D169, PDF_TEXT_UCD_WBP_Extend }, /* 251 */
346 : { 0x1D16D, 0x1D172, PDF_TEXT_UCD_WBP_Extend }, /* 252 */
347 : { 0x1D17B, 0x1D182, PDF_TEXT_UCD_WBP_Extend }, /* 253 */
348 : { 0x1D185, 0x1D18B, PDF_TEXT_UCD_WBP_Extend }, /* 254 */
349 : { 0x1D1AA, 0x1D1AD, PDF_TEXT_UCD_WBP_Extend }, /* 255 */
350 : { 0x1D242, 0x1D244, PDF_TEXT_UCD_WBP_Extend }, /* 256 */
351 : { 0xE0100, 0xE01EF, PDF_TEXT_UCD_WBP_Extend }, /* 257 */
352 : { 0x00AD, 0x00AD, PDF_TEXT_UCD_WBP_Format }, /* 258 */
353 : { 0x0600, 0x0603, PDF_TEXT_UCD_WBP_Format }, /* 259 */
354 : { 0x06DD, 0x06DD, PDF_TEXT_UCD_WBP_Format }, /* 260 */
355 : { 0x070F, 0x070F, PDF_TEXT_UCD_WBP_Format }, /* 261 */
356 : { 0x17B4, 0x17B5, PDF_TEXT_UCD_WBP_Format }, /* 262 */
357 : { 0x200B, 0x200B, PDF_TEXT_UCD_WBP_Format }, /* 263 */
358 : { 0x200E, 0x200F, PDF_TEXT_UCD_WBP_Format }, /* 264 */
359 : { 0x202A, 0x202E, PDF_TEXT_UCD_WBP_Format }, /* 265 */
360 : { 0x2060, 0x2064, PDF_TEXT_UCD_WBP_Format }, /* 266 */
361 : { 0x206A, 0x206F, PDF_TEXT_UCD_WBP_Format }, /* 267 */
362 : { 0xFEFF, 0xFEFF, PDF_TEXT_UCD_WBP_Format }, /* 268 */
363 : { 0xFFF9, 0xFFFB, PDF_TEXT_UCD_WBP_Format }, /* 269 */
364 : { 0x1D173, 0x1D17A, PDF_TEXT_UCD_WBP_Format }, /* 270 */
365 : { 0xE0001, 0xE0001, PDF_TEXT_UCD_WBP_Format }, /* 271 */
366 : { 0xE0020, 0xE007F, PDF_TEXT_UCD_WBP_Format }, /* 272 */
367 : { 0x3031, 0x3035, PDF_TEXT_UCD_WBP_Katakana }, /* 273 */
368 : { 0x309B, 0x309C, PDF_TEXT_UCD_WBP_Katakana }, /* 274 */
369 : { 0x30A0, 0x30A0, PDF_TEXT_UCD_WBP_Katakana }, /* 275 */
370 : { 0x30A1, 0x30FA, PDF_TEXT_UCD_WBP_Katakana }, /* 276 */
371 : { 0x30FC, 0x30FE, PDF_TEXT_UCD_WBP_Katakana }, /* 277 */
372 : { 0x30FF, 0x30FF, PDF_TEXT_UCD_WBP_Katakana }, /* 278 */
373 : { 0x31F0, 0x31FF, PDF_TEXT_UCD_WBP_Katakana }, /* 279 */
374 : { 0x32D0, 0x32FE, PDF_TEXT_UCD_WBP_Katakana }, /* 280 */
375 : { 0x3300, 0x3357, PDF_TEXT_UCD_WBP_Katakana }, /* 281 */
376 : { 0xFF66, 0xFF6F, PDF_TEXT_UCD_WBP_Katakana }, /* 282 */
377 : { 0xFF70, 0xFF70, PDF_TEXT_UCD_WBP_Katakana }, /* 283 */
378 : { 0xFF71, 0xFF9D, PDF_TEXT_UCD_WBP_Katakana }, /* 284 */
379 : { 0x0041, 0x005A, PDF_TEXT_UCD_WBP_ALetter }, /* 285 */
380 : { 0x0061, 0x007A, PDF_TEXT_UCD_WBP_ALetter }, /* 286 */
381 : { 0x00AA, 0x00AA, PDF_TEXT_UCD_WBP_ALetter }, /* 287 */
382 : { 0x00B5, 0x00B5, PDF_TEXT_UCD_WBP_ALetter }, /* 288 */
383 : { 0x00BA, 0x00BA, PDF_TEXT_UCD_WBP_ALetter }, /* 289 */
384 : { 0x00C0, 0x00D6, PDF_TEXT_UCD_WBP_ALetter }, /* 290 */
385 : { 0x00D8, 0x00F6, PDF_TEXT_UCD_WBP_ALetter }, /* 291 */
386 : { 0x00F8, 0x01BA, PDF_TEXT_UCD_WBP_ALetter }, /* 292 */
387 : { 0x01BB, 0x01BB, PDF_TEXT_UCD_WBP_ALetter }, /* 293 */
388 : { 0x01BC, 0x01BF, PDF_TEXT_UCD_WBP_ALetter }, /* 294 */
389 : { 0x01C0, 0x01C3, PDF_TEXT_UCD_WBP_ALetter }, /* 295 */
390 : { 0x01C4, 0x0293, PDF_TEXT_UCD_WBP_ALetter }, /* 296 */
391 : { 0x0294, 0x0294, PDF_TEXT_UCD_WBP_ALetter }, /* 297 */
392 : { 0x0295, 0x02AF, PDF_TEXT_UCD_WBP_ALetter }, /* 298 */
393 : { 0x02B0, 0x02C1, PDF_TEXT_UCD_WBP_ALetter }, /* 299 */
394 : { 0x02C6, 0x02D1, PDF_TEXT_UCD_WBP_ALetter }, /* 300 */
395 : { 0x02E0, 0x02E4, PDF_TEXT_UCD_WBP_ALetter }, /* 301 */
396 : { 0x02EC, 0x02EC, PDF_TEXT_UCD_WBP_ALetter }, /* 302 */
397 : { 0x02EE, 0x02EE, PDF_TEXT_UCD_WBP_ALetter }, /* 303 */
398 : { 0x0370, 0x0373, PDF_TEXT_UCD_WBP_ALetter }, /* 304 */
399 : { 0x0374, 0x0374, PDF_TEXT_UCD_WBP_ALetter }, /* 305 */
400 : { 0x0376, 0x0377, PDF_TEXT_UCD_WBP_ALetter }, /* 306 */
401 : { 0x037A, 0x037A, PDF_TEXT_UCD_WBP_ALetter }, /* 307 */
402 : { 0x037B, 0x037D, PDF_TEXT_UCD_WBP_ALetter }, /* 308 */
403 : { 0x0386, 0x0386, PDF_TEXT_UCD_WBP_ALetter }, /* 309 */
404 : { 0x0388, 0x038A, PDF_TEXT_UCD_WBP_ALetter }, /* 310 */
405 : { 0x038C, 0x038C, PDF_TEXT_UCD_WBP_ALetter }, /* 311 */
406 : { 0x038E, 0x03A1, PDF_TEXT_UCD_WBP_ALetter }, /* 312 */
407 : { 0x03A3, 0x03F5, PDF_TEXT_UCD_WBP_ALetter }, /* 313 */
408 : { 0x03F7, 0x0481, PDF_TEXT_UCD_WBP_ALetter }, /* 314 */
409 : { 0x048A, 0x0523, PDF_TEXT_UCD_WBP_ALetter }, /* 315 */
410 : { 0x0531, 0x0556, PDF_TEXT_UCD_WBP_ALetter }, /* 316 */
411 : { 0x0559, 0x0559, PDF_TEXT_UCD_WBP_ALetter }, /* 317 */
412 : { 0x0561, 0x0587, PDF_TEXT_UCD_WBP_ALetter }, /* 318 */
413 : { 0x05D0, 0x05EA, PDF_TEXT_UCD_WBP_ALetter }, /* 319 */
414 : { 0x05F0, 0x05F2, PDF_TEXT_UCD_WBP_ALetter }, /* 320 */
415 : { 0x05F3, 0x05F3, PDF_TEXT_UCD_WBP_ALetter }, /* 321 */
416 : { 0x0621, 0x063F, PDF_TEXT_UCD_WBP_ALetter }, /* 322 */
417 : { 0x0640, 0x0640, PDF_TEXT_UCD_WBP_ALetter }, /* 323 */
418 : { 0x0641, 0x064A, PDF_TEXT_UCD_WBP_ALetter }, /* 324 */
419 : { 0x066E, 0x066F, PDF_TEXT_UCD_WBP_ALetter }, /* 325 */
420 : { 0x0671, 0x06D3, PDF_TEXT_UCD_WBP_ALetter }, /* 326 */
421 : { 0x06D5, 0x06D5, PDF_TEXT_UCD_WBP_ALetter }, /* 327 */
422 : { 0x06E5, 0x06E6, PDF_TEXT_UCD_WBP_ALetter }, /* 328 */
423 : { 0x06EE, 0x06EF, PDF_TEXT_UCD_WBP_ALetter }, /* 329 */
424 : { 0x06FA, 0x06FC, PDF_TEXT_UCD_WBP_ALetter }, /* 330 */
425 : { 0x06FF, 0x06FF, PDF_TEXT_UCD_WBP_ALetter }, /* 331 */
426 : { 0x0710, 0x0710, PDF_TEXT_UCD_WBP_ALetter }, /* 332 */
427 : { 0x0712, 0x072F, PDF_TEXT_UCD_WBP_ALetter }, /* 333 */
428 : { 0x074D, 0x07A5, PDF_TEXT_UCD_WBP_ALetter }, /* 334 */
429 : { 0x07B1, 0x07B1, PDF_TEXT_UCD_WBP_ALetter }, /* 335 */
430 : { 0x07CA, 0x07EA, PDF_TEXT_UCD_WBP_ALetter }, /* 336 */
431 : { 0x07F4, 0x07F5, PDF_TEXT_UCD_WBP_ALetter }, /* 337 */
432 : { 0x07FA, 0x07FA, PDF_TEXT_UCD_WBP_ALetter }, /* 338 */
433 : { 0x0904, 0x0939, PDF_TEXT_UCD_WBP_ALetter }, /* 339 */
434 : { 0x093D, 0x093D, PDF_TEXT_UCD_WBP_ALetter }, /* 340 */
435 : { 0x0950, 0x0950, PDF_TEXT_UCD_WBP_ALetter }, /* 341 */
436 : { 0x0958, 0x0961, PDF_TEXT_UCD_WBP_ALetter }, /* 342 */
437 : { 0x0971, 0x0971, PDF_TEXT_UCD_WBP_ALetter }, /* 343 */
438 : { 0x0972, 0x0972, PDF_TEXT_UCD_WBP_ALetter }, /* 344 */
439 : { 0x097B, 0x097F, PDF_TEXT_UCD_WBP_ALetter }, /* 345 */
440 : { 0x0985, 0x098C, PDF_TEXT_UCD_WBP_ALetter }, /* 346 */
441 : { 0x098F, 0x0990, PDF_TEXT_UCD_WBP_ALetter }, /* 347 */
442 : { 0x0993, 0x09A8, PDF_TEXT_UCD_WBP_ALetter }, /* 348 */
443 : { 0x09AA, 0x09B0, PDF_TEXT_UCD_WBP_ALetter }, /* 349 */
444 : { 0x09B2, 0x09B2, PDF_TEXT_UCD_WBP_ALetter }, /* 350 */
445 : { 0x09B6, 0x09B9, PDF_TEXT_UCD_WBP_ALetter }, /* 351 */
446 : { 0x09BD, 0x09BD, PDF_TEXT_UCD_WBP_ALetter }, /* 352 */
447 : { 0x09CE, 0x09CE, PDF_TEXT_UCD_WBP_ALetter }, /* 353 */
448 : { 0x09DC, 0x09DD, PDF_TEXT_UCD_WBP_ALetter }, /* 354 */
449 : { 0x09DF, 0x09E1, PDF_TEXT_UCD_WBP_ALetter }, /* 355 */
450 : { 0x09F0, 0x09F1, PDF_TEXT_UCD_WBP_ALetter }, /* 356 */
451 : { 0x0A05, 0x0A0A, PDF_TEXT_UCD_WBP_ALetter }, /* 357 */
452 : { 0x0A0F, 0x0A10, PDF_TEXT_UCD_WBP_ALetter }, /* 358 */
453 : { 0x0A13, 0x0A28, PDF_TEXT_UCD_WBP_ALetter }, /* 359 */
454 : { 0x0A2A, 0x0A30, PDF_TEXT_UCD_WBP_ALetter }, /* 360 */
455 : { 0x0A32, 0x0A33, PDF_TEXT_UCD_WBP_ALetter }, /* 361 */
456 : { 0x0A35, 0x0A36, PDF_TEXT_UCD_WBP_ALetter }, /* 362 */
457 : { 0x0A38, 0x0A39, PDF_TEXT_UCD_WBP_ALetter }, /* 363 */
458 : { 0x0A59, 0x0A5C, PDF_TEXT_UCD_WBP_ALetter }, /* 364 */
459 : { 0x0A5E, 0x0A5E, PDF_TEXT_UCD_WBP_ALetter }, /* 365 */
460 : { 0x0A72, 0x0A74, PDF_TEXT_UCD_WBP_ALetter }, /* 366 */
461 : { 0x0A85, 0x0A8D, PDF_TEXT_UCD_WBP_ALetter }, /* 367 */
462 : { 0x0A8F, 0x0A91, PDF_TEXT_UCD_WBP_ALetter }, /* 368 */
463 : { 0x0A93, 0x0AA8, PDF_TEXT_UCD_WBP_ALetter }, /* 369 */
464 : { 0x0AAA, 0x0AB0, PDF_TEXT_UCD_WBP_ALetter }, /* 370 */
465 : { 0x0AB2, 0x0AB3, PDF_TEXT_UCD_WBP_ALetter }, /* 371 */
466 : { 0x0AB5, 0x0AB9, PDF_TEXT_UCD_WBP_ALetter }, /* 372 */
467 : { 0x0ABD, 0x0ABD, PDF_TEXT_UCD_WBP_ALetter }, /* 373 */
468 : { 0x0AD0, 0x0AD0, PDF_TEXT_UCD_WBP_ALetter }, /* 374 */
469 : { 0x0AE0, 0x0AE1, PDF_TEXT_UCD_WBP_ALetter }, /* 375 */
470 : { 0x0B05, 0x0B0C, PDF_TEXT_UCD_WBP_ALetter }, /* 376 */
471 : { 0x0B0F, 0x0B10, PDF_TEXT_UCD_WBP_ALetter }, /* 377 */
472 : { 0x0B13, 0x0B28, PDF_TEXT_UCD_WBP_ALetter }, /* 378 */
473 : { 0x0B2A, 0x0B30, PDF_TEXT_UCD_WBP_ALetter }, /* 379 */
474 : { 0x0B32, 0x0B33, PDF_TEXT_UCD_WBP_ALetter }, /* 380 */
475 : { 0x0B35, 0x0B39, PDF_TEXT_UCD_WBP_ALetter }, /* 381 */
476 : { 0x0B3D, 0x0B3D, PDF_TEXT_UCD_WBP_ALetter }, /* 382 */
477 : { 0x0B5C, 0x0B5D, PDF_TEXT_UCD_WBP_ALetter }, /* 383 */
478 : { 0x0B5F, 0x0B61, PDF_TEXT_UCD_WBP_ALetter }, /* 384 */
479 : { 0x0B71, 0x0B71, PDF_TEXT_UCD_WBP_ALetter }, /* 385 */
480 : { 0x0B83, 0x0B83, PDF_TEXT_UCD_WBP_ALetter }, /* 386 */
481 : { 0x0B85, 0x0B8A, PDF_TEXT_UCD_WBP_ALetter }, /* 387 */
482 : { 0x0B8E, 0x0B90, PDF_TEXT_UCD_WBP_ALetter }, /* 388 */
483 : { 0x0B92, 0x0B95, PDF_TEXT_UCD_WBP_ALetter }, /* 389 */
484 : { 0x0B99, 0x0B9A, PDF_TEXT_UCD_WBP_ALetter }, /* 390 */
485 : { 0x0B9C, 0x0B9C, PDF_TEXT_UCD_WBP_ALetter }, /* 391 */
486 : { 0x0B9E, 0x0B9F, PDF_TEXT_UCD_WBP_ALetter }, /* 392 */
487 : { 0x0BA3, 0x0BA4, PDF_TEXT_UCD_WBP_ALetter }, /* 393 */
488 : { 0x0BA8, 0x0BAA, PDF_TEXT_UCD_WBP_ALetter }, /* 394 */
489 : { 0x0BAE, 0x0BB9, PDF_TEXT_UCD_WBP_ALetter }, /* 395 */
490 : { 0x0BD0, 0x0BD0, PDF_TEXT_UCD_WBP_ALetter }, /* 396 */
491 : { 0x0C05, 0x0C0C, PDF_TEXT_UCD_WBP_ALetter }, /* 397 */
492 : { 0x0C0E, 0x0C10, PDF_TEXT_UCD_WBP_ALetter }, /* 398 */
493 : { 0x0C12, 0x0C28, PDF_TEXT_UCD_WBP_ALetter }, /* 399 */
494 : { 0x0C2A, 0x0C33, PDF_TEXT_UCD_WBP_ALetter }, /* 400 */
495 : { 0x0C35, 0x0C39, PDF_TEXT_UCD_WBP_ALetter }, /* 401 */
496 : { 0x0C3D, 0x0C3D, PDF_TEXT_UCD_WBP_ALetter }, /* 402 */
497 : { 0x0C58, 0x0C59, PDF_TEXT_UCD_WBP_ALetter }, /* 403 */
498 : { 0x0C60, 0x0C61, PDF_TEXT_UCD_WBP_ALetter }, /* 404 */
499 : { 0x0C85, 0x0C8C, PDF_TEXT_UCD_WBP_ALetter }, /* 405 */
500 : { 0x0C8E, 0x0C90, PDF_TEXT_UCD_WBP_ALetter }, /* 406 */
501 : { 0x0C92, 0x0CA8, PDF_TEXT_UCD_WBP_ALetter }, /* 407 */
502 : { 0x0CAA, 0x0CB3, PDF_TEXT_UCD_WBP_ALetter }, /* 408 */
503 : { 0x0CB5, 0x0CB9, PDF_TEXT_UCD_WBP_ALetter }, /* 409 */
504 : { 0x0CBD, 0x0CBD, PDF_TEXT_UCD_WBP_ALetter }, /* 410 */
505 : { 0x0CDE, 0x0CDE, PDF_TEXT_UCD_WBP_ALetter }, /* 411 */
506 : { 0x0CE0, 0x0CE1, PDF_TEXT_UCD_WBP_ALetter }, /* 412 */
507 : { 0x0D05, 0x0D0C, PDF_TEXT_UCD_WBP_ALetter }, /* 413 */
508 : { 0x0D0E, 0x0D10, PDF_TEXT_UCD_WBP_ALetter }, /* 414 */
509 : { 0x0D12, 0x0D28, PDF_TEXT_UCD_WBP_ALetter }, /* 415 */
510 : { 0x0D2A, 0x0D39, PDF_TEXT_UCD_WBP_ALetter }, /* 416 */
511 : { 0x0D3D, 0x0D3D, PDF_TEXT_UCD_WBP_ALetter }, /* 417 */
512 : { 0x0D60, 0x0D61, PDF_TEXT_UCD_WBP_ALetter }, /* 418 */
513 : { 0x0D7A, 0x0D7F, PDF_TEXT_UCD_WBP_ALetter }, /* 419 */
514 : { 0x0D85, 0x0D96, PDF_TEXT_UCD_WBP_ALetter }, /* 420 */
515 : { 0x0D9A, 0x0DB1, PDF_TEXT_UCD_WBP_ALetter }, /* 421 */
516 : { 0x0DB3, 0x0DBB, PDF_TEXT_UCD_WBP_ALetter }, /* 422 */
517 : { 0x0DBD, 0x0DBD, PDF_TEXT_UCD_WBP_ALetter }, /* 423 */
518 : { 0x0DC0, 0x0DC6, PDF_TEXT_UCD_WBP_ALetter }, /* 424 */
519 : { 0x0F00, 0x0F00, PDF_TEXT_UCD_WBP_ALetter }, /* 425 */
520 : { 0x0F40, 0x0F47, PDF_TEXT_UCD_WBP_ALetter }, /* 426 */
521 : { 0x0F49, 0x0F6C, PDF_TEXT_UCD_WBP_ALetter }, /* 427 */
522 : { 0x0F88, 0x0F8B, PDF_TEXT_UCD_WBP_ALetter }, /* 428 */
523 : { 0x10A0, 0x10C5, PDF_TEXT_UCD_WBP_ALetter }, /* 429 */
524 : { 0x10D0, 0x10FA, PDF_TEXT_UCD_WBP_ALetter }, /* 430 */
525 : { 0x10FC, 0x10FC, PDF_TEXT_UCD_WBP_ALetter }, /* 431 */
526 : { 0x1100, 0x1159, PDF_TEXT_UCD_WBP_ALetter }, /* 432 */
527 : { 0x115F, 0x11A2, PDF_TEXT_UCD_WBP_ALetter }, /* 433 */
528 : { 0x11A8, 0x11F9, PDF_TEXT_UCD_WBP_ALetter }, /* 434 */
529 : { 0x1200, 0x1248, PDF_TEXT_UCD_WBP_ALetter }, /* 435 */
530 : { 0x124A, 0x124D, PDF_TEXT_UCD_WBP_ALetter }, /* 436 */
531 : { 0x1250, 0x1256, PDF_TEXT_UCD_WBP_ALetter }, /* 437 */
532 : { 0x1258, 0x1258, PDF_TEXT_UCD_WBP_ALetter }, /* 438 */
533 : { 0x125A, 0x125D, PDF_TEXT_UCD_WBP_ALetter }, /* 439 */
534 : { 0x1260, 0x1288, PDF_TEXT_UCD_WBP_ALetter }, /* 440 */
535 : { 0x128A, 0x128D, PDF_TEXT_UCD_WBP_ALetter }, /* 441 */
536 : { 0x1290, 0x12B0, PDF_TEXT_UCD_WBP_ALetter }, /* 442 */
537 : { 0x12B2, 0x12B5, PDF_TEXT_UCD_WBP_ALetter }, /* 443 */
538 : { 0x12B8, 0x12BE, PDF_TEXT_UCD_WBP_ALetter }, /* 444 */
539 : { 0x12C0, 0x12C0, PDF_TEXT_UCD_WBP_ALetter }, /* 445 */
540 : { 0x12C2, 0x12C5, PDF_TEXT_UCD_WBP_ALetter }, /* 446 */
541 : { 0x12C8, 0x12D6, PDF_TEXT_UCD_WBP_ALetter }, /* 447 */
542 : { 0x12D8, 0x1310, PDF_TEXT_UCD_WBP_ALetter }, /* 448 */
543 : { 0x1312, 0x1315, PDF_TEXT_UCD_WBP_ALetter }, /* 449 */
544 : { 0x1318, 0x135A, PDF_TEXT_UCD_WBP_ALetter }, /* 450 */
545 : { 0x1380, 0x138F, PDF_TEXT_UCD_WBP_ALetter }, /* 451 */
546 : { 0x13A0, 0x13F4, PDF_TEXT_UCD_WBP_ALetter }, /* 452 */
547 : { 0x1401, 0x166C, PDF_TEXT_UCD_WBP_ALetter }, /* 453 */
548 : { 0x166F, 0x1676, PDF_TEXT_UCD_WBP_ALetter }, /* 454 */
549 : { 0x1681, 0x169A, PDF_TEXT_UCD_WBP_ALetter }, /* 455 */
550 : { 0x16A0, 0x16EA, PDF_TEXT_UCD_WBP_ALetter }, /* 456 */
551 : { 0x16EE, 0x16F0, PDF_TEXT_UCD_WBP_ALetter }, /* 457 */
552 : { 0x1700, 0x170C, PDF_TEXT_UCD_WBP_ALetter }, /* 458 */
553 : { 0x170E, 0x1711, PDF_TEXT_UCD_WBP_ALetter }, /* 459 */
554 : { 0x1720, 0x1731, PDF_TEXT_UCD_WBP_ALetter }, /* 460 */
555 : { 0x1740, 0x1751, PDF_TEXT_UCD_WBP_ALetter }, /* 461 */
556 : { 0x1760, 0x176C, PDF_TEXT_UCD_WBP_ALetter }, /* 462 */
557 : { 0x176E, 0x1770, PDF_TEXT_UCD_WBP_ALetter }, /* 463 */
558 : { 0x1820, 0x1842, PDF_TEXT_UCD_WBP_ALetter }, /* 464 */
559 : { 0x1843, 0x1843, PDF_TEXT_UCD_WBP_ALetter }, /* 465 */
560 : { 0x1844, 0x1877, PDF_TEXT_UCD_WBP_ALetter }, /* 466 */
561 : { 0x1880, 0x18A8, PDF_TEXT_UCD_WBP_ALetter }, /* 467 */
562 : { 0x18AA, 0x18AA, PDF_TEXT_UCD_WBP_ALetter }, /* 468 */
563 : { 0x1900, 0x191C, PDF_TEXT_UCD_WBP_ALetter }, /* 469 */
564 : { 0x1A00, 0x1A16, PDF_TEXT_UCD_WBP_ALetter }, /* 470 */
565 : { 0x1B05, 0x1B33, PDF_TEXT_UCD_WBP_ALetter }, /* 471 */
566 : { 0x1B45, 0x1B4B, PDF_TEXT_UCD_WBP_ALetter }, /* 472 */
567 : { 0x1B83, 0x1BA0, PDF_TEXT_UCD_WBP_ALetter }, /* 473 */
568 : { 0x1BAE, 0x1BAF, PDF_TEXT_UCD_WBP_ALetter }, /* 474 */
569 : { 0x1C00, 0x1C23, PDF_TEXT_UCD_WBP_ALetter }, /* 475 */
570 : { 0x1C4D, 0x1C4F, PDF_TEXT_UCD_WBP_ALetter }, /* 476 */
571 : { 0x1C5A, 0x1C77, PDF_TEXT_UCD_WBP_ALetter }, /* 477 */
572 : { 0x1C78, 0x1C7D, PDF_TEXT_UCD_WBP_ALetter }, /* 478 */
573 : { 0x1D00, 0x1D2B, PDF_TEXT_UCD_WBP_ALetter }, /* 479 */
574 : { 0x1D2C, 0x1D61, PDF_TEXT_UCD_WBP_ALetter }, /* 480 */
575 : { 0x1D62, 0x1D77, PDF_TEXT_UCD_WBP_ALetter }, /* 481 */
576 : { 0x1D78, 0x1D78, PDF_TEXT_UCD_WBP_ALetter }, /* 482 */
577 : { 0x1D79, 0x1D9A, PDF_TEXT_UCD_WBP_ALetter }, /* 483 */
578 : { 0x1D9B, 0x1DBF, PDF_TEXT_UCD_WBP_ALetter }, /* 484 */
579 : { 0x1E00, 0x1F15, PDF_TEXT_UCD_WBP_ALetter }, /* 485 */
580 : { 0x1F18, 0x1F1D, PDF_TEXT_UCD_WBP_ALetter }, /* 486 */
581 : { 0x1F20, 0x1F45, PDF_TEXT_UCD_WBP_ALetter }, /* 487 */
582 : { 0x1F48, 0x1F4D, PDF_TEXT_UCD_WBP_ALetter }, /* 488 */
583 : { 0x1F50, 0x1F57, PDF_TEXT_UCD_WBP_ALetter }, /* 489 */
584 : { 0x1F59, 0x1F59, PDF_TEXT_UCD_WBP_ALetter }, /* 490 */
585 : { 0x1F5B, 0x1F5B, PDF_TEXT_UCD_WBP_ALetter }, /* 491 */
586 : { 0x1F5D, 0x1F5D, PDF_TEXT_UCD_WBP_ALetter }, /* 492 */
587 : { 0x1F5F, 0x1F7D, PDF_TEXT_UCD_WBP_ALetter }, /* 493 */
588 : { 0x1F80, 0x1FB4, PDF_TEXT_UCD_WBP_ALetter }, /* 494 */
589 : { 0x1FB6, 0x1FBC, PDF_TEXT_UCD_WBP_ALetter }, /* 495 */
590 : { 0x1FBE, 0x1FBE, PDF_TEXT_UCD_WBP_ALetter }, /* 496 */
591 : { 0x1FC2, 0x1FC4, PDF_TEXT_UCD_WBP_ALetter }, /* 497 */
592 : { 0x1FC6, 0x1FCC, PDF_TEXT_UCD_WBP_ALetter }, /* 498 */
593 : { 0x1FD0, 0x1FD3, PDF_TEXT_UCD_WBP_ALetter }, /* 499 */
594 : { 0x1FD6, 0x1FDB, PDF_TEXT_UCD_WBP_ALetter }, /* 500 */
595 : { 0x1FE0, 0x1FEC, PDF_TEXT_UCD_WBP_ALetter }, /* 501 */
596 : { 0x1FF2, 0x1FF4, PDF_TEXT_UCD_WBP_ALetter }, /* 502 */
597 : { 0x1FF6, 0x1FFC, PDF_TEXT_UCD_WBP_ALetter }, /* 503 */
598 : { 0x2071, 0x2071, PDF_TEXT_UCD_WBP_ALetter }, /* 504 */
599 : { 0x207F, 0x207F, PDF_TEXT_UCD_WBP_ALetter }, /* 505 */
600 : { 0x2090, 0x2094, PDF_TEXT_UCD_WBP_ALetter }, /* 506 */
601 : { 0x2102, 0x2102, PDF_TEXT_UCD_WBP_ALetter }, /* 507 */
602 : { 0x2107, 0x2107, PDF_TEXT_UCD_WBP_ALetter }, /* 508 */
603 : { 0x210A, 0x2113, PDF_TEXT_UCD_WBP_ALetter }, /* 509 */
604 : { 0x2115, 0x2115, PDF_TEXT_UCD_WBP_ALetter }, /* 510 */
605 : { 0x2119, 0x211D, PDF_TEXT_UCD_WBP_ALetter }, /* 511 */
606 : { 0x2124, 0x2124, PDF_TEXT_UCD_WBP_ALetter }, /* 512 */
607 : { 0x2126, 0x2126, PDF_TEXT_UCD_WBP_ALetter }, /* 513 */
608 : { 0x2128, 0x2128, PDF_TEXT_UCD_WBP_ALetter }, /* 514 */
609 : { 0x212A, 0x212D, PDF_TEXT_UCD_WBP_ALetter }, /* 515 */
610 : { 0x212F, 0x2134, PDF_TEXT_UCD_WBP_ALetter }, /* 516 */
611 : { 0x2135, 0x2138, PDF_TEXT_UCD_WBP_ALetter }, /* 517 */
612 : { 0x2139, 0x2139, PDF_TEXT_UCD_WBP_ALetter }, /* 518 */
613 : { 0x213C, 0x213F, PDF_TEXT_UCD_WBP_ALetter }, /* 519 */
614 : { 0x2145, 0x2149, PDF_TEXT_UCD_WBP_ALetter }, /* 520 */
615 : { 0x214E, 0x214E, PDF_TEXT_UCD_WBP_ALetter }, /* 521 */
616 : { 0x2160, 0x2182, PDF_TEXT_UCD_WBP_ALetter }, /* 522 */
617 : { 0x2183, 0x2184, PDF_TEXT_UCD_WBP_ALetter }, /* 523 */
618 : { 0x2185, 0x2188, PDF_TEXT_UCD_WBP_ALetter }, /* 524 */
619 : { 0x24B6, 0x24E9, PDF_TEXT_UCD_WBP_ALetter }, /* 525 */
620 : { 0x2C00, 0x2C2E, PDF_TEXT_UCD_WBP_ALetter }, /* 526 */
621 : { 0x2C30, 0x2C5E, PDF_TEXT_UCD_WBP_ALetter }, /* 527 */
622 : { 0x2C60, 0x2C6F, PDF_TEXT_UCD_WBP_ALetter }, /* 528 */
623 : { 0x2C71, 0x2C7C, PDF_TEXT_UCD_WBP_ALetter }, /* 529 */
624 : { 0x2C7D, 0x2C7D, PDF_TEXT_UCD_WBP_ALetter }, /* 530 */
625 : { 0x2C80, 0x2CE4, PDF_TEXT_UCD_WBP_ALetter }, /* 531 */
626 : { 0x2D00, 0x2D25, PDF_TEXT_UCD_WBP_ALetter }, /* 532 */
627 : { 0x2D30, 0x2D65, PDF_TEXT_UCD_WBP_ALetter }, /* 533 */
628 : { 0x2D6F, 0x2D6F, PDF_TEXT_UCD_WBP_ALetter }, /* 534 */
629 : { 0x2D80, 0x2D96, PDF_TEXT_UCD_WBP_ALetter }, /* 535 */
630 : { 0x2DA0, 0x2DA6, PDF_TEXT_UCD_WBP_ALetter }, /* 536 */
631 : { 0x2DA8, 0x2DAE, PDF_TEXT_UCD_WBP_ALetter }, /* 537 */
632 : { 0x2DB0, 0x2DB6, PDF_TEXT_UCD_WBP_ALetter }, /* 538 */
633 : { 0x2DB8, 0x2DBE, PDF_TEXT_UCD_WBP_ALetter }, /* 539 */
634 : { 0x2DC0, 0x2DC6, PDF_TEXT_UCD_WBP_ALetter }, /* 540 */
635 : { 0x2DC8, 0x2DCE, PDF_TEXT_UCD_WBP_ALetter }, /* 541 */
636 : { 0x2DD0, 0x2DD6, PDF_TEXT_UCD_WBP_ALetter }, /* 542 */
637 : { 0x2DD8, 0x2DDE, PDF_TEXT_UCD_WBP_ALetter }, /* 543 */
638 : { 0x2E2F, 0x2E2F, PDF_TEXT_UCD_WBP_ALetter }, /* 544 */
639 : { 0x3005, 0x3005, PDF_TEXT_UCD_WBP_ALetter }, /* 545 */
640 : { 0x303B, 0x303B, PDF_TEXT_UCD_WBP_ALetter }, /* 546 */
641 : { 0x303C, 0x303C, PDF_TEXT_UCD_WBP_ALetter }, /* 547 */
642 : { 0x3105, 0x312D, PDF_TEXT_UCD_WBP_ALetter }, /* 548 */
643 : { 0x3131, 0x318E, PDF_TEXT_UCD_WBP_ALetter }, /* 549 */
644 : { 0x31A0, 0x31B7, PDF_TEXT_UCD_WBP_ALetter }, /* 550 */
645 : { 0xA000, 0xA014, PDF_TEXT_UCD_WBP_ALetter }, /* 551 */
646 : { 0xA015, 0xA015, PDF_TEXT_UCD_WBP_ALetter }, /* 552 */
647 : { 0xA016, 0xA48C, PDF_TEXT_UCD_WBP_ALetter }, /* 553 */
648 : { 0xA500, 0xA60B, PDF_TEXT_UCD_WBP_ALetter }, /* 554 */
649 : { 0xA60C, 0xA60C, PDF_TEXT_UCD_WBP_ALetter }, /* 555 */
650 : { 0xA610, 0xA61F, PDF_TEXT_UCD_WBP_ALetter }, /* 556 */
651 : { 0xA62A, 0xA62B, PDF_TEXT_UCD_WBP_ALetter }, /* 557 */
652 : { 0xA640, 0xA65F, PDF_TEXT_UCD_WBP_ALetter }, /* 558 */
653 : { 0xA662, 0xA66D, PDF_TEXT_UCD_WBP_ALetter }, /* 559 */
654 : { 0xA66E, 0xA66E, PDF_TEXT_UCD_WBP_ALetter }, /* 560 */
655 : { 0xA67F, 0xA67F, PDF_TEXT_UCD_WBP_ALetter }, /* 561 */
656 : { 0xA680, 0xA697, PDF_TEXT_UCD_WBP_ALetter }, /* 562 */
657 : { 0xA717, 0xA71F, PDF_TEXT_UCD_WBP_ALetter }, /* 563 */
658 : { 0xA722, 0xA76F, PDF_TEXT_UCD_WBP_ALetter }, /* 564 */
659 : { 0xA770, 0xA770, PDF_TEXT_UCD_WBP_ALetter }, /* 565 */
660 : { 0xA771, 0xA787, PDF_TEXT_UCD_WBP_ALetter }, /* 566 */
661 : { 0xA788, 0xA788, PDF_TEXT_UCD_WBP_ALetter }, /* 567 */
662 : { 0xA78B, 0xA78C, PDF_TEXT_UCD_WBP_ALetter }, /* 568 */
663 : { 0xA7FB, 0xA801, PDF_TEXT_UCD_WBP_ALetter }, /* 569 */
664 : { 0xA803, 0xA805, PDF_TEXT_UCD_WBP_ALetter }, /* 570 */
665 : { 0xA807, 0xA80A, PDF_TEXT_UCD_WBP_ALetter }, /* 571 */
666 : { 0xA80C, 0xA822, PDF_TEXT_UCD_WBP_ALetter }, /* 572 */
667 : { 0xA840, 0xA873, PDF_TEXT_UCD_WBP_ALetter }, /* 573 */
668 : { 0xA882, 0xA8B3, PDF_TEXT_UCD_WBP_ALetter }, /* 574 */
669 : { 0xA90A, 0xA925, PDF_TEXT_UCD_WBP_ALetter }, /* 575 */
670 : { 0xA930, 0xA946, PDF_TEXT_UCD_WBP_ALetter }, /* 576 */
671 : { 0xAA00, 0xAA28, PDF_TEXT_UCD_WBP_ALetter }, /* 577 */
672 : { 0xAA40, 0xAA42, PDF_TEXT_UCD_WBP_ALetter }, /* 578 */
673 : { 0xAA44, 0xAA4B, PDF_TEXT_UCD_WBP_ALetter }, /* 579 */
674 : { 0xAC00, 0xD7A3, PDF_TEXT_UCD_WBP_ALetter }, /* 580 */
675 : { 0xFB00, 0xFB06, PDF_TEXT_UCD_WBP_ALetter }, /* 581 */
676 : { 0xFB13, 0xFB17, PDF_TEXT_UCD_WBP_ALetter }, /* 582 */
677 : { 0xFB1D, 0xFB1D, PDF_TEXT_UCD_WBP_ALetter }, /* 583 */
678 : { 0xFB1F, 0xFB28, PDF_TEXT_UCD_WBP_ALetter }, /* 584 */
679 : { 0xFB2A, 0xFB36, PDF_TEXT_UCD_WBP_ALetter }, /* 585 */
680 : { 0xFB38, 0xFB3C, PDF_TEXT_UCD_WBP_ALetter }, /* 586 */
681 : { 0xFB3E, 0xFB3E, PDF_TEXT_UCD_WBP_ALetter }, /* 587 */
682 : { 0xFB40, 0xFB41, PDF_TEXT_UCD_WBP_ALetter }, /* 588 */
683 : { 0xFB43, 0xFB44, PDF_TEXT_UCD_WBP_ALetter }, /* 589 */
684 : { 0xFB46, 0xFBB1, PDF_TEXT_UCD_WBP_ALetter }, /* 590 */
685 : { 0xFBD3, 0xFD3D, PDF_TEXT_UCD_WBP_ALetter }, /* 591 */
686 : { 0xFD50, 0xFD8F, PDF_TEXT_UCD_WBP_ALetter }, /* 592 */
687 : { 0xFD92, 0xFDC7, PDF_TEXT_UCD_WBP_ALetter }, /* 593 */
688 : { 0xFDF0, 0xFDFB, PDF_TEXT_UCD_WBP_ALetter }, /* 594 */
689 : { 0xFE70, 0xFE74, PDF_TEXT_UCD_WBP_ALetter }, /* 595 */
690 : { 0xFE76, 0xFEFC, PDF_TEXT_UCD_WBP_ALetter }, /* 596 */
691 : { 0xFF21, 0xFF3A, PDF_TEXT_UCD_WBP_ALetter }, /* 597 */
692 : { 0xFF41, 0xFF5A, PDF_TEXT_UCD_WBP_ALetter }, /* 598 */
693 : { 0xFFA0, 0xFFBE, PDF_TEXT_UCD_WBP_ALetter }, /* 599 */
694 : { 0xFFC2, 0xFFC7, PDF_TEXT_UCD_WBP_ALetter }, /* 600 */
695 : { 0xFFCA, 0xFFCF, PDF_TEXT_UCD_WBP_ALetter }, /* 601 */
696 : { 0xFFD2, 0xFFD7, PDF_TEXT_UCD_WBP_ALetter }, /* 602 */
697 : { 0xFFDA, 0xFFDC, PDF_TEXT_UCD_WBP_ALetter }, /* 603 */
698 : { 0x10000, 0x1000B, PDF_TEXT_UCD_WBP_ALetter }, /* 604 */
699 : { 0x1000D, 0x10026, PDF_TEXT_UCD_WBP_ALetter }, /* 605 */
700 : { 0x10028, 0x1003A, PDF_TEXT_UCD_WBP_ALetter }, /* 606 */
701 : { 0x1003C, 0x1003D, PDF_TEXT_UCD_WBP_ALetter }, /* 607 */
702 : { 0x1003F, 0x1004D, PDF_TEXT_UCD_WBP_ALetter }, /* 608 */
703 : { 0x10050, 0x1005D, PDF_TEXT_UCD_WBP_ALetter }, /* 609 */
704 : { 0x10080, 0x100FA, PDF_TEXT_UCD_WBP_ALetter }, /* 610 */
705 : { 0x10140, 0x10174, PDF_TEXT_UCD_WBP_ALetter }, /* 611 */
706 : { 0x10280, 0x1029C, PDF_TEXT_UCD_WBP_ALetter }, /* 612 */
707 : { 0x102A0, 0x102D0, PDF_TEXT_UCD_WBP_ALetter }, /* 613 */
708 : { 0x10300, 0x1031E, PDF_TEXT_UCD_WBP_ALetter }, /* 614 */
709 : { 0x10330, 0x10340, PDF_TEXT_UCD_WBP_ALetter }, /* 615 */
710 : { 0x10341, 0x10341, PDF_TEXT_UCD_WBP_ALetter }, /* 616 */
711 : { 0x10342, 0x10349, PDF_TEXT_UCD_WBP_ALetter }, /* 617 */
712 : { 0x1034A, 0x1034A, PDF_TEXT_UCD_WBP_ALetter }, /* 618 */
713 : { 0x10380, 0x1039D, PDF_TEXT_UCD_WBP_ALetter }, /* 619 */
714 : { 0x103A0, 0x103C3, PDF_TEXT_UCD_WBP_ALetter }, /* 620 */
715 : { 0x103C8, 0x103CF, PDF_TEXT_UCD_WBP_ALetter }, /* 621 */
716 : { 0x103D1, 0x103D5, PDF_TEXT_UCD_WBP_ALetter }, /* 622 */
717 : { 0x10400, 0x1044F, PDF_TEXT_UCD_WBP_ALetter }, /* 623 */
718 : { 0x10450, 0x1049D, PDF_TEXT_UCD_WBP_ALetter }, /* 624 */
719 : { 0x10800, 0x10805, PDF_TEXT_UCD_WBP_ALetter }, /* 625 */
720 : { 0x10808, 0x10808, PDF_TEXT_UCD_WBP_ALetter }, /* 626 */
721 : { 0x1080A, 0x10835, PDF_TEXT_UCD_WBP_ALetter }, /* 627 */
722 : { 0x10837, 0x10838, PDF_TEXT_UCD_WBP_ALetter }, /* 628 */
723 : { 0x1083C, 0x1083C, PDF_TEXT_UCD_WBP_ALetter }, /* 629 */
724 : { 0x1083F, 0x1083F, PDF_TEXT_UCD_WBP_ALetter }, /* 630 */
725 : { 0x10900, 0x10915, PDF_TEXT_UCD_WBP_ALetter }, /* 631 */
726 : { 0x10920, 0x10939, PDF_TEXT_UCD_WBP_ALetter }, /* 632 */
727 : { 0x10A00, 0x10A00, PDF_TEXT_UCD_WBP_ALetter }, /* 633 */
728 : { 0x10A10, 0x10A13, PDF_TEXT_UCD_WBP_ALetter }, /* 634 */
729 : { 0x10A15, 0x10A17, PDF_TEXT_UCD_WBP_ALetter }, /* 635 */
730 : { 0x10A19, 0x10A33, PDF_TEXT_UCD_WBP_ALetter }, /* 636 */
731 : { 0x12000, 0x1236E, PDF_TEXT_UCD_WBP_ALetter }, /* 637 */
732 : { 0x12400, 0x12462, PDF_TEXT_UCD_WBP_ALetter }, /* 638 */
733 : { 0x1D400, 0x1D454, PDF_TEXT_UCD_WBP_ALetter }, /* 639 */
734 : { 0x1D456, 0x1D49C, PDF_TEXT_UCD_WBP_ALetter }, /* 640 */
735 : { 0x1D49E, 0x1D49F, PDF_TEXT_UCD_WBP_ALetter }, /* 641 */
736 : { 0x1D4A2, 0x1D4A2, PDF_TEXT_UCD_WBP_ALetter }, /* 642 */
737 : { 0x1D4A5, 0x1D4A6, PDF_TEXT_UCD_WBP_ALetter }, /* 643 */
738 : { 0x1D4A9, 0x1D4AC, PDF_TEXT_UCD_WBP_ALetter }, /* 644 */
739 : { 0x1D4AE, 0x1D4B9, PDF_TEXT_UCD_WBP_ALetter }, /* 645 */
740 : { 0x1D4BB, 0x1D4BB, PDF_TEXT_UCD_WBP_ALetter }, /* 646 */
741 : { 0x1D4BD, 0x1D4C3, PDF_TEXT_UCD_WBP_ALetter }, /* 647 */
742 : { 0x1D4C5, 0x1D505, PDF_TEXT_UCD_WBP_ALetter }, /* 648 */
743 : { 0x1D507, 0x1D50A, PDF_TEXT_UCD_WBP_ALetter }, /* 649 */
744 : { 0x1D50D, 0x1D514, PDF_TEXT_UCD_WBP_ALetter }, /* 650 */
745 : { 0x1D516, 0x1D51C, PDF_TEXT_UCD_WBP_ALetter }, /* 651 */
746 : { 0x1D51E, 0x1D539, PDF_TEXT_UCD_WBP_ALetter }, /* 652 */
747 : { 0x1D53B, 0x1D53E, PDF_TEXT_UCD_WBP_ALetter }, /* 653 */
748 : { 0x1D540, 0x1D544, PDF_TEXT_UCD_WBP_ALetter }, /* 654 */
749 : { 0x1D546, 0x1D546, PDF_TEXT_UCD_WBP_ALetter }, /* 655 */
750 : { 0x1D54A, 0x1D550, PDF_TEXT_UCD_WBP_ALetter }, /* 656 */
751 : { 0x1D552, 0x1D6A5, PDF_TEXT_UCD_WBP_ALetter }, /* 657 */
752 : { 0x1D6A8, 0x1D6C0, PDF_TEXT_UCD_WBP_ALetter }, /* 658 */
753 : { 0x1D6C2, 0x1D6DA, PDF_TEXT_UCD_WBP_ALetter }, /* 659 */
754 : { 0x1D6DC, 0x1D6FA, PDF_TEXT_UCD_WBP_ALetter }, /* 660 */
755 : { 0x1D6FC, 0x1D714, PDF_TEXT_UCD_WBP_ALetter }, /* 661 */
756 : { 0x1D716, 0x1D734, PDF_TEXT_UCD_WBP_ALetter }, /* 662 */
757 : { 0x1D736, 0x1D74E, PDF_TEXT_UCD_WBP_ALetter }, /* 663 */
758 : { 0x1D750, 0x1D76E, PDF_TEXT_UCD_WBP_ALetter }, /* 664 */
759 : { 0x1D770, 0x1D788, PDF_TEXT_UCD_WBP_ALetter }, /* 665 */
760 : { 0x1D78A, 0x1D7A8, PDF_TEXT_UCD_WBP_ALetter }, /* 666 */
761 : { 0x1D7AA, 0x1D7C2, PDF_TEXT_UCD_WBP_ALetter }, /* 667 */
762 : { 0x1D7C4, 0x1D7CB, PDF_TEXT_UCD_WBP_ALetter }, /* 668 */
763 : { 0x003A, 0x003A, PDF_TEXT_UCD_WBP_MidLetter }, /* 669 */
764 : { 0x00B7, 0x00B7, PDF_TEXT_UCD_WBP_MidLetter }, /* 670 */
765 : { 0x0387, 0x0387, PDF_TEXT_UCD_WBP_MidLetter }, /* 671 */
766 : { 0x05F4, 0x05F4, PDF_TEXT_UCD_WBP_MidLetter }, /* 672 */
767 : { 0x2027, 0x2027, PDF_TEXT_UCD_WBP_MidLetter }, /* 673 */
768 : { 0xFE13, 0xFE13, PDF_TEXT_UCD_WBP_MidLetter }, /* 674 */
769 : { 0xFE55, 0xFE55, PDF_TEXT_UCD_WBP_MidLetter }, /* 675 */
770 : { 0xFF1A, 0xFF1A, PDF_TEXT_UCD_WBP_MidLetter }, /* 676 */
771 : { 0x002C, 0x002C, PDF_TEXT_UCD_WBP_MidNum }, /* 677 */
772 : { 0x003B, 0x003B, PDF_TEXT_UCD_WBP_MidNum }, /* 678 */
773 : { 0x037E, 0x037E, PDF_TEXT_UCD_WBP_MidNum }, /* 679 */
774 : { 0x0589, 0x0589, PDF_TEXT_UCD_WBP_MidNum }, /* 680 */
775 : { 0x060C, 0x060D, PDF_TEXT_UCD_WBP_MidNum }, /* 681 */
776 : { 0x066C, 0x066C, PDF_TEXT_UCD_WBP_MidNum }, /* 682 */
777 : { 0x07F8, 0x07F8, PDF_TEXT_UCD_WBP_MidNum }, /* 683 */
778 : { 0x2044, 0x2044, PDF_TEXT_UCD_WBP_MidNum }, /* 684 */
779 : { 0xFE10, 0xFE10, PDF_TEXT_UCD_WBP_MidNum }, /* 685 */
780 : { 0xFE14, 0xFE14, PDF_TEXT_UCD_WBP_MidNum }, /* 686 */
781 : { 0xFE50, 0xFE50, PDF_TEXT_UCD_WBP_MidNum }, /* 687 */
782 : { 0xFE54, 0xFE54, PDF_TEXT_UCD_WBP_MidNum }, /* 688 */
783 : { 0xFF0C, 0xFF0C, PDF_TEXT_UCD_WBP_MidNum }, /* 689 */
784 : { 0xFF1B, 0xFF1B, PDF_TEXT_UCD_WBP_MidNum }, /* 690 */
785 : { 0x0027, 0x0027, PDF_TEXT_UCD_WBP_MidNumLet }, /* 691 */
786 : { 0x002E, 0x002E, PDF_TEXT_UCD_WBP_MidNumLet }, /* 692 */
787 : { 0x2018, 0x2018, PDF_TEXT_UCD_WBP_MidNumLet }, /* 693 */
788 : { 0x2019, 0x2019, PDF_TEXT_UCD_WBP_MidNumLet }, /* 694 */
789 : { 0x2024, 0x2024, PDF_TEXT_UCD_WBP_MidNumLet }, /* 695 */
790 : { 0xFE52, 0xFE52, PDF_TEXT_UCD_WBP_MidNumLet }, /* 696 */
791 : { 0xFF07, 0xFF07, PDF_TEXT_UCD_WBP_MidNumLet }, /* 697 */
792 : { 0xFF0E, 0xFF0E, PDF_TEXT_UCD_WBP_MidNumLet }, /* 698 */
793 : { 0x0030, 0x0039, PDF_TEXT_UCD_WBP_Numeric }, /* 699 */
794 : { 0x0660, 0x0669, PDF_TEXT_UCD_WBP_Numeric }, /* 700 */
795 : { 0x066B, 0x066B, PDF_TEXT_UCD_WBP_Numeric }, /* 701 */
796 : { 0x06F0, 0x06F9, PDF_TEXT_UCD_WBP_Numeric }, /* 702 */
797 : { 0x07C0, 0x07C9, PDF_TEXT_UCD_WBP_Numeric }, /* 703 */
798 : { 0x0966, 0x096F, PDF_TEXT_UCD_WBP_Numeric }, /* 704 */
799 : { 0x09E6, 0x09EF, PDF_TEXT_UCD_WBP_Numeric }, /* 705 */
800 : { 0x0A66, 0x0A6F, PDF_TEXT_UCD_WBP_Numeric }, /* 706 */
801 : { 0x0AE6, 0x0AEF, PDF_TEXT_UCD_WBP_Numeric }, /* 707 */
802 : { 0x0B66, 0x0B6F, PDF_TEXT_UCD_WBP_Numeric }, /* 708 */
803 : { 0x0BE6, 0x0BEF, PDF_TEXT_UCD_WBP_Numeric }, /* 709 */
804 : { 0x0C66, 0x0C6F, PDF_TEXT_UCD_WBP_Numeric }, /* 710 */
805 : { 0x0CE6, 0x0CEF, PDF_TEXT_UCD_WBP_Numeric }, /* 711 */
806 : { 0x0D66, 0x0D6F, PDF_TEXT_UCD_WBP_Numeric }, /* 712 */
807 : { 0x0E50, 0x0E59, PDF_TEXT_UCD_WBP_Numeric }, /* 713 */
808 : { 0x0ED0, 0x0ED9, PDF_TEXT_UCD_WBP_Numeric }, /* 714 */
809 : { 0x0F20, 0x0F29, PDF_TEXT_UCD_WBP_Numeric }, /* 715 */
810 : { 0x1040, 0x1049, PDF_TEXT_UCD_WBP_Numeric }, /* 716 */
811 : { 0x1090, 0x1099, PDF_TEXT_UCD_WBP_Numeric }, /* 717 */
812 : { 0x17E0, 0x17E9, PDF_TEXT_UCD_WBP_Numeric }, /* 718 */
813 : { 0x1810, 0x1819, PDF_TEXT_UCD_WBP_Numeric }, /* 719 */
814 : { 0x1946, 0x194F, PDF_TEXT_UCD_WBP_Numeric }, /* 720 */
815 : { 0x19D0, 0x19D9, PDF_TEXT_UCD_WBP_Numeric }, /* 721 */
816 : { 0x1B50, 0x1B59, PDF_TEXT_UCD_WBP_Numeric }, /* 722 */
817 : { 0x1BB0, 0x1BB9, PDF_TEXT_UCD_WBP_Numeric }, /* 723 */
818 : { 0x1C40, 0x1C49, PDF_TEXT_UCD_WBP_Numeric }, /* 724 */
819 : { 0x1C50, 0x1C59, PDF_TEXT_UCD_WBP_Numeric }, /* 725 */
820 : { 0xA620, 0xA629, PDF_TEXT_UCD_WBP_Numeric }, /* 726 */
821 : { 0xA8D0, 0xA8D9, PDF_TEXT_UCD_WBP_Numeric }, /* 727 */
822 : { 0xA900, 0xA909, PDF_TEXT_UCD_WBP_Numeric }, /* 728 */
823 : { 0xAA50, 0xAA59, PDF_TEXT_UCD_WBP_Numeric }, /* 729 */
824 : { 0x104A0, 0x104A9, PDF_TEXT_UCD_WBP_Numeric }, /* 730 */
825 : { 0x1D7CE, 0x1D7FF, PDF_TEXT_UCD_WBP_Numeric }, /* 731 */
826 : { 0x005F, 0x005F, PDF_TEXT_UCD_WBP_ExtendNumLet }, /* 732 */
827 : { 0x203F, 0x2040, PDF_TEXT_UCD_WBP_ExtendNumLet }, /* 733 */
828 : { 0x2054, 0x2054, PDF_TEXT_UCD_WBP_ExtendNumLet }, /* 734 */
829 : { 0xFE33, 0xFE34, PDF_TEXT_UCD_WBP_ExtendNumLet }, /* 735 */
830 : { 0xFE4D, 0xFE4F, PDF_TEXT_UCD_WBP_ExtendNumLet }, /* 736 */
831 : { 0xFF3F, 0xFF3F, PDF_TEXT_UCD_WBP_ExtendNumLet }, /* 737 */
832 : };
833 :
834 : /***************** END OF SELF-GENERATED DATA *********************************/
835 :
836 :
837 : static pdf_bool_t
838 : pdf_text_ucd_wb_in_interval(pdf_u32_t character,
839 : pdf_u32_t first_interval,
840 : pdf_u32_t last_interval)
841 : {
842 : int i;
843 :
844 137646 : for(i=first_interval; i<=last_interval; ++i)
845 : {
846 135542 : if((character >= unicode_wordbreak_info[i].interval_start) && \
847 : (character <= unicode_wordbreak_info[i].interval_stop))
848 : {
849 465 : return PDF_TRUE;
850 : }
851 : }
852 2104 : return PDF_FALSE;
853 : }
854 :
855 :
856 : /* Returns true if the given UTF-32HE unicode point has the CR value
857 : * in the WordBreak property */
858 : pdf_bool_t
859 : pdf_text_ucd_wb_is_cr(pdf_u32_t character)
860 177 : {
861 177 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_CR_F, \
862 : UCD_WB_CR_L);
863 : }
864 :
865 :
866 : /* Returns true if the given UTF-32HE unicode point has the LF value
867 : * in the WordBreak property */
868 : pdf_bool_t
869 : pdf_text_ucd_wb_is_lf(pdf_u32_t character)
870 177 : {
871 177 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_LF_F, \
872 : UCD_WB_LF_L);
873 : }
874 :
875 :
876 : /* Returns true if the given UTF-32HE unicode point has the Newline value
877 : * in the WordBreak property */
878 : pdf_bool_t
879 : pdf_text_ucd_wb_is_newline(pdf_u32_t character)
880 177 : {
881 177 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_NEWLINE_F, \
882 : UCD_WB_NEWLINE_L);
883 : }
884 :
885 :
886 : /* Returns true if the given UTF-32HE unicode point has the Extend value
887 : * in the WordBreak property */
888 : pdf_bool_t
889 : pdf_text_ucd_wb_is_extend(pdf_u32_t character)
890 177 : {
891 177 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_EXTEND_F, \
892 : UCD_WB_EXTEND_L);
893 : }
894 :
895 :
896 : /* Returns true if the given UTF-32HE unicode point has the Format value
897 : * in the WordBreak property */
898 : pdf_bool_t
899 : pdf_text_ucd_wb_is_format(pdf_u32_t character)
900 177 : {
901 177 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_FORMAT_F, \
902 : UCD_WB_FORMAT_L);
903 : }
904 :
905 :
906 : /* Returns true if the given UTF-32HE unicode point has the Katakana value
907 : * in the WordBreak property */
908 : pdf_bool_t
909 : pdf_text_ucd_wb_is_katakana(pdf_u32_t character)
910 124 : {
911 124 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_KATAKANA_F, \
912 : UCD_WB_KATAKANA_L);
913 : }
914 :
915 :
916 : /* Returns true if the given UTF-32HE unicode point has the ALetter value
917 : * in the WordBreak property */
918 : pdf_bool_t
919 : pdf_text_ucd_wb_is_aletter(pdf_u32_t character)
920 585 : {
921 585 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_ALETTER_F, \
922 : UCD_WB_ALETTER_L);
923 : }
924 :
925 :
926 :
927 : /* Returns true if the given UTF-32HE unicode point has the MidLetter value
928 : * in the WordBreak property */
929 : pdf_bool_t
930 : pdf_text_ucd_wb_is_midletter(pdf_u32_t character)
931 228 : {
932 228 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_MIDLETTER_F, \
933 : UCD_WB_MIDLETTER_L);
934 : }
935 :
936 :
937 : /* Returns true if the given UTF-32HE unicode point has the MidNum value
938 : * in the WordBreak property */
939 : pdf_bool_t
940 : pdf_text_ucd_wb_is_midnum(pdf_u32_t character)
941 205 : {
942 205 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_MIDNUM_F, \
943 : UCD_WB_MIDNUM_L);
944 : }
945 :
946 :
947 : /* Returns true if the given UTF-32HE unicode point has the MidNumLet value
948 : * in the WordBreak property */
949 : pdf_bool_t
950 : pdf_text_ucd_wb_is_midnumlet(pdf_u32_t character)
951 213 : {
952 213 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_MIDNUMLET_F, \
953 : UCD_WB_MIDNUMLET_L);
954 : }
955 :
956 :
957 : /* Returns true if the given UTF-32HE unicode point has the Numeric value
958 : * in the WordBreak property */
959 : pdf_bool_t
960 : pdf_text_ucd_wb_is_numeric(pdf_u32_t character)
961 205 : {
962 205 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_NUMERIC_F, \
963 : UCD_WB_NUMERIC_L);
964 : }
965 :
966 :
967 : /* Returns true if the given UTF-32HE unicode point has the ExtendNumLet value
968 : * in the WordBreak property */
969 : pdf_bool_t
970 : pdf_text_ucd_wb_is_extendnumlet(pdf_u32_t character)
971 124 : {
972 124 : return pdf_text_ucd_wb_in_interval(character, UCD_WB_EXTENDNUMLET_F, \
973 : UCD_WB_EXTENDNUMLET_L);
974 : }
975 :
976 :
977 : enum pdf_text_ucd_wb_property_e
978 : pdf_text_ucd_wb_get_property(pdf_u32_t character)
979 585 : {
980 585 : if(pdf_text_ucd_wb_is_aletter(character))
981 : {
982 369 : return PDF_TEXT_UCD_WBP_ALetter;
983 : }
984 216 : else if(pdf_text_ucd_wb_is_midletter(character))
985 : {
986 11 : return PDF_TEXT_UCD_WBP_MidLetter;
987 : }
988 205 : else if(pdf_text_ucd_wb_is_numeric(character))
989 : {
990 0 : return PDF_TEXT_UCD_WBP_Numeric;
991 : }
992 205 : else if(pdf_text_ucd_wb_is_midnum(character))
993 : {
994 0 : return PDF_TEXT_UCD_WBP_MidNum;
995 : }
996 205 : else if(pdf_text_ucd_wb_is_midnumlet(character))
997 : {
998 28 : return PDF_TEXT_UCD_WBP_MidNumLet;
999 : }
1000 177 : else if(pdf_text_ucd_wb_is_format(character))
1001 : {
1002 0 : return PDF_TEXT_UCD_WBP_Format;
1003 : }
1004 177 : else if(pdf_text_ucd_wb_is_cr(character))
1005 : {
1006 0 : return PDF_TEXT_UCD_WBP_CR;
1007 : }
1008 177 : else if(pdf_text_ucd_wb_is_lf(character))
1009 : {
1010 0 : return PDF_TEXT_UCD_WBP_LF;
1011 : }
1012 177 : else if(pdf_text_ucd_wb_is_newline(character))
1013 : {
1014 0 : return PDF_TEXT_UCD_WBP_Newline;
1015 : }
1016 177 : else if(pdf_text_ucd_wb_is_extend(character))
1017 : {
1018 53 : return PDF_TEXT_UCD_WBP_Extend;
1019 : }
1020 124 : else if(pdf_text_ucd_wb_is_katakana(character))
1021 : {
1022 0 : return PDF_TEXT_UCD_WBP_Katakana;
1023 : }
1024 124 : else if(pdf_text_ucd_wb_is_extendnumlet(character))
1025 : {
1026 0 : return PDF_TEXT_UCD_WBP_ExtendNumLet;
1027 : }
1028 : else
1029 : {
1030 124 : return PDF_TEXT_UCD_WBP_None;
1031 : }
1032 : }
1033 :
1034 :
1035 : /* Maximum number of code points needed for a word break check (size of the buffer handed to the WB rule functions, indexed [0] to [3]) */
1036 : #define PDF_TEXT_UCD_MWBCP 4
1037 :
1038 : /* Word break property information kept for one code point of that buffer */
1039 : typedef struct pdf_text_ucd_wb_s {
1040 : pdf_char_t *walker; /* Position of the code point's 4 bytes in the input data, NULL for an empty slot */
1041 : pdf_u32_t utf32val; /* 32-bit code point value copied from `walker', 0x0 for an empty slot */
1042 : enum pdf_text_ucd_wb_property_e wbp; /* WordBreak property of `utf32val', PDF_TEXT_UCD_WBP_None for an empty slot */
1043 : } pdf_text_ucd_wb_t;
1044 :
1045 :
1046 : /* RULE WB3: Do not break within CRLF (CR x LF) */
1047 : static pdf_bool_t
1048 : pdf_text_ucd_wb_rule_3(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1049 : {
1050 287 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_CR) && \
1051 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_LF)) ? PDF_TRUE : PDF_FALSE);
1052 : }
1053 :
1054 : /* RULE WB3a: Break before and after Newlines (including CR and LF) */
1055 : static pdf_bool_t
1056 : pdf_text_ucd_wb_rule_3a(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1057 : {
1058 287 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_CR) || \
1059 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_LF) || \
1060 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_Newline)) ? PDF_TRUE : PDF_FALSE);
1061 : }
1062 :
1063 : /* RULE WB3b: Break before and after Newlines (including CR and LF) */
1064 : static pdf_bool_t
1065 : pdf_text_ucd_wb_rule_3b(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1066 : {
1067 287 : return (((buffer[2].wbp == PDF_TEXT_UCD_WBP_CR) || \
1068 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_LF) || \
1069 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Newline)) ? PDF_TRUE : PDF_FALSE);
1070 : }
1071 :
1072 : /* RULE WB4: Ignore Format and Extend characters, except when they appear at
1073 : * the beginning of a region of text. */
1074 : static pdf_bool_t
1075 : pdf_text_ucd_wb_rule_4(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1076 : {
1077 287 : return (((buffer[2].wbp == PDF_TEXT_UCD_WBP_Extend) || \
1078 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Format)) ? PDF_TRUE : PDF_FALSE);
1079 : }
1080 :
1081 : /* RULE WB5: Do not break between most letters (ALetter X ALetter) */
1082 : static pdf_bool_t
1083 : pdf_text_ucd_wb_rule_5(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1084 : {
1085 251 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_ALetter) && \
1086 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_ALetter)) ? PDF_TRUE : PDF_FALSE);
1087 : }
1088 :
1089 : /* RULE WB6: Do not break letters across certain puntuation
1090 : * (ALetter X MidLetter ALetter) */
1091 : static pdf_bool_t
1092 : pdf_text_ucd_wb_rule_6(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1093 : {
1094 142 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_ALetter) && \
1095 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_MidLetter) && \
1096 : (buffer[3].wbp == PDF_TEXT_UCD_WBP_ALetter)) ? PDF_TRUE : PDF_FALSE);
1097 : }
1098 :
1099 :
1100 : /* RULE WB7: Do not break letters across certain punctuation
1101 : * (ALetter MidLetter X ALetter) */
1102 : static pdf_bool_t
1103 : pdf_text_ucd_wb_rule_7(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1104 : {
1105 138 : return (((buffer[0].wbp == PDF_TEXT_UCD_WBP_ALetter) && \
1106 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_MidLetter) && \
1107 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_ALetter)) ? PDF_TRUE : PDF_FALSE);
1108 : }
1109 :
1110 :
1111 : /* RULE WB8: Do not break within sequences of digits, or digits adjacent
1112 : * to letters (Numeric X Numeric) */
1113 : static pdf_bool_t
1114 : pdf_text_ucd_wb_rule_8(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1115 : {
1116 134 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_Numeric) && \
1117 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Numeric)) ? PDF_TRUE : PDF_FALSE);
1118 : }
1119 :
1120 : /* RULE WB9: Do not break within sequences of digits, or digits adjacent
1121 : * to letters (ALetter X Numeric) */
1122 : static pdf_bool_t
1123 : pdf_text_ucd_wb_rule_9(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1124 : {
1125 134 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_ALetter) && \
1126 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Numeric)) ? PDF_TRUE : PDF_FALSE);
1127 : }
1128 :
1129 :
1130 : /* RULE WB10: Do not break within sequences of digits, or digits adjacent
1131 : * to letters (Numeric X ALetter) */
1132 : static pdf_bool_t
1133 : pdf_text_ucd_wb_rule_10(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1134 : {
1135 134 : return (((buffer[0].wbp == PDF_TEXT_UCD_WBP_Numeric) && \
1136 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_ALetter)) ? PDF_TRUE : PDF_FALSE);
1137 : }
1138 :
1139 :
1140 : /* RULE WB11: Do not break within sequences such as "3.2"
1141 : * (Numeric MidNum X Numeric) */
1142 : static pdf_bool_t
1143 : pdf_text_ucd_wb_rule_11(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1144 : {
1145 134 : return (((buffer[0].wbp == PDF_TEXT_UCD_WBP_Numeric) && \
1146 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_MidNum) && \
1147 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Numeric)) ? PDF_TRUE : PDF_FALSE);
1148 : }
1149 :
1150 : /* RULE WB12: Do not break within sequences such as "3.2"
1151 : * (Numeric X MidNum Numeric) */
1152 : static pdf_bool_t
1153 : pdf_text_ucd_wb_rule_12(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1154 : {
1155 134 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_Numeric) && \
1156 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_MidNum) && \
1157 : (buffer[3].wbp == PDF_TEXT_UCD_WBP_Numeric)) ? PDF_TRUE : PDF_FALSE);
1158 : }
1159 :
1160 : /* RULE WB13: Do not break between Katakana */
1161 : static pdf_bool_t
1162 : pdf_text_ucd_wb_rule_13(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1163 : {
1164 134 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_Katakana) && \
1165 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Katakana)) ? PDF_TRUE :PDF_FALSE);
1166 : }
1167 :
1168 : /* RULE WB13a: Do not break from extenders
1169 : * ((ALetter | Numeric | Katakana | ExtendNumLet) X ExtendNumLet) */
1170 : static pdf_bool_t
1171 : pdf_text_ucd_wb_rule_13a(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1172 : {
1173 134 : return ((((buffer[1].wbp == PDF_TEXT_UCD_WBP_ALetter) || \
1174 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_Numeric) || \
1175 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_Katakana) || \
1176 : (buffer[1].wbp == PDF_TEXT_UCD_WBP_ExtendNumLet)) && \
1177 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_ExtendNumLet)) ? \
1178 : PDF_TRUE : PDF_FALSE);
1179 : }
1180 :
1181 : /* RULE WB13b: Do not break from extenders
1182 : * (ExtendNumLet) X (ALetter | Numeric | Katakana ) */
1183 : static pdf_bool_t
1184 : pdf_text_ucd_wb_rule_13b(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1185 : {
1186 134 : return (((buffer[1].wbp == PDF_TEXT_UCD_WBP_ExtendNumLet) && \
1187 : ((buffer[2].wbp == PDF_TEXT_UCD_WBP_ALetter) || \
1188 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Numeric) || \
1189 : (buffer[2].wbp == PDF_TEXT_UCD_WBP_Katakana))) ? \
1190 : PDF_TRUE : PDF_FALSE);
1191 : }
1192 :
1193 : /* Check rules and stop if any of them is true (meaning that shouldn't be a
1194 : * word break) */
1195 : static pdf_bool_t
1196 : pdf_text_ucd_wb_check_rules(const pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP])
1197 287 : {
1198 2751 : return (((pdf_text_ucd_wb_rule_3(buffer)) || \
1199 : (pdf_text_ucd_wb_rule_3a(buffer)) || \
1200 : (pdf_text_ucd_wb_rule_3b(buffer)) || \
1201 : (pdf_text_ucd_wb_rule_4(buffer)) || \
1202 : (pdf_text_ucd_wb_rule_5(buffer)) || \
1203 : (pdf_text_ucd_wb_rule_6(buffer)) || \
1204 : (pdf_text_ucd_wb_rule_7(buffer)) || \
1205 : (pdf_text_ucd_wb_rule_8(buffer)) || \
1206 : (pdf_text_ucd_wb_rule_9(buffer)) || \
1207 : (pdf_text_ucd_wb_rule_10(buffer)) || \
1208 : (pdf_text_ucd_wb_rule_11(buffer)) || \
1209 : (pdf_text_ucd_wb_rule_12(buffer)) || \
1210 : (pdf_text_ucd_wb_rule_13(buffer)) || \
1211 : (pdf_text_ucd_wb_rule_13a(buffer)) || \
1212 : (pdf_text_ucd_wb_rule_13b(buffer))) ? PDF_TRUE : PDF_FALSE);
1213 : }
1214 :
1215 :
1216 : /* Word boundary search algorithm, based on Unicode Standard Annex #29
1217 : * "Text Boundaries".
1218 : * - `current' points to the next byte after the word break (so it points to the
1219 : * FIRST byte of the word).
1220 : * - `next' will point to the previous byte before the next word break (so it
1221 : * points to the LAST byte of the word).
1222 : */
1223 : pdf_status_t
1224 : pdf_text_ucd_wb_detect_next(const pdf_char_t *current,
1225 : const pdf_size_t n_bytes_left_in,
1226 : pdf_char_t **next,
1227 : pdf_size_t *n_bytes_left_out)
1228 192 : {
1229 : /* Buffer to store the unicode points as they are being parsed in the
1230 : * algorithm. Indexes are treated as follows:
1231 : * [0] [1] x [2] [3]
1232 : * This means that possible word breaks are ALWAYS considered between code
1233 : * points [1] and [2], being [0] the previous character to [1] and being
1234 : * [3] the next character to [2].
1235 : * When the buffer is updated, the code points are moved one position to the
1236 : * the left, so that code point in [0] disappears and a new code point
1237 : * enters in [3], and the word break is again checked between [1] and [2].
1238 : */
1239 : pdf_text_ucd_wb_t buffer [PDF_TEXT_UCD_MWBCP];
1240 :
1241 : pdf_u32_t i;
1242 : pdf_size_t n_bytes;
1243 : pdf_bool_t found;
1244 :
1245 : /* Check validity of input number of bytes */
1246 192 : if(n_bytes_left_in % 4 != 0)
1247 : {
1248 0 : return PDF_EBADDATA;
1249 : }
1250 :
1251 : /* Check if the string is just one character long */
1252 192 : if(n_bytes_left_in == 4)
1253 : {
1254 14 : *n_bytes_left_out = 0;
1255 14 : *next = (pdf_char_t *)current;
1256 14 : return PDF_OK;
1257 : }
1258 :
1259 : /* Initialize buffer with first 3 unicode points, stored in [1],[2],[3] */
1260 890 : for(i=0; i<PDF_TEXT_UCD_MWBCP; ++i)
1261 : {
1262 1223 : if((i>0) && \
1263 : (n_bytes_left_in >= (4*i)))
1264 : {
1265 : /* Store pointer */
1266 511 : buffer[i].walker = (pdf_char_t *)(¤t[4*(i-1)]);
1267 : /* Store unsigned 32-bit number */
1268 511 : memcpy(&(buffer[i].utf32val), buffer[i].walker, 4);
1269 : /* Get Word-Break property value from character */
1270 511 : buffer[i].wbp = pdf_text_ucd_wb_get_property(buffer[i].utf32val);
1271 : }
1272 : else
1273 : {
1274 201 : buffer[i].walker = NULL;
1275 201 : buffer[i].utf32val = 0x0;
1276 201 : buffer[i].wbp = PDF_TEXT_UCD_WBP_None;
1277 : }
1278 : }
1279 :
1280 178 : n_bytes = n_bytes_left_in;
1281 178 : found = 0;
1282 :
1283 : /* Start walking the unicode points. At each loop at least 2 unicode points
1284 : * (8 bytes) must be available to check the word break!!!! */
1285 643 : while((!found) && \
1286 : (n_bytes >= 8))
1287 : {
1288 : /* If any of the rules returns true, don't break word */
1289 287 : if(pdf_text_ucd_wb_check_rules(buffer))
1290 : {
1291 : /* If word break is not found, continue with next UTF-32 point */
1292 : /* Update number of bytes pending */
1293 153 : n_bytes -= 4;
1294 :
1295 : /* Shift left contents of the buffer */
1296 612 : for(i=1; i<PDF_TEXT_UCD_MWBCP; ++i)
1297 : {
1298 459 : buffer[i-1] = buffer[i];
1299 : }
1300 :
1301 : /* Insert new buffer element in position [3], if available */
1302 153 : if(n_bytes >= 12)
1303 : {
1304 74 : buffer[3].walker = buffer[2].walker + 4;
1305 : /* Store unsigned 32-bit number */
1306 74 : memcpy(&(buffer[3].utf32val), buffer[3].walker, 4);
1307 : /* Get Word-Break property value from character */
1308 74 : buffer[3].wbp =pdf_text_ucd_wb_get_property(buffer[3].utf32val);
1309 : }
1310 : }
1311 : else
1312 : {
1313 : /* RULE WB14: Otherwise, break everywhere (including around
1314 : * ideographs) */
1315 134 : found = 1;
1316 : }
1317 : }
1318 :
1319 : /* The exit of the loop could be due to two different reasons:
1320 : * 1. A word break was found in the loop. If it is found, the contents of
1321 : * the buffer remain unchanged, so the word break is between [1] and [2],
1322 : * and n_bytes considers the bytes of [1]
1323 : * 2. RULE WB2: Break at end of text ( % EOT). In this case, n_bytes will be
1324 : * equal to 4, and the contents of the buffer would have been shifted left
1325 : * so that the last character is pointed by [1].
1326 : * So, perfect, both cases can be handled in the same way.
1327 : */
1328 :
1329 178 : *next = buffer[1].walker;
1330 178 : *n_bytes_left_out = n_bytes - 4;
1331 :
1332 178 : return PDF_OK;
1333 : }
1334 :
1335 : /* End of pdf-text-ucd-wordbreak.c */
|