1 : /* -*- mode: C -*-
2 : *
3 : * File: pdf-text-context.c
4 : * Date: Fri Feb 25 23:58:56 2008
5 : *
6 : * GNU PDF Library - Encoded Text Context
7 : *
8 : */
9 :
10 : /* Copyright (C) 2008 Free Software Foundation, Inc. */
11 :
12 : /* This program is free software: you can redistribute it and/or modify
13 : * it under the terms of the GNU General Public License as published by
14 : * the Free Software Foundation, either version 3 of the License, or
15 : * (at your option) any later version.
16 : *
17 : * This program is distributed in the hope that it will be useful,
18 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 : * GNU General Public License for more details.
21 : *
22 : * You should have received a copy of the GNU General Public License
23 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
24 : */
25 :
26 : #include <config.h>
27 :
28 : #include <string.h>
29 : #include <stdio.h>
30 : #include <locale.h>
31 : #include <localename.h> /* From gnulib sources. */
32 : #include <localcharset.h> /* From gnulib sources. */
33 : #include <streq.h> /* From gnulib sources. */
34 :
35 : #include <pdf-types.h>
36 : #include <pdf-text.h>
37 : #include <pdf-text-context.h>
38 :
39 :
40 : /* Size of the buffers holding the host language and country IDs:
 * 2 characters plus the terminating NUL */
41 : #define PDF_TEXT_HLL 3
42 :
43 :
44 : /* Structure containing the specific context configuration for the Text
45 : * Encoding Module */
46 : typedef struct pdf_text_context_s {
47 : /* Host encoding configured in the system */
48 : pdf_text_host_encoding_t host_encoding;
49 : /* 2-character LANG ID */
50 : pdf_char_t host_language_id[PDF_TEXT_HLL];
51 : /* 2-character Country ID */
52 : pdf_char_t host_country_id[PDF_TEXT_HLL];
53 : /* System endianness */
54 : enum pdf_endianness_e host_endianness;
55 : /* Default EOL character in the system */
56 : pdf_text_eol_t host_eol;
57 : } pdf_text_context_t;
58 :
59 :
60 : /* This context will be initialized only once at program startup, and it will
61 : * be treated as constant from then on, so there shouldn't be any problem
62 : * with multiple threading and reentrancy */
63 : static pdf_text_context_t text_context;
64 : static pdf_bool_t text_context_initialized = PDF_FALSE;
65 :
66 : /* Definition of the different platform-dependent EOL types, in UTF-8. This
67 : * array is based on the `enum pdf_text_eol_types' enumeration. */
68 : static const struct pdf_text_eol_s pdf_text_eol_types [PDF_TEXT_EOLMAX] = {
69 : { { PDF_TEXT_DEF_CR, PDF_TEXT_DEF_LF, 0x00 } }, /* PDF_TEXT_EOL_WINDOWS */
70 : { { PDF_TEXT_DEF_LF, 0x00, 0x00 } }, /* PDF_TEXT_EOL_UNIX */
71 : { { 0xC2, PDF_TEXT_DEF_NEL, 0x00 } }, /* PDF_TEXT_EOL_EBCDIC */
72 : { { PDF_TEXT_DEF_CR, 0x00, 0x00 } } /* PDF_TEXT_EOL_MACOS */
73 : };
74 :
75 :
76 : /* Function to detect the endianness of the system. Can be either Big Endian
77 : * (like PPC systems) or Little Endian (like Intel systems). PDF_IS_BIG_ENDIAN
78 : * is filled in config.h at configure level. */
79 : static pdf_status_t
80 : pdf_text_detect_host_endianness(void)
81 : {
82 : #if PDF_IS_BIG_ENDIAN
83 : PDF_DEBUG_BASE("TextContext: Host Endianness is 'Big Endian'");
84 : text_context.host_endianness = PDF_TEXT_BIG_ENDIAN;
85 : #else
86 : PDF_DEBUG_BASE("TextContext: Host Endianness is 'Little Endian'");
87 735 : text_context.host_endianness = PDF_TEXT_LITTLE_ENDIAN;
88 : #endif
89 735 : return PDF_OK;
90 : }
91 :
92 :
93 : static pdf_status_t
94 : pdf_text_detect_host_encoding(void)
95 : {
96 735 : const char *charset = NULL;
97 : pdf_size_t length;
98 :
99 : /* Initialize contents of structure */
100 735 : memset(&(text_context.host_encoding.name[0]), 0, PDF_TEXT_HENMAXL);
101 :
102 : /* Get host encoding and check it */
103 735 : charset = locale_charset();
104 735 : if((charset == NULL) || \
105 : ((length = strlen(charset))<3))
106 : {
107 : PDF_DEBUG_BASE("Invalid host encoding info detected! '%s'", \
108 : ((charset!=NULL) ? charset : "null"));
109 0 : return PDF_ETEXTENC;
110 : }
111 :
112 : /* Limit length to maximum length, just in case */
113 735 : length = (length > (PDF_TEXT_HENMAXL-1)) ? (PDF_TEXT_HENMAXL-1) : length;
114 735 : strncpy((char *)&(text_context.host_encoding.name[0]), charset, length);
115 :
116 : PDF_DEBUG_BASE("TextContext: Host Encoding is '%s'", \
117 : text_context.host_encoding.name);
118 :
119 735 : return PDF_OK;
120 : }
121 :
122 :
123 : static pdf_status_t
124 : pdf_text_detect_host_language_and_country(void)
125 735 : {
126 735 : const char *locale_name = NULL;
127 :
128 : /* Initialize context strings */
129 735 : memset(&text_context.host_language_id[0], 0, PDF_TEXT_HLL);
130 735 : memset(&text_context.host_country_id[0], 0, PDF_TEXT_HLL);
131 :
132 : /* Get system default locale name and check it */
133 735 : locale_name = gl_locale_name(LC_CTYPE, "LC_CTYPE");
134 735 : if (locale_name == NULL)
135 : {
136 : PDF_DEBUG_BASE("Invalid locale info detected! (null)",
137 : ((locale_name!=NULL) ? locale_name : "null"));
138 0 : return PDF_ETEXTENC;
139 : }
140 :
141 735 : if (!STREQ (locale_name, "C", 'C', 0, 0, 0, 0, 0, 0, 0, 0)
142 : && !STREQ (locale_name, "POSIX", 'P', 'O', 'S', 'I', 'X', 0, 0, 0, 0))
143 : {
144 : /* Store language ID */
145 735 : strncpy((char *)&(text_context.host_language_id[0]), locale_name,
146 : PDF_TEXT_HLL-1);
147 :
148 : /* If available, store country ID */
149 735 : if((strlen(locale_name) >= 5) && \
150 : (locale_name[2] == '_'))
151 : {
152 735 : strncpy((char *)&(text_context.host_country_id[0]), &locale_name[3],
153 : PDF_TEXT_HLL-1);
154 : }
155 : }
156 :
157 : PDF_DEBUG_BASE("TextContext: Locale name is '%s'", locale_name);
158 : PDF_DEBUG_BASE("TextContext: Language ID is '%.2s'",
159 : text_context.host_language_id);
160 : PDF_DEBUG_BASE("TextContext: Country ID is '%.2s'",
161 : text_context.host_country_id);
162 735 : return PDF_OK;
163 : }
164 :
165 :
166 : static pdf_status_t
167 : pdf_text_detect_host_eol(void)
168 : {
169 : /* The EOL sequence (a.k.a Newline function) may be represented by different
170 : * characters, depending on the platform
171 : *
172 : * Mac OS 9.x and earlier ---> CR (Carriage Return), U+000D (Not supported)
173 : * Mac OS X, Unix, GNU/Linux ---> LF (Line Feed), U+000A
174 : * Windows ---> CRLF (Carriage Return + Line Feed), <U+000D,U+000A>
175 : * EBCDIC-based OS --> NEL (Next Line), U+0085 (Not supported)
176 : */
177 : extern pdf_text_context_t text_context;
178 : extern const struct pdf_text_eol_s pdf_text_eol_types [PDF_TEXT_EOLMAX];
179 : #ifdef PDF_HOST_WIN32
180 : {
181 : text_context.host_eol = (pdf_text_eol_t) \
182 : &pdf_text_eol_types[PDF_TEXT_EOL_WINDOWS];
183 : }
184 : #else
185 : {
186 735 : text_context.host_eol = (pdf_text_eol_t) \
187 : &pdf_text_eol_types[PDF_TEXT_EOL_UNIX];
188 : }
189 : #endif
190 735 : return PDF_OK;
191 : }
192 :
193 :
194 : pdf_status_t
195 : pdf_text_context_init (void)
196 735 : {
197 735 : pdf_status_t ret_code = PDF_OK;
198 :
199 : #if defined HAVE_SETLOCALE
200 : /* Set all categories of the locale */
201 735 : if(setlocale(LC_ALL, "") == NULL)
202 : {
203 : PDF_DEBUG_BASE("Error setting locale information in the process");
204 0 : return PDF_ETEXTENC;
205 : }
206 : #endif
207 :
208 : /* Get system endianness */
209 735 : ret_code = pdf_text_detect_host_endianness();
210 735 : if(ret_code != PDF_OK)
211 : {
212 : PDF_DEBUG_BASE("Error detecting host endianness");
213 0 : return ret_code;
214 : }
215 :
216 : /* Get language and country ID from locale */
217 735 : ret_code = pdf_text_detect_host_language_and_country();
218 735 : if(ret_code != PDF_OK)
219 : {
220 : PDF_DEBUG_BASE("Error detecting host language");
221 0 : return ret_code;
222 : }
223 :
224 : /* Get host encoding from system */
225 735 : ret_code = pdf_text_detect_host_encoding();
226 735 : if(ret_code != PDF_OK)
227 : {
228 : PDF_DEBUG_BASE("Error detecting host encoding");
229 0 : return ret_code;
230 : }
231 :
232 : /* Detect host default EOL sequence */
233 735 : ret_code = pdf_text_detect_host_eol();
234 735 : if(ret_code != PDF_OK)
235 : {
236 : PDF_DEBUG_BASE("Error detecting host EOL sequence");
237 0 : return ret_code;
238 : }
239 :
240 : /* Mark the context as initialized */
241 735 : text_context_initialized = PDF_TRUE;
242 :
243 735 : return PDF_OK;
244 : }
245 :
246 : pdf_bool_t
247 : pdf_text_context_initialized (void)
248 941 : {
249 941 : return text_context_initialized;
250 : }
251 :
252 : enum pdf_endianness_e
253 : pdf_text_context_get_host_endianness(void)
254 0 : {
255 0 : return text_context.host_endianness;
256 : }
257 :
258 : pdf_text_host_encoding_t
259 : pdf_text_context_get_host_encoding(void)
260 28 : {
261 28 : return text_context.host_encoding;
262 : }
263 :
264 : const pdf_char_t *
265 : pdf_text_context_get_host_language(void)
266 18 : {
267 18 : return (const pdf_char_t *)text_context.host_language_id;
268 : }
269 :
270 : const pdf_char_t *
271 : pdf_text_context_get_host_country(void)
272 0 : {
273 0 : return (const pdf_char_t *)text_context.host_country_id;
274 : }
275 :
276 :
277 : pdf_text_eol_t
278 : pdf_text_context_get_host_eol(enum pdf_text_eol_types eol_type)
279 18 : {
280 18 : if(eol_type == PDF_TEXT_EOL_HOST)
281 : {
282 2 : return text_context.host_eol;
283 : }
284 : else
285 : {
286 16 : return (pdf_text_eol_t) &(pdf_text_eol_types [eol_type]);
287 : }
288 : }
289 :
290 :
291 : /* End of pdf-text-context.c */
|