1 : /* -*- mode: C -*-
2 : *
3 : * File: pdf-text-host-encoding.c
4 : * Date: Fri Jan 11 21:09:23 2008
5 : *
6 : * GNU PDF Library - Encoded Text handling utilities - Host Encodings
7 : *
8 : */
9 :
10 : /* Copyright (C) 2008 Free Software Foundation, Inc. */
11 :
12 : /* This program is free software: you can redistribute it and/or modify
13 : * it under the terms of the GNU General Public License as published by
14 : * the Free Software Foundation, either version 3 of the License, or
15 : * (at your option) any later version.
16 : *
17 : * This program is distributed in the hope that it will be useful,
18 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 : * GNU General Public License for more details.
21 : *
22 : * You should have received a copy of the GNU General Public License
23 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
24 : */
25 :
26 : #include <config.h>
27 :
28 : #include <string.h>
29 : #ifdef PDF_HOST_WIN32
30 : #include <windows.h>
31 : #else
32 : #include <iconv.h>
33 : #include <errno.h>
34 : #endif
35 :
36 : #include <pdf-types.h>
37 : #include <pdf-text.h>
38 : #include <pdf-text-context.h>
39 : #include <pdf-text-encoding.h>
40 : #include <pdf-text-host-encoding.h>
41 :
42 : /*
43 : * ICONV API
44 : * -----------
45 : * iconv_t iconv_open (const char* tocode, const char* fromcode)
46 : *
47 : * size_t iconv (iconv_t cd,
48 : * const char **inbuf, size_t *inbytesleft,
49 : * char **outbuf, size_t *outbytesleft);
50 : *
51 : * int iconv_close (iconv_t cd);
52 : */
53 :
54 :
55 : /*
56 : * WINDOWS API
57 : * -------------
58 : * int MultiByteToWideChar(UINT CodePage,
59 : * DWORD dwFlags,
60 : * LPCSTR lpMultiByteStr,
61 : * int cbMultiByte,
62 : * LPWSTR lpWideCharStr,
63 : * int cchWideChar);
64 : *
65 : * int WideCharToMultiByte(UINT CodePage,
66 : * DWORD dwFlags,
67 : * LPCWSTR lpWideCharStr,
68 : * int cchWideChar,
69 : * LPSTR lpMultiByteStr,
70 : * int cbMultiByte,
71 : * LPCSTR lpDefaultChar,
72 : * LPBOOL lpUsedDefaultChar);
73 : *
74 : * UINT GetACP(void);
75 : *
76 : */
77 :
78 : #ifdef PDF_HOST_WIN32
79 : static DWORD
80 : pdf_text_get_dwflags_for_cp(UINT CodePage, DWORD def_dwflags)
81 : {
82 : /* dwFlags has some restrictions */
83 : switch(CodePage)
84 : {
85 : case 50220:
86 : case 50221:
87 : case 50222:
88 : case 50225:
89 : case 50227:
90 : case 50229:
91 : case 52936:
92 : case 54936:
93 : case 57002:
94 : case 57003:
95 : case 57004:
96 : case 57005:
97 : case 57006:
98 : case 57007:
99 : case 57008:
100 : case 57009:
101 : case 57010:
102 : case 57011:
103 : case 65000:
104 : case 42:
105 : return 0;
106 : default:
107 : return def_dwflags;
108 : }
109 : }
110 :
111 :
112 : pdf_status_t
113 : pdf_text_convert_encoding_name_to_CP(const pdf_char_t *encoding_name,
114 : UINT *pCP)
115 : {
116 : UINT CodePage;
117 : char *end_char;
118 :
119 : /* In windows, the charset name stored in the pdf_text_host_encoding_t
120 : * element will be in the following format: "CPn", where 'n' is the
121 : * code page number (unsigned integer) obtained with GetACP() */
122 :
123 : /* So first of all, check windows host encoding */
124 : if((strlen((char *)encoding_name) < 3) || \
125 : (strncmp((char *)encoding_name,"CP",2) != 0))
126 : {
127 : PDF_DEBUG_BASE("Host encoding received seems not valid");
128 : return PDF_ETEXTENC;
129 : }
130 :
131 : /* Get codepage as unsigned integer. `strtol' will return 0 if it was not
132 : * able to correctly parse the string. BTW, 0 is not a valid code page. */
133 : CodePage = (UINT) strtol ((char *)&encoding_name[2],
134 : &end_char,
135 : 10);
136 : if(CodePage == 0)
137 : {
138 : PDF_DEBUG_BASE("Problem converting input CP value '%s'",
139 : encoding_name);
140 : return PDF_ETEXTENC;
141 : }
142 : else
143 : {
144 : *pCP = CodePage;
145 : return PDF_OK;
146 : }
147 : }
148 :
149 : #endif
150 :
151 :
152 : pdf_status_t
153 : pdf_text_host_encoding_is_available(const pdf_char_t *encoding_name)
154 4 : {
155 : #ifdef PDF_HOST_WIN32
156 : {
157 : UINT CodePage;
158 :
159 : if(pdf_text_convert_encoding_name_to_CP(encoding_name, &CodePage) != PDF_OK)
160 : {
161 : PDF_DEBUG_BASE("Invalid windows encoding name received...");
162 : return PDF_ETEXTENC;
163 : }
164 :
165 : /* Check given code page in the system */
166 : return ((IsValidCodePage(CodePage)) ? PDF_OK : PDF_ETEXTENC);
167 : }
168 : #else
169 : {
170 : iconv_t check;
171 :
172 : /* Check conversion from Host Encoding to UTF-32HE */
173 4 : check = iconv_open((char *)encoding_name, \
174 : (PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"));
175 4 : if(check == (iconv_t)-1)
176 : {
177 : PDF_DEBUG_BASE("Conversion from '%s' to UTF-32HE not available",
178 : encoding_name);
179 1 : return PDF_ETEXTENC;
180 : }
181 3 : iconv_close(check);
182 :
183 : /* Check conversion from UTF-32HE to Host Encoding */
184 3 : check = iconv_open((PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"), \
185 : (char *)encoding_name);
186 3 : if(check == (iconv_t)-1)
187 : {
188 : PDF_DEBUG_BASE("Conversion from UTF-32HE to '%s' not available",
189 : encoding_name);
190 0 : return PDF_ETEXTENC;
191 : }
192 3 : iconv_close(check);
193 :
194 3 : return PDF_OK;
195 : }
196 : #endif
197 : }
198 :
199 :
200 : #ifdef PDF_HOST_WIN32
201 : static pdf_status_t
202 : pdf_text_utf32he_to_host_win32(const pdf_char_t *input_data,
203 : const pdf_size_t input_length,
204 : const pdf_text_host_encoding_t enc,
205 : pdf_char_t **p_output_data,
206 : pdf_size_t *p_output_length)
207 : {
208 : pdf_status_t ret_code;
209 : pdf_char_t *temp_data;
210 : pdf_size_t temp_size;
211 : UINT CodePage;
212 : /* Firstly, convert from UTF-32HE to UTF-16LE */
213 : ret_code = pdf_text_utf32he_to_utf16le(input_data,
214 : input_length,
215 : &temp_data,
216 : &temp_size);
217 : if(ret_code != PDF_OK)
218 : {
219 : PDF_DEBUG_BASE("Couldn't convert from UTF-32HE to UTF-16LE");
220 : return PDF_ETEXTENC;
221 : }
222 :
223 : /* In windows, the charset name stored in the pdf_text_host_encoding_t
224 : * element will be in the following format: "CPn", where 'n' is the
225 : * code page number (unsigned integer) obtained with GetACP() */
226 :
227 : /* So check windows host encoding */
228 : if(pdf_text_convert_encoding_name_to_CP(enc.name, &CodePage) != PDF_OK)
229 : {
230 : PDF_DEBUG_BASE("Invalid windows encoding name received...");
231 : pdf_dealloc(temp_data);
232 : return PDF_ETEXTENC;
233 : }
234 : else
235 : {
236 : DWORD dwFlags;
237 : int output_nmbyte;
238 : BOOL default_used = 0;
239 :
240 : /* Get dwFlags value */
241 : dwFlags = 0;
242 :
243 : /* First of all, query the length of the output string */
244 : SetLastError(0);
245 : output_nmbyte = WideCharToMultiByte(CodePage, /* CodePage */
246 : dwFlags, /* dwFlags */
247 : (LPCWSTR)temp_data, /* lpWideCharStr */
248 : (temp_size/sizeof(WCHAR)), /* cbWideChar */
249 : NULL, /* lpMultiByteStr */
250 : 0, /* ccMultiByte */
251 : NULL, /* lpDefaultChar */
252 : &default_used); /* lpUsedDefaultChar */
253 :
254 : /* Check if we got an error with the call to WideCharToMultiByte */
255 : if(output_nmbyte == 0 || default_used)
256 : {
257 : #ifdef HAVE_DEBUG_BASE
258 : switch(GetLastError())
259 : {
260 : case ERROR_INVALID_FLAGS:
261 : PDF_DEBUG_BASE("Invalid data to convert to Host Encoding:"
262 : " 'Invalid flags'");
263 : break;
264 : default:
265 : PDF_DEBUG_BASE("Invalid data to convert to Host Encoding");
266 : break;
267 : }
268 : #endif
269 : pdf_dealloc(temp_data);
270 : return PDF_EBADTEXT;
271 : }
272 :
273 : /* Allocate memory for output buffer */
274 : *p_output_length = output_nmbyte;
275 : *p_output_data = (pdf_char_t *)pdf_alloc(*p_output_length);
276 : if(*p_output_data == NULL)
277 : {
278 : pdf_dealloc(temp_data);
279 : return PDF_ENOMEM;
280 : }
281 :
282 : /* Launch the conversion to host encoding */
283 : SetLastError(0);
284 : default_used = 0;
285 : if((WideCharToMultiByte(CodePage, /* CodePage */
286 : dwFlags, /* dwFlags */
287 : (LPCWSTR)temp_data, /* lpWideCharStr */
288 : (temp_size/sizeof(WCHAR)), /* cbWideChar */
289 : (char *)*p_output_data, /* lpMultiByteStr */
290 : *p_output_length, /* ccMultiByte */
291 : NULL, /* lpDefaultChar */
292 : &default_used) != output_nmbyte) || \
293 : (default_used))
294 : {
295 : PDF_DEBUG_BASE("Problem performing the host encoding conversion");
296 : pdf_dealloc(*p_output_data);
297 : pdf_dealloc(temp_data);
298 : return PDF_ETEXTENC;
299 : }
300 : else
301 : {
302 : /* Check last byte... could be NUL and we don't want it */
303 : if((*p_output_data)[*p_output_length -1] == '\0')
304 : {
305 : pdf_char_t *temp;
306 : temp = pdf_realloc((*p_output_data), (*p_output_length -1));
307 : if(temp != NULL)
308 : {
309 : *p_output_data = temp;
310 : *p_output_length = *p_output_length -1;
311 : }
312 : }
313 : pdf_dealloc(temp_data);
314 : return PDF_OK;
315 : }
316 : }
317 : }
318 :
319 : #else
320 :
321 : static pdf_status_t
322 : pdf_text_utf32he_to_host_iconv(const pdf_char_t *input_data,
323 : const pdf_size_t input_length,
324 : const pdf_text_host_encoding_t enc,
325 : pdf_char_t **p_output_data,
326 : pdf_size_t *p_output_length)
327 33 : {
328 : iconv_t to_host;
329 : size_t n_conv;
330 : char *in_str;
331 : size_t n_in;
332 : size_t n_out;
333 : pdf_char_t *new_data;
334 : char *out_str;
335 : pdf_size_t worst_length;
336 : pdf_size_t new_length;
337 :
338 : /* Check if conversion is available. If we just specify "UTF-32" as the
339 : * input encoding requested, iconv will expect the BOM by default, and
340 : * we don't want it, so we specify directly the endianness required in the
341 : * name of the encoding, depending on the host endianness */
342 33 : to_host = iconv_open((char *)enc.name, \
343 : (PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"));
344 33 : if(to_host == (iconv_t)-1)
345 : {
346 : PDF_DEBUG_BASE("Conversion from UTF-32 to '%s' not available: '%s'",
347 : enc.name, strerror(errno));
348 3 : return PDF_ETEXTENC;
349 : }
350 :
351 : /* Prepare lengths and locations.
352 : * The worst length is computed as having one single output byte for each
353 : * input single byte */
354 30 : worst_length = input_length+4;
355 30 : new_data = (pdf_char_t *)pdf_alloc(worst_length);
356 30 : if(new_data == NULL)
357 : {
358 0 : iconv_close(to_host);
359 0 : return PDF_ENOMEM;
360 : }
361 30 : n_out = worst_length;
362 30 : in_str = (char *)input_data;
363 30 : out_str = (char *)new_data;
364 30 : n_in = input_length;
365 :
366 90 : while(n_in > 0)
367 : {
368 : /* Convert */
369 30 : n_conv = iconv(to_host, &in_str, &n_in, &out_str, &n_out);
370 :
371 : /* Check conversion output status. We check errno to see if the problem
372 : * is that more buffer is needed in the output. If this is the case,
373 : * we just give a second try to the worst length and reallocate memory.
374 : * There is no problem to use errno in multi-threaded applications
375 : * if the library is compiled with -D_REENTRANT */
376 30 : if(n_conv == (size_t)-1)
377 : {
378 0 : if(errno == E2BIG)
379 : {
380 0 : pdf_size_t n_bytes_generated = 0;
381 : /* Compute the number of bytes actually generated in the
382 : * output buffer. */
383 0 : n_bytes_generated = (pdf_size_t) (worst_length - n_out);
384 :
385 : /* We need more output buffer */
386 0 : worst_length += (n_in);
387 :
388 : PDF_DEBUG_BASE("Reallocating to '%lu'. "
389 : "'%lu' bytes are already generated",
390 : (unsigned long) worst_length,
391 : (unsigned long) n_bytes_generated);
392 : /* Reallocate buffer with greater size */
393 0 : new_data = (pdf_char_t *)pdf_realloc(new_data,worst_length);
394 0 : if(new_data == NULL)
395 : {
396 0 : return PDF_ENOMEM;
397 : }
398 :
399 : /* The re-allocated new data does not have to be in the same
400 : * memory place as the original one, so the `out_str' pointer
401 : * must be reset */
402 0 : out_str = (char *) &new_data[n_bytes_generated];
403 :
404 : /* The number of bytes available in the buffer must also be
405 : * reset */
406 0 : n_out = (worst_length - n_bytes_generated);
407 : }
408 : else
409 : {
410 0 : iconv_close(to_host);
411 : PDF_DEBUG_BASE("Invalid data to convert to Host Encoding: '%s'",
412 : strerror(errno));
413 0 : return PDF_EBADTEXT;
414 : }
415 : }
416 : }
417 :
418 : /* Compute new final length */
419 30 : new_length = worst_length - n_out;
420 :
421 : /* Finally, reset the buffer length to its correct size */
422 30 : if(new_length != worst_length)
423 : {
424 30 : new_data = (pdf_char_t *)pdf_realloc(new_data,new_length);
425 30 : if(new_data == NULL)
426 : {
427 0 : iconv_close(to_host);
428 0 : return PDF_ENOMEM;
429 : }
430 : }
431 :
432 : /* And set the output values */
433 30 : *p_output_data = new_data;
434 30 : *p_output_length = new_length;
435 :
436 30 : iconv_close(to_host);
437 30 : return PDF_OK;
438 : }
439 : #endif
440 :
441 :
442 :
443 : pdf_status_t
444 : pdf_text_utf32he_to_host(const pdf_char_t *input_data,
445 : const pdf_size_t input_length,
446 : const pdf_text_host_encoding_t enc,
447 : pdf_char_t **p_output_data,
448 : pdf_size_t *p_output_length)
449 33 : {
450 : #ifdef PDF_HOST_WIN32
451 : return pdf_text_utf32he_to_host_win32(input_data,
452 : input_length,
453 : enc,
454 : p_output_data,
455 : p_output_length);
456 : #else
457 33 : return pdf_text_utf32he_to_host_iconv(input_data,
458 : input_length,
459 : enc,
460 : p_output_data,
461 : p_output_length);
462 : #endif
463 : }
464 :
465 : #ifdef PDF_HOST_WIN32
466 : static pdf_status_t
467 : pdf_text_host_to_utf32he_win32(const pdf_char_t *input_data,
468 : const pdf_size_t input_length,
469 : const pdf_text_host_encoding_t enc,
470 : pdf_char_t **p_output_data,
471 : pdf_size_t *p_output_length)
472 : {
473 : UINT CodePage;
474 :
475 : /* In windows, the charset name stored in the pdf_text_host_encoding_t
476 : * element will be in the following format: "CPn", where 'n' is the
477 : * code page number (unsigned integer) obtained with GetACP() */
478 :
479 : /* So first of all, check windows host encoding */
480 : if(pdf_text_convert_encoding_name_to_CP(enc.name, &CodePage) != PDF_OK)
481 : {
482 : PDF_DEBUG_BASE("Invalid windows encoding name received...");
483 : return PDF_ETEXTENC;
484 : }
485 : else
486 : {
487 : DWORD dwFlags;
488 : int output_nwchars;
489 : pdf_char_t *temp_data;
490 : pdf_size_t temp_size;
491 :
492 : /* Get dwFlags value */
493 : dwFlags = pdf_text_get_dwflags_for_cp(CodePage, MB_ERR_INVALID_CHARS);
494 :
495 : /* For ASCII-7, check MSB... MultiByteToWideChar doesn't do it, and the
496 : * behaviour should be equal to that of iconv() */
497 : if(CodePage == 20127) /* ASCII-7 code point */
498 : {
499 : if(pdf_text_is_ascii7(input_data, input_length) == PDF_FALSE)
500 : {
501 : PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
502 : " Not ASCII-7");
503 : return PDF_EBADTEXT;
504 : }
505 : }
506 :
507 : /* First of all, query the length of the output string */
508 : SetLastError(0);
509 : output_nwchars = MultiByteToWideChar(CodePage, /* CodePage */
510 : dwFlags, /* dwFlags */
511 : (char *)input_data, /* lpMultiByteStr */
512 : input_length, /* cbMultiByte */
513 : NULL, /* lpWideCharStr */
514 : 0); /* cchWideChar */
515 :
516 : /* Check if we got an error with the call to MultiByteToWideChar*/
517 : if(output_nwchars == 0)
518 : {
519 : #ifdef HAVE_DEBUG_BASE
520 : switch(GetLastError())
521 : {
522 : case ERROR_INVALID_FLAGS:
523 : PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
524 : " 'Invalid flags'");
525 : break;
526 : case ERROR_NO_UNICODE_TRANSLATION:
527 : PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
528 : " 'No Unicode Translation'");
529 : break;
530 : default:
531 : PDF_DEBUG_BASE("Invalid data to convert from Host Encoding");
532 : break;
533 : }
534 : #endif
535 : return PDF_EBADTEXT;
536 : }
537 :
538 : /* Allocate memory for output buffer */
539 : temp_size = output_nwchars * sizeof(WCHAR);
540 : temp_data = (pdf_char_t *)pdf_alloc(temp_size);
541 : if(temp_data == NULL)
542 : {
543 : return PDF_ENOMEM;
544 : }
545 :
546 : /* Launch the conversion to UTF-16LE */
547 : SetLastError(0);
548 : if(MultiByteToWideChar(CodePage, /* CodePage */
549 : dwFlags, /* dwFlags */
550 : (char *)input_data, /* lpMultiByteStr */
551 : input_length, /* cbMultiByte */
552 : (LPWSTR)temp_data, /* lpWideCharStr */
553 : output_nwchars) != output_nwchars) /* cchWideChar */
554 : {
555 : PDF_DEBUG_BASE("Problem performing the host encoding conversion");
556 : return PDF_ETEXTENC;
557 : }
558 : else
559 : {
560 : pdf_status_t ret_code;
561 :
562 : /* Finally, convert to UTF-32HE */
563 : ret_code = pdf_text_utf16le_to_utf32he(temp_data,
564 : temp_size,
565 : p_output_data,
566 : p_output_length);
567 :
568 : pdf_dealloc(temp_data);
569 : return ret_code;
570 : }
571 : }
572 : }
573 :
574 : #else
575 :
576 : static pdf_status_t
577 : pdf_text_host_to_utf32he_iconv(const pdf_char_t *input_data,
578 : const pdf_size_t input_length,
579 : const pdf_text_host_encoding_t enc,
580 : pdf_char_t **p_output_data,
581 : pdf_size_t *p_output_length)
582 10 : {
583 : iconv_t from_host;
584 : size_t n_conv;
585 : char *in_str;
586 : size_t n_in;
587 : size_t n_out;
588 : pdf_char_t *new_data;
589 : char *out_str;
590 : pdf_size_t worst_length;
591 : pdf_size_t new_length;
592 :
593 : /* Check if conversion is available. If we just specify "UTF-32" as the
594 : * output encoding requested, iconv will insert the BOM by default, and
595 : * we don't want it, so we specify directly the endianness required in the
596 : * name of the encoding, depending on the host endianness */
597 10 : from_host = iconv_open((PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"),
598 : (char *)enc.name);
599 10 : if(from_host == (iconv_t)-1)
600 : {
601 : PDF_DEBUG_BASE("Conversion from '%s' to UTF-32 not available",
602 : enc.name);
603 2 : return PDF_ETEXTENC;
604 : }
605 :
606 : /* Prepare lengths and locations.
607 : * The worst length is computed as having 4 output bytes for each input
608 : * single byte, taking into account that iconv adds an extra 32-bit NUL
609 : * value (4 bytes equal to 0) at the end of the converted string. */
610 8 : worst_length = (input_length+1)*4;
611 8 : new_data = (pdf_char_t *)pdf_alloc(worst_length);
612 8 : if(new_data == NULL)
613 : {
614 0 : iconv_close(from_host);
615 0 : return PDF_ENOMEM;
616 : }
617 8 : n_out = worst_length;
618 8 : in_str = (char *)input_data;
619 8 : out_str = (char *)new_data;
620 8 : n_in = input_length;
621 :
622 22 : while(n_in > 0)
623 : {
624 : /* Convert */
625 8 : n_conv = iconv(from_host, &in_str, &n_in, &out_str, &n_out);
626 :
627 : /* Check conversion output status. We check errno to see if the problem
628 : * is that more buffer is needed in the output. If this is the case,
629 : * we just give a second try to the worst length and reallocate memory.
630 : * There is no problem to use errno in multi-threaded applications
631 : * if the library is compiled with -D_REENTRANT */
632 8 : if(n_conv == (size_t)-1)
633 : {
634 2 : if(errno == E2BIG)
635 : {
636 0 : pdf_size_t n_bytes_generated = 0;
637 : /* Compute the number of bytes actually generated in the
638 : * output buffer. `n_out' stores the number of bytes still
639 : * available in the output buffer. As the number of bytes
640 : * allocated is multiple of four, and UTF-32 always uses 4
641 : * bytes for each character, this value should always be 0 */
642 0 : n_bytes_generated = (pdf_size_t) (worst_length - n_out);
643 :
644 : /* We need more output buffer */
645 0 : worst_length += (n_in * 4);
646 : PDF_DEBUG_BASE("Reallocating to '%lu'. "
647 : "'%lu' bytes are already generated",
648 : (unsigned long) worst_length,
649 : (unsigned long) n_bytes_generated);
650 : /* Reallocate buffer with greater size */
651 0 : new_data = (pdf_char_t *)pdf_realloc(new_data,worst_length);
652 0 : if(new_data == NULL)
653 : {
654 0 : iconv_close(from_host);
655 0 : return PDF_ENOMEM;
656 : }
657 :
658 : /* The re-allocated new data does not have to be in the same
659 : * memory place as the original one, so the `out_str' pointer
660 : * must be reset */
661 0 : out_str = (char *) &new_data[n_bytes_generated];
662 :
663 : /* The number of bytes available in the buffer must also be
664 : * reset */
665 0 : n_out = (worst_length - n_bytes_generated);
666 : }
667 : else
668 : {
669 2 : iconv_close(from_host);
670 2 : pdf_dealloc(new_data);
671 : PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
672 : "'%s'",strerror(errno));
673 2 : return PDF_EBADTEXT;
674 : }
675 : }
676 : }
677 :
678 : /* Compute new final length */
679 6 : new_length = worst_length - n_out;
680 :
681 : /* Remove from the length the bytes related to the 4-byte NUL UTF-32 char */
682 6 : if((new_data[new_length-1] == '\0') && \
683 : (new_data[new_length-2] == '\0') && \
684 : (new_data[new_length-3] == '\0') && \
685 : (new_data[new_length-4] == '\0'))
686 : {
687 0 : new_length -= 4;
688 : }
689 :
690 : /* Finally, reset the buffer length to its correct size */
691 6 : if(new_length != worst_length)
692 : {
693 6 : new_data = (pdf_char_t *)pdf_realloc(new_data,new_length);
694 6 : if(new_data == NULL)
695 : {
696 0 : return PDF_ENOMEM;
697 : }
698 : }
699 :
700 : /* And set the output values within pdf_text_t */
701 6 : *p_output_data = new_data;
702 6 : *p_output_length = new_length;
703 :
704 6 : iconv_close(from_host);
705 6 : return PDF_OK;
706 : }
707 : #endif
708 :
709 :
710 : pdf_status_t
711 : pdf_text_host_to_utf32he(const pdf_char_t *input_data,
712 : const pdf_size_t input_length,
713 : const pdf_text_host_encoding_t enc,
714 : pdf_char_t **p_output_data,
715 : pdf_size_t *p_output_length)
716 10 : {
717 : #ifdef PDF_HOST_WIN32
718 : return pdf_text_host_to_utf32he_win32(input_data,
719 : input_length,
720 : enc,
721 : p_output_data,
722 : p_output_length);
723 : #else
724 10 : return pdf_text_host_to_utf32he_iconv(input_data,
725 : input_length,
726 : enc,
727 : p_output_data,
728 : p_output_length);
729 : #endif
730 : }
731 :
732 :
733 :
734 : /* End of pdf-text-host-encoding.c */
|