LTP GCOV extension - code coverage report
Current view: directory - src/base - pdf-text-host-encoding.c
Test: libgnupdf.info
Date: 2010-07-31 Instrumented lines: 91
Code covered: 68.1 % Executed lines: 62

       1                 : /* -*- mode: C -*-
       2                 :  *
       3                 :  *       File:         pdf-text-host-encoding.c
       4                 :  *       Date:         Fri Jan 11 21:09:23 2008
       5                 :  *
       6                 :  *       GNU PDF Library - Encoded Text handling utilities - Host Encodings
       7                 :  *
       8                 :  */
       9                 : 
      10                 : /* Copyright (C) 2008 Free Software Foundation, Inc. */
      11                 : 
      12                 : /* This program is free software: you can redistribute it and/or modify
      13                 :  * it under the terms of the GNU General Public License as published by
      14                 :  * the Free Software Foundation, either version 3 of the License, or
      15                 :  * (at your option) any later version.
      16                 :  *
      17                 :  * This program is distributed in the hope that it will be useful,
      18                 :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19                 :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      20                 :  * GNU General Public License for more details.
      21                 :  *
      22                 :  * You should have received a copy of the GNU General Public License
      23                 :  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
      24                 :  */
      25                 : 
      26                 : #include <config.h>
      27                 : 
      28                 : #include <string.h>
      29                 : #ifdef PDF_HOST_WIN32
      30                 :  #include <windows.h>
      31                 : #else
      32                 :  #include <iconv.h>
      33                 :  #include <errno.h>
      34                 : #endif
      35                 : 
      36                 : #include <pdf-types.h>
      37                 : #include <pdf-text.h>
      38                 : #include <pdf-text-context.h>
      39                 : #include <pdf-text-encoding.h>
      40                 : #include <pdf-text-host-encoding.h>
      41                 : 
      42                 : /*
      43                 :  * ICONV API
      44                 :  * -----------
      45                 :  * iconv_t iconv_open (const char* tocode, const char* fromcode)
      46                 :  *
      47                 :  * size_t iconv (iconv_t cd,
      48                 :  *               const char **inbuf, size_t *inbytesleft,
      49                 :  *               char **outbuf, size_t *outbytesleft);
      50                 :  *
      51                 :  * int iconv_close (iconv_t cd);
      52                 :  */
      53                 : 
      54                 : 
      55                 : /* 
      56                 :  * WINDOWS API
      57                 :  * -------------
      58                 :  * int MultiByteToWideChar(UINT CodePage, 
      59                 :  *                         DWORD dwFlags,         
      60                 :  *                         LPCSTR lpMultiByteStr, 
      61                 :  *                         int cbMultiByte,       
      62                 :  *                         LPWSTR lpWideCharStr,  
      63                 :  *                         int cchWideChar);
      64                 :  *
      65                 :  * int WideCharToMultiByte(UINT CodePage, 
      66                 :  *                         DWORD dwFlags, 
      67                 :  *                         LPCWSTR lpWideCharStr,
      68                 :  *                         int cchWideChar, 
      69                 :  *                         LPSTR lpMultiByteStr, 
      70                 :  *                         int cbMultiByte,
      71                 :  *                         LPCSTR lpDefaultChar,    
      72                 :  *                         LPBOOL lpUsedDefaultChar);
      73                 :  * 
      74                 :  * UINT GetACP(void);
      75                 :  *
      76                 :  */
      77                 : 
      78                 : #ifdef PDF_HOST_WIN32
      79                 : static DWORD
      80                 : pdf_text_get_dwflags_for_cp(UINT CodePage, DWORD def_dwflags)
      81                 : {
      82                 :   /* dwFlags has some restrictions */
      83                 :   switch(CodePage)
      84                 :   {
      85                 :     case 50220:
      86                 :     case 50221:
      87                 :     case 50222:
      88                 :     case 50225:
      89                 :     case 50227:
      90                 :     case 50229:
      91                 :     case 52936:
      92                 :     case 54936:
      93                 :     case 57002:
      94                 :     case 57003:
      95                 :     case 57004:
      96                 :     case 57005:
      97                 :     case 57006:
      98                 :     case 57007:
      99                 :     case 57008:
     100                 :     case 57009:
     101                 :     case 57010:
     102                 :     case 57011:
     103                 :     case 65000:
     104                 :     case 42:
     105                 :       return 0;
     106                 :     default:
     107                 :       return def_dwflags;
     108                 :   }
     109                 : }
     110                 : 
     111                 : 
     112                 : pdf_status_t
     113                 : pdf_text_convert_encoding_name_to_CP(const pdf_char_t *encoding_name,
     114                 :                                      UINT *pCP)
     115                 : {
     116                 :   UINT CodePage;
     117                 :   char *end_char;
     118                 : 
     119                 :   /* In windows, the charset name stored in the pdf_text_host_encoding_t
     120                 :    *  element will be in the following format: "CPn", where 'n' is the
     121                 :    *  code page number (unsigned integer) obtained with GetACP() */
     122                 :     
     123                 :   /* So first of all, check windows host encoding */
     124                 :   if((strlen((char *)encoding_name) < 3) || \
     125                 :      (strncmp((char *)encoding_name,"CP",2) != 0))
     126                 :     {
     127                 :       PDF_DEBUG_BASE("Host encoding received seems not valid");
     128                 :       return PDF_ETEXTENC;
     129                 :     }
     130                 : 
     131                 :   /* Get codepage as unsigned integer. `strtol' will return 0 if it was not
     132                 :    *  able to correctly parse the string. BTW, 0 is not a valid code page. */
     133                 :   CodePage = (UINT) strtol ((char *)&encoding_name[2],
     134                 :                             &end_char,
     135                 :                             10);
     136                 :   if(CodePage == 0)
     137                 :     {
     138                 :       PDF_DEBUG_BASE("Problem converting input CP value '%s'",
     139                 :                      encoding_name);
     140                 :       return PDF_ETEXTENC;
     141                 :     }
     142                 :   else
     143                 :     {
     144                 :       *pCP = CodePage;
     145                 :       return PDF_OK;
     146                 :     }
     147                 : }
     148                 : 
     149                 : #endif
     150                 : 
     151                 : 
     152                 : pdf_status_t
     153                 : pdf_text_host_encoding_is_available(const pdf_char_t *encoding_name)
     154               4 : {
     155                 : #ifdef PDF_HOST_WIN32
     156                 :   {
     157                 :     UINT CodePage;
     158                 : 
     159                 :     if(pdf_text_convert_encoding_name_to_CP(encoding_name, &CodePage) != PDF_OK)
     160                 :       {
     161                 :         PDF_DEBUG_BASE("Invalid windows encoding name received...");
     162                 :         return PDF_ETEXTENC;
     163                 :       }
     164                 : 
     165                 :     /* Check given code page in the system */
     166                 :     return ((IsValidCodePage(CodePage)) ? PDF_OK : PDF_ETEXTENC);
     167                 :   }
     168                 : #else
     169                 :   {
     170                 :     iconv_t check;
     171                 : 
     172                 :     /* Check conversion from Host Encoding to UTF-32HE */
     173               4 :     check = iconv_open((char *)encoding_name, \
     174                 :                        (PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"));
     175               4 :     if(check == (iconv_t)-1)
     176                 :       {
     177                 :         PDF_DEBUG_BASE("Conversion from '%s' to UTF-32HE not available",
     178                 :                        encoding_name);
     179               1 :         return PDF_ETEXTENC;
     180                 :       }
     181               3 :     iconv_close(check);
     182                 : 
     183                 :     /* Check conversion from UTF-32HE to Host Encoding */
     184               3 :     check = iconv_open((PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"), \
     185                 :                        (char *)encoding_name);
     186               3 :     if(check == (iconv_t)-1)
     187                 :       {
     188                 :         PDF_DEBUG_BASE("Conversion from UTF-32HE to '%s' not available",
     189                 :                        encoding_name);
     190               0 :         return PDF_ETEXTENC;
     191                 :       }
     192               3 :     iconv_close(check);
     193                 :     
     194               3 :     return PDF_OK;
     195                 :   }
     196                 : #endif
     197                 : }
     198                 : 
     199                 : 
     200                 : #ifdef PDF_HOST_WIN32
     201                 : static pdf_status_t
     202                 : pdf_text_utf32he_to_host_win32(const pdf_char_t      *input_data,
     203                 :                                const pdf_size_t      input_length,
     204                 :                                const pdf_text_host_encoding_t enc,
     205                 :                                pdf_char_t            **p_output_data,
     206                 :                                pdf_size_t            *p_output_length)
     207                 : {
     208                 :   pdf_status_t ret_code;
     209                 :   pdf_char_t *temp_data;
     210                 :   pdf_size_t  temp_size;
     211                 :   UINT CodePage;
     212                 :   /* Firstly, convert from UTF-32HE to UTF-16LE */
     213                 :   ret_code = pdf_text_utf32he_to_utf16le(input_data,
     214                 :                                          input_length,
     215                 :                                          &temp_data,
     216                 :                                          &temp_size);
     217                 :   if(ret_code != PDF_OK)
     218                 :     {
     219                 :       PDF_DEBUG_BASE("Couldn't convert from UTF-32HE to UTF-16LE");
     220                 :       return PDF_ETEXTENC;
     221                 :     }
     222                 : 
     223                 :   /* In windows, the charset name stored in the pdf_text_host_encoding_t
     224                 :    *  element will be in the following format: "CPn", where 'n' is the
     225                 :    *  code page number (unsigned integer) obtained with GetACP() */
     226                 : 
     227                 :   /* So check windows host encoding */
     228                 :   if(pdf_text_convert_encoding_name_to_CP(enc.name, &CodePage) != PDF_OK)
     229                 :     {
     230                 :       PDF_DEBUG_BASE("Invalid windows encoding name received...");
     231                 :       pdf_dealloc(temp_data);
     232                 :       return PDF_ETEXTENC;
     233                 :     }
     234                 :   else
     235                 :     {
     236                 :       DWORD dwFlags;
     237                 :       int output_nmbyte;
     238                 :       BOOL default_used = 0;
     239                 : 
     240                 :       /* Get dwFlags value */
     241                 :       dwFlags = 0;
     242                 : 
     243                 :       /* First of all, query the length of the output string */
     244                 :       SetLastError(0);
     245                 :       output_nmbyte =  WideCharToMultiByte(CodePage,     /* CodePage */
     246                 :                                            dwFlags,      /* dwFlags */
     247                 :                                            (LPCWSTR)temp_data, /* lpWideCharStr */
     248                 :                                            (temp_size/sizeof(WCHAR)), /* cbWideChar */
     249                 :                                            NULL,         /* lpMultiByteStr */
     250                 :                                            0,            /* ccMultiByte */
     251                 :                                            NULL,            /* lpDefaultChar */
     252                 :                                            &default_used); /* lpUsedDefaultChar */
     253                 : 
     254                 :       /* Check if we got an error with the call to WideCharToMultiByte */
     255                 :       if(output_nmbyte == 0 || default_used)
     256                 :         {
     257                 : #ifdef HAVE_DEBUG_BASE
     258                 :           switch(GetLastError())
     259                 :             {
     260                 :               case ERROR_INVALID_FLAGS:
     261                 :                 PDF_DEBUG_BASE("Invalid data to convert to Host Encoding:"
     262                 :                                " 'Invalid flags'");
     263                 :                 break;
     264                 :               default:
     265                 :                 PDF_DEBUG_BASE("Invalid data to convert to Host Encoding");
     266                 :                 break;
     267                 :             }
     268                 : #endif
     269                 :           pdf_dealloc(temp_data);
     270                 :           return PDF_EBADTEXT;
     271                 :         }
     272                 : 
     273                 :       /* Allocate memory for output buffer */
     274                 :       *p_output_length = output_nmbyte;
     275                 :       *p_output_data = (pdf_char_t *)pdf_alloc(*p_output_length);
     276                 :       if(*p_output_data == NULL)
     277                 :         {
     278                 :           pdf_dealloc(temp_data);
     279                 :           return PDF_ENOMEM;
     280                 :         }
     281                 : 
     282                 :       /* Launch the conversion to host encoding */
     283                 :       SetLastError(0);
     284                 :       default_used = 0;
     285                 :       if((WideCharToMultiByte(CodePage,     /* CodePage */
     286                 :                               dwFlags,      /* dwFlags */
     287                 :                               (LPCWSTR)temp_data, /* lpWideCharStr */
     288                 :                               (temp_size/sizeof(WCHAR)), /* cbWideChar */
     289                 :                               (char *)*p_output_data, /* lpMultiByteStr */
     290                 :                               *p_output_length, /* ccMultiByte */
     291                 :                               NULL,            /* lpDefaultChar */
     292                 :                               &default_used) != output_nmbyte) || \
     293                 :          (default_used))
     294                 :         {
     295                 :           PDF_DEBUG_BASE("Problem performing the host encoding conversion");
     296                 :           pdf_dealloc(*p_output_data);
     297                 :           pdf_dealloc(temp_data);
     298                 :           return PDF_ETEXTENC;
     299                 :         }
     300                 :       else
     301                 :         {
     302                 :           /* Check last byte... could be NUL and we don't want it */
     303                 :           if((*p_output_data)[*p_output_length -1] == '\0')
     304                 :             {
     305                 :               pdf_char_t *temp;
     306                 :               temp = pdf_realloc((*p_output_data), (*p_output_length -1));
     307                 :               if(temp != NULL)
     308                 :                 {
     309                 :                   *p_output_data = temp;
     310                 :                   *p_output_length = *p_output_length -1;
     311                 :                 }
     312                 :             }
     313                 :           pdf_dealloc(temp_data);
     314                 :           return PDF_OK;
     315                 :         }
     316                 :     }
     317                 : }
     318                 : 
     319                 : #else
     320                 : 
     321                 : static pdf_status_t
     322                 : pdf_text_utf32he_to_host_iconv(const pdf_char_t      *input_data,
     323                 :                                const pdf_size_t      input_length,
     324                 :                                const pdf_text_host_encoding_t enc,
     325                 :                                pdf_char_t            **p_output_data,
     326                 :                                pdf_size_t            *p_output_length)
     327              33 : {
     328                 :   iconv_t to_host;
     329                 :   size_t n_conv;
     330                 :   char *in_str;
     331                 :   size_t n_in;
     332                 :   size_t n_out;
     333                 :   pdf_char_t *new_data;
     334                 :   char *out_str;
     335                 :   pdf_size_t worst_length;
     336                 :   pdf_size_t new_length;
     337                 : 
     338                 :     /* Check if conversion is available. If we just specify "UTF-32" as the
     339                 :   *  input encoding requested, iconv will expect the BOM by default, and
     340                 :   *  we don't want it, so we specify directly the endianness required in the
     341                 :     *  name of the encoding, depending on the host endianness */
     342              33 :   to_host = iconv_open((char *)enc.name, \
     343                 :       (PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"));
     344              33 :   if(to_host == (iconv_t)-1)
     345                 :     {
     346                 :       PDF_DEBUG_BASE("Conversion from UTF-32 to '%s' not available: '%s'",
     347                 :                      enc.name, strerror(errno));
     348               3 :       return PDF_ETEXTENC;
     349                 :     }
     350                 : 
     351                 :   /* Prepare lengths and locations.
     352                 :    *  The worst length is computed as having one single output byte for each 
     353                 :    *  input single byte */
     354              30 :   worst_length = input_length+4;
     355              30 :   new_data = (pdf_char_t *)pdf_alloc(worst_length);
     356              30 :   if(new_data == NULL)
     357                 :     {
     358               0 :       iconv_close(to_host);
     359               0 :       return PDF_ENOMEM;
     360                 :     }
     361              30 :   n_out = worst_length;
     362              30 :   in_str = (char *)input_data;
     363              30 :   out_str = (char *)new_data;
     364              30 :   n_in = input_length;
     365                 : 
     366              90 :   while(n_in > 0)
     367                 :     {
     368                 :       /* Convert */
     369              30 :       n_conv = iconv(to_host, &in_str, &n_in, &out_str, &n_out);
     370                 : 
     371                 :       /* Check conversion output status. We check errno to see if the problem
     372                 :        *  is that more buffer is needed in the output. If this is the case,
     373                 :        *  we just give a second try to the worst length and reallocate memory.
     374                 :        *  There is no problem to use errno in multi-threaded applications
     375                 :        *  if the library is compiled with -D_REENTRANT */
     376              30 :       if(n_conv == (size_t)-1)
     377                 :         {
     378               0 :           if(errno == E2BIG)
     379                 :             {
     380               0 :               pdf_size_t n_bytes_generated = 0;
     381                 :               /* Compute the number of bytes actually generated in the
     382                 :                *  output buffer. */
     383               0 :               n_bytes_generated = (pdf_size_t) (worst_length - n_out);
     384                 : 
     385                 :               /* We need more output buffer */
     386               0 :               worst_length += (n_in);
     387                 : 
     388                 :               PDF_DEBUG_BASE("Reallocating to '%lu'. "
     389                 :                              "'%lu' bytes are already generated",
     390                 :                              (unsigned long) worst_length,
     391                 :                              (unsigned long) n_bytes_generated);
     392                 :               /* Reallocate buffer with greater size */
     393               0 :               new_data = (pdf_char_t *)pdf_realloc(new_data,worst_length);
     394               0 :               if(new_data == NULL)
     395                 :                 {
     396               0 :                   return PDF_ENOMEM;
     397                 :                 }
     398                 : 
     399                 :               /* The re-allocated new data does not have to be in the same
     400                 :                *  memory place as the original one, so the `out_str' pointer
     401                 :                *  must be reset */
     402               0 :               out_str = (char *) &new_data[n_bytes_generated];
     403                 : 
     404                 :               /* The number of bytes available in the buffer must also be
     405                 :                *  reset */
     406               0 :               n_out = (worst_length - n_bytes_generated);
     407                 :             }
     408                 :           else
     409                 :             {
     410               0 :               iconv_close(to_host);
     411                 :               PDF_DEBUG_BASE("Invalid data to convert to Host Encoding: '%s'",
     412                 :                              strerror(errno));
     413               0 :               return PDF_EBADTEXT;
     414                 :             }
     415                 :         }
     416                 :     }
     417                 : 
     418                 :   /* Compute new final length */
     419              30 :   new_length = worst_length - n_out;
     420                 : 
     421                 :   /* Finally, reset the buffer length to its correct size */
     422              30 :   if(new_length != worst_length)
     423                 :     {
     424              30 :       new_data = (pdf_char_t *)pdf_realloc(new_data,new_length);
     425              30 :       if(new_data == NULL)
     426                 :         {
     427               0 :           iconv_close(to_host);
     428               0 :           return PDF_ENOMEM;
     429                 :         }
     430                 :     }
     431                 : 
     432                 :   /* And set the output values */
     433              30 :   *p_output_data = new_data;
     434              30 :   *p_output_length = new_length;
     435                 : 
     436              30 :   iconv_close(to_host);
     437              30 :   return PDF_OK;
     438                 : }
     439                 : #endif
     440                 : 
     441                 : 
     442                 : 
     443                 : pdf_status_t
     444                 : pdf_text_utf32he_to_host(const pdf_char_t      *input_data,
     445                 :                          const pdf_size_t      input_length,
     446                 :                          const pdf_text_host_encoding_t enc,
     447                 :                          pdf_char_t            **p_output_data,
     448                 :                          pdf_size_t            *p_output_length)
     449              33 : {
     450                 : #ifdef PDF_HOST_WIN32
     451                 :   return pdf_text_utf32he_to_host_win32(input_data,
     452                 :                                         input_length,
     453                 :                                         enc,
     454                 :                                         p_output_data,
     455                 :                                         p_output_length);
     456                 : #else
     457              33 :   return pdf_text_utf32he_to_host_iconv(input_data,
     458                 :                                         input_length,
     459                 :                                         enc,
     460                 :                                         p_output_data,
     461                 :                                         p_output_length);
     462                 : #endif
     463                 : }
     464                 : 
     465                 : #ifdef PDF_HOST_WIN32
     466                 : static pdf_status_t
     467                 : pdf_text_host_to_utf32he_win32(const pdf_char_t      *input_data,
     468                 :                                const pdf_size_t      input_length,
     469                 :                                const pdf_text_host_encoding_t enc,
     470                 :                                pdf_char_t            **p_output_data,
     471                 :                               pdf_size_t            *p_output_length)
     472                 : {
     473                 :   UINT CodePage;
     474                 : 
     475                 :   /* In windows, the charset name stored in the pdf_text_host_encoding_t
     476                 :    *  element will be in the following format: "CPn", where 'n' is the
     477                 :    *  code page number (unsigned integer) obtained with GetACP() */
     478                 : 
     479                 :   /* So first of all, check windows host encoding */
     480                 :   if(pdf_text_convert_encoding_name_to_CP(enc.name, &CodePage) != PDF_OK)
     481                 :     {
     482                 :       PDF_DEBUG_BASE("Invalid windows encoding name received...");
     483                 :       return PDF_ETEXTENC;
     484                 :     }
     485                 :   else
     486                 :     {
     487                 :       DWORD dwFlags;
     488                 :       int output_nwchars;
     489                 :       pdf_char_t *temp_data;
     490                 :       pdf_size_t temp_size;
     491                 : 
     492                 :       /* Get dwFlags value */
     493                 :       dwFlags = pdf_text_get_dwflags_for_cp(CodePage, MB_ERR_INVALID_CHARS);
     494                 : 
     495                 :       /* For ASCII-7, check MSB... MultiByteToWideChar doesn't do it, and the
     496                 :        *  behaviour should be equal to that of iconv() */
     497                 :       if(CodePage == 20127) /* ASCII-7 code point */
     498                 :         {
     499                 :           if(pdf_text_is_ascii7(input_data, input_length) == PDF_FALSE)
     500                 :             {
     501                 :               PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
     502                 :                              " Not ASCII-7");
     503                 :               return PDF_EBADTEXT;
     504                 :             }
     505                 :         }
     506                 : 
     507                 :       /* First of all, query the length of the output string */
     508                 :       SetLastError(0);
     509                 :       output_nwchars =  MultiByteToWideChar(CodePage,     /* CodePage */
     510                 :                                             dwFlags,      /* dwFlags */
     511                 :                                             (char *)input_data, /* lpMultiByteStr */
     512                 :                                             input_length, /* cbMultiByte */
     513                 :                                             NULL,         /* lpWideCharStr */
     514                 :                                             0);           /* cchWideChar */
     515                 : 
     516                 :       /* Check if we got an error with the call to MultiByteToWideChar*/
     517                 :       if(output_nwchars == 0)
     518                 :         {
     519                 : #ifdef HAVE_DEBUG_BASE
     520                 :           switch(GetLastError())
     521                 :             {
     522                 :               case ERROR_INVALID_FLAGS:
     523                 :                 PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
     524                 :                                " 'Invalid flags'");
     525                 :                 break;
     526                 :               case ERROR_NO_UNICODE_TRANSLATION:
     527                 :                 PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
     528                 :                                " 'No Unicode Translation'");
     529                 :                 break;
     530                 :               default:
     531                 :                 PDF_DEBUG_BASE("Invalid data to convert from Host Encoding");
     532                 :                 break;
     533                 :             }
     534                 : #endif
     535                 :           return PDF_EBADTEXT;
     536                 :         }
     537                 : 
     538                 :       /* Allocate memory for output buffer */
     539                 :       temp_size = output_nwchars * sizeof(WCHAR);
     540                 :       temp_data = (pdf_char_t *)pdf_alloc(temp_size);
     541                 :       if(temp_data == NULL)
     542                 :         {
     543                 :           return PDF_ENOMEM;
     544                 :         }
     545                 : 
     546                 :       /* Launch the conversion to UTF-16LE */
     547                 :       SetLastError(0);
     548                 :       if(MultiByteToWideChar(CodePage,           /* CodePage */
     549                 :                              dwFlags,            /* dwFlags */
     550                 :                              (char *)input_data, /* lpMultiByteStr */
     551                 :                              input_length,       /* cbMultiByte */
     552                 :                              (LPWSTR)temp_data,  /* lpWideCharStr */
     553                 :                              output_nwchars) != output_nwchars) /* cchWideChar */
     554                 :         {
     555                 :           PDF_DEBUG_BASE("Problem performing the host encoding conversion");
     556                 :           return PDF_ETEXTENC;
     557                 :         }
     558                 :       else
     559                 :         {
     560                 :           pdf_status_t ret_code;
     561                 : 
     562                 :           /* Finally, convert to UTF-32HE */
     563                 :           ret_code = pdf_text_utf16le_to_utf32he(temp_data,
     564                 :                                                  temp_size,
     565                 :                                                  p_output_data,
     566                 :                                                  p_output_length);
     567                 : 
     568                 :           pdf_dealloc(temp_data);
     569                 :           return ret_code;
     570                 :         }
     571                 :     }
     572                 : }
     573                 : 
     574                 : #else
     575                 : 
     576                 : static pdf_status_t
     577                 : pdf_text_host_to_utf32he_iconv(const pdf_char_t      *input_data,
     578                 :                                const pdf_size_t      input_length,
     579                 :                                const pdf_text_host_encoding_t enc,
     580                 :                                pdf_char_t            **p_output_data,
     581                 :                                pdf_size_t            *p_output_length)
     582              10 : {
     583                 :   iconv_t from_host;
     584                 :   size_t n_conv;
     585                 :   char *in_str;
     586                 :   size_t n_in;
     587                 :   size_t n_out;
     588                 :   pdf_char_t *new_data;
     589                 :   char *out_str;
     590                 :   pdf_size_t worst_length;
     591                 :   pdf_size_t new_length;
     592                 : 
     593                 :   /* Check if conversion is available. If we just specify "UTF-32" as the
     594                 :    *  output encoding requested, iconv will insert the BOM by default, and
     595                 :    *  we don't want it, so we specify directly the endianness required in the
     596                 :    *  name of the encoding, depending on the host endianness */
     597              10 :   from_host = iconv_open((PDF_IS_BIG_ENDIAN ? "UTF-32BE" : "UTF-32LE"),
     598                 :                           (char *)enc.name);
     599              10 :   if(from_host == (iconv_t)-1)
     600                 :     {
     601                 :       PDF_DEBUG_BASE("Conversion from '%s' to UTF-32 not available",
     602                 :                      enc.name);
     603               2 :       return PDF_ETEXTENC;
     604                 :     }
     605                 : 
     606                 :   /* Prepare lengths and locations.
     607                 :    *  The worst length is computed as having 4 output bytes for each input
     608                 :    *  single byte, taking into account that iconv adds an extra 32-bit NUL
     609                 :    *  value (4 bytes equal to 0) at the end of the converted string. */
     610               8 :   worst_length = (input_length+1)*4;
     611               8 :   new_data = (pdf_char_t *)pdf_alloc(worst_length);
     612               8 :   if(new_data == NULL)
     613                 :     {
     614               0 :       iconv_close(from_host);
     615               0 :       return PDF_ENOMEM;
     616                 :     }
     617               8 :   n_out = worst_length;
     618               8 :   in_str = (char *)input_data;
     619               8 :   out_str = (char *)new_data;
     620               8 :   n_in = input_length;
     621                 : 
     622              22 :   while(n_in > 0)
     623                 :     {
     624                 :       /* Convert */
     625               8 :       n_conv = iconv(from_host, &in_str, &n_in, &out_str, &n_out);
     626                 : 
     627                 :       /* Check conversion output status. We check errno to see if the problem
     628                 :        *  is that more buffer is needed in the output. If this is the case,
     629                 :        *  we just give a second try to the worst length and reallocate memory.
     630                 :        *  There is no problem to use errno in multi-threaded applications
     631                 :        *  if the library is compiled with -D_REENTRANT */
     632               8 :       if(n_conv == (size_t)-1)
     633                 :         {
     634               2 :           if(errno == E2BIG)
     635                 :             {
     636               0 :               pdf_size_t n_bytes_generated = 0;
     637                 :               /* Compute the number of bytes actually generated in the
     638                 :                *  output buffer. `n_out' stores the number of bytes still
     639                 :                *  available in the output buffer. As the number of bytes
     640                 :                *  allocated is multiple of four, and UTF-32 always uses 4
     641                 :                *  bytes for each character, this value should always be 0 */
     642               0 :               n_bytes_generated = (pdf_size_t) (worst_length - n_out);
     643                 : 
     644                 :               /* We need more output buffer */
     645               0 :               worst_length += (n_in * 4);
     646                 :               PDF_DEBUG_BASE("Reallocating to '%lu'. "
     647                 :                              "'%lu' bytes are already generated",
     648                 :                              (unsigned long) worst_length,
     649                 :                              (unsigned long) n_bytes_generated);
     650                 :               /* Reallocate buffer with greater size */
     651               0 :               new_data = (pdf_char_t *)pdf_realloc(new_data,worst_length);
     652               0 :               if(new_data == NULL)
     653                 :                 {
     654               0 :                   iconv_close(from_host);
     655               0 :                   return PDF_ENOMEM;
     656                 :                 }
     657                 : 
     658                 :               /* The re-allocated new data does not have to be in the same
     659                 :                *  memory place as the original one, so the `out_str' pointer
     660                 :                *  must be reset */
     661               0 :               out_str = (char *) &new_data[n_bytes_generated];
     662                 : 
     663                 :               /* The number of bytes available in the buffer must also be
     664                 :                *  reset */
     665               0 :               n_out = (worst_length - n_bytes_generated);
     666                 :             }
     667                 :           else
     668                 :             {
     669               2 :               iconv_close(from_host);
     670               2 :               pdf_dealloc(new_data);
     671                 :               PDF_DEBUG_BASE("Invalid data to convert from Host Encoding:"
     672                 :                              "'%s'",strerror(errno));
     673               2 :               return PDF_EBADTEXT;
     674                 :             }
     675                 :         }
     676                 :     }
     677                 : 
     678                 :   /* Compute new final length */
     679               6 :   new_length = worst_length - n_out;
     680                 : 
     681                 :   /* Remove from the length the bytes related to the 4-byte NUL UTF-32 char */
     682               6 :   if((new_data[new_length-1] == '\0') && \
     683                 :      (new_data[new_length-2] == '\0') && \
     684                 :      (new_data[new_length-3] == '\0') && \
     685                 :      (new_data[new_length-4] == '\0'))
     686                 :     {
     687               0 :       new_length -= 4;
     688                 :     }
     689                 : 
     690                 :   /* Finally, reset the buffer length to its correct size */
     691               6 :   if(new_length != worst_length)
     692                 :     {
     693               6 :       new_data = (pdf_char_t *)pdf_realloc(new_data,new_length);
     694               6 :       if(new_data == NULL)
     695                 :         {
     696               0 :           return PDF_ENOMEM;
     697                 :         }
     698                 :     }
     699                 : 
     700                 :   /* And set the output values within pdf_text_t */
     701               6 :   *p_output_data = new_data;
     702               6 :   *p_output_length = new_length;
     703                 : 
     704               6 :   iconv_close(from_host);
     705               6 :   return PDF_OK;
     706                 : }
     707                 : #endif
     708                 : 
     709                 : 
     710                 : pdf_status_t
     711                 : pdf_text_host_to_utf32he(const pdf_char_t      *input_data,
     712                 :                          const pdf_size_t      input_length,
     713                 :                          const pdf_text_host_encoding_t enc,
     714                 :                          pdf_char_t            **p_output_data,
     715                 :                          pdf_size_t            *p_output_length)
     716              10 : {
     717                 : #ifdef PDF_HOST_WIN32
     718                 :   return pdf_text_host_to_utf32he_win32(input_data,
     719                 :                                         input_length,
     720                 :                                         enc,
     721                 :                                         p_output_data,
     722                 :                                         p_output_length);
     723                 : #else
     724              10 :   return pdf_text_host_to_utf32he_iconv(input_data,
     725                 :                                         input_length,
     726                 :                                         enc,
     727                 :                                         p_output_data,
     728                 :                                         p_output_length);
     729                 : #endif
     730                 : }
     731                 : 
     732                 : 
     733                 : 
     734                 : /* End of pdf-text-host-encoding.c */

Generated by: LTP GCOV extension version 1.6