LTP GCOV extension - code coverage report
Current view: directory - src/base - pdf-token-reader.c
Test: libgnupdf.info
Date: 2010-07-31 Instrumented lines: 393
Code covered: 86.5 % Executed lines: 340

       1                 : /* -*- mode: C -*-
       2                 :  *
       3                 :  *       File:         pdf-token-reader.c
       4                 :  *       Date:         Mon Dec 29 00:45:09 2008
       5                 :  *
       6                 :  *       GNU PDF Library - Stream tokeniser
       7                 :  *
       8                 :  */
       9                 : 
      10                 : /* Copyright (C) 2008, 2009 Free Software Foundation, Inc. */
      11                 : 
      12                 : /* This program is free software: you can redistribute it and/or modify
      13                 :  * it under the terms of the GNU General Public License as published by
      14                 :  * the Free Software Foundation, either version 3 of the License, or
      15                 :  * (at your option) any later version.
      16                 :  *
      17                 :  * This program is distributed in the hope that it will be useful,
      18                 :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19                 :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      20                 :  * GNU General Public License for more details.
      21                 :  *
      22                 :  * You should have received a copy of the GNU General Public License
      23                 :  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
      24                 :  */
      25                 : 
      26                 : #include <config.h>
      27                 : 
      28                 : #include <assert.h>
      29                 : #include <stdlib.h>
      30                 : #include <string.h>
      31                 : #include <locale.h>
      32                 : 
      33                 : #include <pdf-token-reader.h>
      34                 : 
                           /* Forward declarations of the file-local (static) helper functions. */
      35                 : static INLINE pdf_status_t store_char (pdf_token_reader_t reader,
      36                 :                                        pdf_char_t ch);
      37                 : static INLINE pdf_status_t store_char_grow (pdf_token_reader_t reader,
      38                 :                                             pdf_char_t ch);
      39                 : static pdf_status_t exit_state (pdf_token_reader_t reader, pdf_u32_t flags,
      40                 :                                 pdf_token_t *token);
      41                 : static INLINE pdf_status_t enter_state (pdf_token_reader_t reader,
      42                 :                                         enum pdf_token_reader_state_e state);
      43                 : static pdf_status_t flush_token (pdf_token_reader_t reader, pdf_u32_t flags,
      44                 :                                  pdf_token_t *token);
      45                 : static pdf_status_t handle_char (pdf_token_reader_t reader, pdf_u32_t flags,
      46                 :                                  pdf_char_t ch, pdf_token_t *token);
      47                 : static INLINE pdf_status_t handle_string_char (pdf_token_reader_t reader,
      48                 :                                                pdf_u32_t flags,
      49                 :                                                pdf_char_t ch,
      50                 :                                                pdf_token_t *token);
      51                 : static INLINE pdf_status_t handle_hexstring_char (pdf_token_reader_t reader,
      52                 :                                                   pdf_u32_t flags,
      53                 :                                                   pdf_char_t ch,
      54                 :                                                   pdf_token_t *token);
      55                 : static int recognise_number (pdf_buffer_t buffer, int *int_value);
      56                 : static INLINE int parse_integer (pdf_buffer_t buffer, int *int_value,
      57                 :                                  int *int_state);
      58                 : static INLINE pdf_status_t parse_real (pdf_buffer_t buffer,
      59                 :                                        char *locale_dec_pt,
      60                 :                                        double *value);
      61                 : static INLINE int validate_real (pdf_buffer_t buffer, int int_state);
      62                 : 
      63                 : 
                           /* Creates a token reader that reads from STM and stores it in *READER.
                            *
                            * The reader records the current locale's decimal point string, gets
                            * a token buffer of buffer_size_min bytes, and is reset to its initial
                            * state before being returned.
                            *
                            * Returns PDF_OK on success.  On failure *READER is left unwritten,
                            * everything allocated so far by pdf_alloc is released, and the result
                            * is PDF_ENOMEM (an allocation or the buffer creation failed) or
                            * PDF_ERROR (the snprintf result was unusable).
                            */
      64                 : pdf_status_t
      65                 : pdf_token_reader_new (pdf_stm_t stm, pdf_token_reader_t *reader)
      66             701 : {
      67                 :   pdf_status_t err;
      68                 :   pdf_token_reader_t new_tokr;
      69                 : 
                           /* err is set ahead of each step so that `goto fail' returns the
                            * code matching the step that failed */
      70             701 :   err = PDF_ENOMEM;
      71             701 :   new_tokr = pdf_alloc (sizeof (*new_tokr));
      72             701 :   if (!new_tokr)
      73               0 :     goto fail;
      74                 : 
      75             701 :   new_tokr->beg_pos = 0;
      76             701 :   new_tokr->state_pos = 0;
      77                 : 
      78                 :   /* determine the current locale's decimal point
      79                 :    * (avoid using localeconv since it may not be thread-safe) */
      80             701 :   new_tokr->decimal_point = NULL;
      81                 :   {
      82                 :     int len;
      83                 :     char decpt[16];
      84                 : 
                           /* "%#.0f" prints 1.0 as the digit '1' followed by the decimal
                            * point: the '#' flag keeps the point even with zero precision */
      85             701 :     err = PDF_ERROR;
      86             701 :     len = snprintf (decpt, sizeof (decpt), "%#.0f", 1.0);
      87             701 :     if (len <= 0 || (pdf_size_t)len >= sizeof (decpt))  /* shouldn't happen */
      88               0 :       goto fail;
      89                 : 
      90             701 :     err = PDF_ENOMEM;
      91             701 :     new_tokr->decimal_point = pdf_alloc (len);
      92             701 :     if (!new_tokr->decimal_point)
      93               0 :       goto fail;
      94                 : 
      95                 :     /* this copies the trailing '\0' due to the starting offset */
      96             701 :     memcpy (new_tokr->decimal_point, &decpt[1], len);
      97                 :   }
      98                 : 
      99                 :   /* buffer_size_min is the default buffer size, which is also the maximum
     100                 :    * size for keywords, names, numbers, etc.; strings and comments will
     101                 :    * enlarge the buffer to whatever size is needed. */
     102             701 :   new_tokr->buffer_size_min = 32768;
     103             701 :   new_tokr->buffer = pdf_buffer_new (new_tokr->buffer_size_min);
                           /* err is still PDF_ENOMEM here */
     104             701 :   if (!new_tokr->buffer)
     105               0 :     goto fail;
     106                 : 
     107             701 :   new_tokr->stream = stm;
     108             701 :   pdf_token_reader_reset (new_tokr);
     109                 : 
     110             701 :   *reader = new_tokr;
     111             701 :   return PDF_OK;
     112                 : 
     113               0 : fail:
     114               0 :   if (new_tokr)
     115                 :     {
     116               0 :       if (new_tokr->decimal_point)
     117               0 :         pdf_dealloc (new_tokr->decimal_point);
     118               0 :       pdf_dealloc (new_tokr);
     119                 :     }
     120                 : 
     121               0 :   return err;
     122                 : }
     123                 : 
                           /* Empties the token buffer by moving its write position back to 0.
                            * If the buffer has grown beyond buffer_size_min, an attempt is made
                            * to shrink it back; the result of that resize is ignored. */
     124                 : static void
     125                 : reset_buffer (pdf_token_reader_t reader)
     126                 : {
     127            2328 :   reader->buffer->wp = 0;
     128            2328 :   if (reader->buffer->size > reader->buffer_size_min)
     129                 :     {
     130                 :       /* Try to shrink the buffer, but don't worry if it fails. */
     131               1 :       pdf_buffer_resize (reader->buffer, reader->buffer_size_min);
     132                 :     }
     133                 : }
     134                 : 
                           /* Puts READER back into its initial condition: state NONE (which also
                            * records the current stream position as state_pos), substate 0 and an
                            * empty buffer.  Always returns PDF_OK. */
     135                 : pdf_status_t
     136                 : pdf_token_reader_reset (pdf_token_reader_t reader)
     137             701 : {
     138                 :   enter_state (reader, PDF_TOKR_STATE_NONE);
     139             701 :   reader->substate = 0;
     140                 :   reset_buffer (reader);
     141             701 :   return PDF_OK;
     142                 : }
     143                 : 
                           /* Releases READER together with its buffer and decimal point string.
                            * Returns PDF_EBADDATA if READER is NULL and PDF_OK otherwise. */
     144                 : pdf_status_t
     145                 : pdf_token_reader_destroy (pdf_token_reader_t reader)
     146             516 : {
     147             516 :   if (!reader) return PDF_EBADDATA;
     148                 : 
                           /* the buffer is expected to exist; the `if' below still guards the
                            * call when assertions are compiled out */
     149             516 :   assert (reader->buffer);
     150             516 :   if (reader->buffer)
     151             516 :     pdf_buffer_destroy (reader->buffer);
     152             516 :   pdf_dealloc (reader->decimal_point);
     153             516 :   pdf_dealloc (reader);
     154                 : 
     155             516 :   return PDF_OK;
     156                 : }
     157                 : 
                           /* Converts the ASCII hexadecimal digit CH ('0'-'9', 'A'-'F' or
                            * 'a'-'f') into its value 0-15.  Any other character yields 255, so
                            * callers can detect a non-digit by testing for a result >= 16. */
     158                 : static INLINE pdf_char_t
     159                 : hexval (pdf_char_t ch)
     160                 : {
     161             544 :   if (ch >= 48 && ch <= 48+9)  /* '0'--'9' */
     162             347 :     return ch - 48;
     163             197 :   if (ch >= 64+1 && ch <= 64+6)  /* 'A'--'F' */
     164               1 :     return ch - (64+1) + 10;
     165             196 :   if (ch >= 96+1 && ch <= 96+6)  /* 'a'--'f' */
     166             196 :     return ch - (96+1) + 10;
     167               0 :   return 255;
     168                 : }
     169                 : 
     170                 : 
     171                 : /* Tries to handle the given character and possibly produce a token.
     172                 :  * Sets (*token) if a token is produced, and leaves it unmodified otherwise.
     173                 :  *
     174                 :  * Returns PDF_OK if the character was accepted. Otherwise, an error code
     175                 :  * is returned, and the call can be repeated later with the same ch value.
     176                 :  * A token may be produced even if the character isn't accepted.
                            *
                            * PDF_EAGAIN means that CH ended the token being read: the previous
                            * state has been exited (possibly producing a token), but CH itself
                            * was not consumed and has to be passed in again.  PDF_EEOF is
                            * returned while the reader is in the EOF state, and PDF_EBADFILE for
                            * input that is rejected here.
                            *
                            * Characters are compared against their ASCII codes (e.g. 37 for '%')
                            * rather than against character literals.
     177                 :  */
     178                 : static pdf_status_t
     179                 : handle_char (pdf_token_reader_t reader, pdf_u32_t flags,
     180                 :              pdf_char_t ch, pdf_token_t *token)
     181           49254 : {
     182                 :   pdf_status_t rv;
     183                 : 
     184                 :   /* first, handle the states that shouldn't be exited when whitespace
     185                 :    * or a delimiter is seen */
     186                 : 
     187           49254 :   switch (reader->state)
     188                 :     {
     189                 :     case PDF_TOKR_STATE_EOF:
     190               1 :       return PDF_EEOF;
     191                 : 
     192                 :     case PDF_TOKR_STATE_STRING:
     193           84256 :       return handle_string_char (reader, flags, ch, token);
     194                 : 
     195                 :     case PDF_TOKR_STATE_HEXSTRING:
     196              92 :       return handle_hexstring_char (reader, flags, ch, token);
     197                 : 
     198                 :     case PDF_TOKR_STATE_DICTEND:
     199               1 :       if (ch != 62)  /* '>' */
     200               0 :         return PDF_EBADFILE;
     201               1 :       reader->substate = 1;  /* saw the closing '>' */
     202               1 :       return exit_state (reader, flags, token);
     203                 : 
     204                 :     case PDF_TOKR_STATE_COMMENT:
     205              30 :       if (pdf_is_eol_char (ch))
     206                 :         {
     207               3 :           rv = exit_state (reader, flags, token);
     208               3 :           if (rv != PDF_OK)
     209               0 :             return rv;
     210                 : 
     211                 :           /* don't accept this character, but process it next time */
     212               3 :           return PDF_EAGAIN;
     213                 :         }
     214                 : 
                           /* substate 1 marks a comment whose text is being discarded */
     215              12 :       if (!(flags & PDF_TOKEN_RET_COMMENTS))
     216              12 :         reader->substate = 1;
     217              12 :       if (reader->substate == 1)
     218              12 :         return PDF_OK;  /* we don't care about this comment */
     219                 : 
     220               0 :       return store_char_grow (reader, ch);
     221                 : 
     222                 :     default: ;
     223                 :     }
     224                 : 
     225                 :   /* now handle delimiters and whitespace */
     226                 : 
     227           14126 :   if (pdf_is_wspace_char (ch))
     228                 :     {
     229            2571 :       if (reader->state)
     230                 :         {
     231            1403 :           rv = exit_state (reader, flags, token);
     232            1403 :           if (rv != PDF_OK)
     233               0 :             return rv;
     234                 : 
     235                 :           /* avoid reading this byte so PDF_TOKEN_END_AT_STREAM
     236                 :            * will work properly if it's '\r' */
     237            1403 :           return PDF_EAGAIN;
     238                 :         }
     239                 : 
     240            1168 :       if ((flags & PDF_TOKEN_END_AT_STREAM) && ch == 10)  /* LF */
     241                 :         {
     242                 :           /* found the beginning of a stream */
     243                 :           enter_state (reader, PDF_TOKR_STATE_EOF);
     244                 :         }
     245            1168 :       return PDF_OK;
     246                 :     }
     247            4492 :   else if ((flags & PDF_TOKEN_END_AT_STREAM) && ch != 37)  /* 37=='%' */
     248                 :     {
     249                 :       /* only allow whitespace/comments after the "stream" keyword */
     250               0 :       return PDF_EBADFILE;
     251                 :     }
     252                 : 
     253            8984 :   if (pdf_is_delim_char (ch))
     254                 :     {
     255                 :       /* a delimiter ends the token in progress: exiting the current state
                            * leaves the reader in state NONE with substate 0 and an empty
                            * buffer, and the delimiter itself is handled on the next call */
     256             987 :       if (reader->state)
     257                 :         {
     258              29 :           rv = exit_state (reader, flags, token);
     259              29 :           if (rv != PDF_OK)
     260               0 :             return rv;
     261              29 :           return PDF_EAGAIN;
     262                 :         }
     263                 : 
     264             958 :       switch (ch)
     265                 :         {
     266                 :         case 37:  /* '%' */
     267                 :           enter_state (reader, PDF_TOKR_STATE_COMMENT);
     268               3 :           return PDF_OK;
     269                 :         case 40:  /* '(' */
     270                 :           enter_state (reader, PDF_TOKR_STATE_STRING);
     271               7 :           reader->intparam = 0;
     272               7 :           return PDF_OK;
     273                 :         case 41:  /* ')' */
     274                 :           /* this shouldn't occur outside the STRING and COMMENT states */
     275               0 :           return PDF_EBADFILE;
     276                 :         case 47:  /* '/' */
     277                 :           enter_state (reader, PDF_TOKR_STATE_NAME);
     278             512 :           return PDF_OK;
     279                 :         case 60:  /* '<' */
     280                 :           enter_state (reader, PDF_TOKR_STATE_HEXSTRING);
     281               4 :           return PDF_OK;
     282                 :         case 62:  /* '>' */
     283                 :           enter_state (reader, PDF_TOKR_STATE_DICTEND);
     284               1 :           return PDF_OK;
     285                 :         case 91:  /* '[' */
     286                 :           /* fall through */
     287                 :         case 93:  /* ']' */
     288                 :           /* fall through */
     289                 :         case 123: /* '{' */
     290                 :           /* fall through */
     291                 :         case 125: /* '}' */
     292                 :           /* exit_state may have emitted a token, so we can't emit another
     293                 :            * one now; we'll do it when exiting the PENDING state */
     294                 :           enter_state (reader, PDF_TOKR_STATE_PENDING);
     295             431 :           reader->charparam = ch;
     296             431 :           return PDF_OK;
     297                 :         }
     298                 : 
     299                 :       /* not reached (all delimiter chars should be handled) */
     300               0 :       assert (0);
     301                 :     }
     302                 : 
     303                 :   /* ch is a regular character */
     304                 : 
     305            3505 :   switch (reader->state)
     306                 :     {
     307                 :     case PDF_TOKR_STATE_PENDING:
     308               1 :       rv = exit_state (reader, flags, token);
     309               1 :       if (rv != PDF_OK)
     310               0 :         return rv;
     311               1 :       return PDF_EAGAIN;
     312                 : 
     313                 :     case PDF_TOKR_STATE_NONE:
     314                 :       enter_state (reader, PDF_TOKR_STATE_KEYWORD);
     315                 :       /* fall through */
     316                 : 
     317                 :     case PDF_TOKR_STATE_KEYWORD:
     318                 :       /* Note: numbers are treated as keywords until flush_token is called. */
     319            4944 :       return store_char (reader, ch);
     320                 : 
     321                 :     case PDF_TOKR_STATE_NAME:
                           /* substate 0: ordinary name character; 1: expecting the first hex
                            * digit of a '#' escape; otherwise: expecting the second digit */
     322            1032 :       if (reader->substate == 0)
     323                 :         {
     324             518 :           if ((ch < 0x21) || (ch > 0x7e))
     325                 :             {
     326                 :               /* Invalid character in a name.  */
     327             156 :               return PDF_EBADFILE;
     328                 :             }
     329                 : 
     330             362 :           if (ch != 35  /* '#' */
     331                 :               || (flags & PDF_TOKEN_NO_NAME_ESCAPES) )
     332             210 :             return store_char (reader, ch);
     333                 : 
     334             257 :           reader->substate = 1;
     335             257 :           return PDF_OK;
     336                 :         }
     337                 : 
                           /* from here on ch holds the digit's value, not the character */
     338            1028 :       if ( (ch = hexval (ch)) >= 16 )
     339               0 :         return PDF_EBADFILE;
     340                 : 
     341             514 :       if (reader->substate == 1)  /* the first hex digit of an escape */
     342                 :         {
     343             257 :           reader->substate = 2;
     344             257 :           reader->charparam = ch;
     345             257 :           return PDF_OK;
     346                 :         }
     347                 : 
                           /* combine the saved high nibble with the second digit */
     348             257 :       ch = (reader->charparam << 4) | ch;
     349             257 :       if (ch == 0)  /* the PDF spec forbids "#00" */
     350               0 :         return PDF_EBADFILE;
     351                 : 
                           /* stay in the escape substate if the store fails, so that the
                            * escape is only finished once the byte has been stored */
     352             514 :       rv = store_char (reader, ch);
     353             257 :       if (rv == PDF_OK) reader->substate = 0;
     354             257 :       return rv;
     355                 : 
     356                 :     default:
     357               0 :       assert (0);
     358                 :       return PDF_ERROR;
     359                 :   }
     360                 : }
     361                 : 
     362                 : 
                           /* Returns nonzero when the buffer's write position is still below its
                            * size, i.e. at least one more byte fits. */
     363                 : static INLINE int
     364                 : can_store_char (const pdf_token_reader_t reader)
     365                 : {
     366           44943 :   return reader->buffer->wp < reader->buffer->size;
     367                 : }
     368                 : 
                           /* Doubles the size of the reader's buffer.  Returns PDF_EIMPLLIMIT if
                            * the doubled size is smaller than the current size (the multiplication
                            * wrapped around), otherwise the status of pdf_buffer_resize. */
     369                 : static pdf_status_t
     370                 : enlarge_buffer (pdf_token_reader_t reader)
     371                 : {
     372               1 :   pdf_size_t size = reader->buffer->size, newsize = size * 2;
     373               1 :   if (newsize < size)
     374               0 :     return PDF_EIMPLLIMIT;
     375                 : 
     376               1 :   return pdf_buffer_resize (reader->buffer, newsize);
     377                 : }
     378                 : 
                           /* Appends CH to the buffer without ever growing it.  Returns
                            * PDF_EIMPLLIMIT when the buffer is already full, PDF_OK otherwise. */
     379                 : static INLINE pdf_status_t
     380                 : store_char (pdf_token_reader_t reader, pdf_char_t ch)
     381                 : {
     382            2834 :   if (!can_store_char (reader))
     383               0 :     return PDF_EIMPLLIMIT;
     384            2834 :   reader->buffer->data[reader->buffer->wp++] = ch;
     385            2834 :   return PDF_OK;
     386                 : }
     387                 : 
                           /* Appends CH to the buffer, enlarging the buffer first when it is
                            * full.  Returns the error from enlarge_buffer if growing fails (CH is
                            * then not stored), PDF_OK otherwise. */
     388                 : static INLINE pdf_status_t
     389                 : store_char_grow (pdf_token_reader_t reader, pdf_char_t ch)
     390           42109 : {
     391           42109 :   if (!can_store_char (reader))
     392                 :     {
     393               1 :       pdf_status_t rv = enlarge_buffer(reader);
     394               1 :       if (rv != PDF_OK)
     395               0 :         return rv;
     396                 :     }
     397           42109 :   reader->buffer->data[reader->buffer->wp++] = ch;
     398           42109 :   return PDF_OK;
     399                 : }
     400                 : 
                           /* Switches READER to STATE and records the stream position reported by
                            * pdf_stm_tell as state_pos.  The substate is not touched.  Always
                            * returns PDF_OK. */
     401                 : static INLINE pdf_status_t
     402                 : enter_state (pdf_token_reader_t reader,
     403                 :              enum pdf_token_reader_state_e state)
     404                 : {
     405            2485 :   reader->state = state;
     406            2485 :   reader->state_pos = pdf_stm_tell (reader->stream);
     407                 : 
     408            2485 :   return PDF_OK;
     409                 : }
     410                 : 
                           /* Turns what has been collected for the current state into a token.
                            *
                            * When a token is produced it is stored in *TOKEN, beg_pos is set to
                            * state_pos, and the buffer is reset.  For a comment that is not being
                            * returned, *TOKEN is left alone but the buffer is still reset.  In
                            * state NONE the function returns PDF_OK at once without changing
                            * anything.
                            *
                            * Returns PDF_EEOF in the EOF state; PDF_EBADFILE when a name escape,
                            * string, hex string or dictionary end is incomplete; otherwise the
                            * status of parse_real or of the token constructor that was called.
                            * reader->state and reader->substate are not modified here.
                            */
     411                 : static pdf_status_t
     412                 : flush_token (pdf_token_reader_t reader, pdf_u32_t flags, pdf_token_t *token)
     413            1806 : {
     414                 :   pdf_status_t rv;
     415                 :   pdf_token_t new_tok;
     416            1806 :   pdf_char_t *data = reader->buffer->data;
     417            1806 :   int datasize = reader->buffer->wp;
     418                 : 
     419            1806 :   switch (reader->state)
     420                 :     {
     421                 :     case PDF_TOKR_STATE_NONE:
     422               3 :       return PDF_OK;  /* no state to exit */
     423                 : 
     424                 :     case PDF_TOKR_STATE_EOF:
     425             176 :       return PDF_EEOF;  /* can't continue parsing after EOF */
     426                 : 
     427                 :     case PDF_TOKR_STATE_COMMENT:
     428               3 :       if ((reader->substate == 1) || !(flags & PDF_TOKEN_RET_COMMENTS))
     429                 :         goto finish;  /* don't return a token */
     430                 : 
     431               0 :       rv = pdf_token_comment_new (data, datasize, &new_tok);
     432               0 :       break;
     433                 : 
     434                 :     case PDF_TOKR_STATE_KEYWORD:
     435                 :       {
                           /* recognise_number's result selects the token type: 1 gives an
                            * integer token, 2 a real token, anything else a keyword */
     436                 :         int value;
     437            1650 :         int ntyp = recognise_number (reader->buffer, &value);
     438             825 :         if (ntyp == 1)
     439             173 :           rv = pdf_token_integer_new (value, &new_tok);
     440             652 :         else if (ntyp == 2)
     441                 :           {
     442                 :             double realvalue;
     443              26 :             rv = parse_real (reader->buffer,
     444                 :                              reader->decimal_point,
     445                 :                              &realvalue);
     446              13 :             if (rv != PDF_OK)
     447               0 :               return rv;
                           /* the parsed double is narrowed to float for the token */
     448              13 :             rv = pdf_token_real_new ((float)realvalue, &new_tok);
     449                 :           }
     450                 :         else
     451             639 :           rv = pdf_token_keyword_new (data, datasize, &new_tok);
     452                 :       }
     453                 :       break;
     454                 : 
     455                 :     case PDF_TOKR_STATE_NAME:
     456             356 :       if (reader->substate != 0)  /* reading an escape sequence */
     457               0 :         return PDF_EBADFILE;
     458                 : 
     459             356 :       rv = pdf_token_name_new (data, datasize, &new_tok);
     460             356 :       break;
     461                 : 
     462                 :     case PDF_TOKR_STATE_STRING:
     463               7 :       if (reader->intparam >= 0)  /* didn't see the closing ')' */
     464               0 :         return PDF_EBADFILE;
     465                 : 
     466               7 :       rv = pdf_token_string_new (data, datasize, &new_tok);
     467               7 :       break;
     468                 : 
     469                 :     case PDF_TOKR_STATE_HEXSTRING:
     470               3 :       if (reader->substate != 3)  /* didn't see the closing '>' */
     471               0 :         return PDF_EBADFILE;
     472                 : 
     473               3 :       rv = pdf_token_string_new (data, datasize, &new_tok);
     474               3 :       break;
     475                 : 
     476                 :     case PDF_TOKR_STATE_DICTEND:
     477               1 :       if (reader->substate != 1)  /* didn't see a second '>' */
     478               0 :         return PDF_EBADFILE;
     479                 : 
     480               1 :       rv = pdf_token_valueless_new (PDF_TOKEN_DICT_END, &new_tok);
     481               1 :       break;
     482                 : 
     483                 :     case PDF_TOKR_STATE_PENDING:
                           /* charparam holds the delimiter whose token was deferred */
     484             432 :       switch (reader->charparam)
     485                 :         {
     486                 :         case 60:  /* '<' */
     487               1 :           rv = pdf_token_valueless_new (PDF_TOKEN_DICT_START, &new_tok);
     488               1 :           break;
     489                 :         case 91:  /* '[' */
     490               1 :           rv = pdf_token_valueless_new (PDF_TOKEN_ARRAY_START, &new_tok);
     491               1 :           break;
     492                 :         case 93:  /* ']' */
     493               1 :           rv = pdf_token_valueless_new (PDF_TOKEN_ARRAY_END, &new_tok);
     494               1 :           break;
     495                 :         case 123: /* '{' */
     496             215 :           rv = pdf_token_valueless_new (PDF_TOKEN_PROC_START, &new_tok);
     497             215 :           break;
     498                 :         case 125: /* '}' */
     499             214 :           rv = pdf_token_valueless_new (PDF_TOKEN_PROC_END, &new_tok);
     500             214 :           break;
     501                 :         default:
     502               0 :           assert (0);
     503                 :           return PDF_ERROR;
     504                 :         }
     505                 :       break;
     506                 : 
     507                 :     default:
     508               0 :       assert (0);
     509                 :       return PDF_ERROR;
     510                 :     }
     511                 : 
                           /* a failed token constructor leaves the buffer and *token untouched */
     512            1624 :   if (rv != PDF_OK)
     513               0 :     return rv;
     514                 : 
     515            1624 :   *token = new_tok;
     516                 : 
     517                 :   /* Set the beginning position of this state */
     518            1624 :   reader->beg_pos = reader->state_pos;
     519                 : 
     520            1627 : finish:
     521                 :   reset_buffer (reader);
     522            1627 :   return PDF_OK;
     523                 : }
     524                 : 
     525                 : 
                           /* Flushes the token for the current state via flush_token and, only if
                            * that returned PDF_OK, puts the reader back into state NONE with
                            * substate 0.  Returns flush_token's status unchanged. */
     526                 : static pdf_status_t
     527                 : exit_state (pdf_token_reader_t reader, pdf_u32_t flags, pdf_token_t *token)
     528            1806 : {
     529            1806 :   pdf_status_t rv = flush_token (reader, flags, token);
     530            1806 :   if (rv == PDF_OK)
     531                 :     {
     532            1630 :       reader->state = PDF_TOKR_STATE_NONE;
     533            1630 :       reader->substate = 0;
     534                 :     }
     535            1806 :   return rv;
     536                 : }
     537                 : 
     538                 : 
     539                 : static INLINE pdf_status_t
     540                 : handle_string_char (pdf_token_reader_t reader,
     541                 :                     pdf_u32_t flags,
     542                 :                     pdf_char_t ch,
     543                 :                     pdf_token_t *token)
     544                 : {
     545                 :   pdf_status_t rv;
     546           42129 : start:
     547           42129 :   switch (reader->substate)
     548                 :     {
     549                 :       case 1:  /* ignore LF */
     550               6 :         reader->substate = 0;
     551               6 :         if (ch == 10)
     552               3 :           return PDF_OK;
     553                 :         /* fall through */
     554                 : 
     555                 :       case 0:  /* no special state */
     556                 :         {
     557           42104 :           if (ch == 92)  /* '\\' */
     558                 :             {
     559              18 :               reader->substate = 2;  /* start an escape sequence */
     560              18 :               return PDF_OK;
     561                 :             }
     562           42086 :           else if (ch == 41 && reader->intparam <= 0)  /* ')'; end of string */
     563                 :             {
     564               7 :               reader->intparam = -1;
     565               7 :               return exit_state (reader, flags, token);
     566                 :             }
     567                 : 
     568           42079 :           pdf_bool_t wasCR = (ch == 13);
     569           42079 :           if (wasCR)
     570               3 :             ch = 10;  /* treat as LF */
     571           42079 :           rv = store_char_grow (reader, ch);
     572                 : 
     573           42079 :           if (rv == PDF_OK)
     574                 :             {
     575           42079 :               if (wasCR)  /* '\r' */
     576               3 :                 reader->substate = 1;  /* ignore the next char if it's LF */
     577           42076 :               else if (ch == 40)  /* '(' */
     578               2 :                 ++reader->intparam;
     579           42074 :               else if (ch == 41)  /* ')' */
     580               2 :                 --reader->intparam;
     581                 :             }
     582                 : 
     583           42079 :           return rv;
     584                 :         }
     585                 : 
     586                 :       case 2:  /* just saw a '\\' (starting an escape sequence) */
     587              18 :         reader->substate = 0;
     588              18 :         if (ch == 98)  /* 'b' */
     589               1 :           ch = 8;  /* BS: backspace */
     590              17 :         else if (ch == 102)  /* 'f' */
     591               1 :           ch = 12;  /* FF: formfeed */
     592              16 :         else if (ch == 110)  /* 'n' */
     593               1 :           ch = 10;  /* NL: newline */
     594              15 :         else if (ch == 114)  /* 'r' */
     595               1 :           ch = 13;  /* CR: carriage return */
     596              14 :         else if (ch == 116)  /* 't' */
     597               1 :           ch = 9;  /* HT: horizontal tab */
     598              13 :         else if (ch == 10)  /* NL */
     599               1 :           return PDF_OK;  /* ignore the line break */
     600              12 :         else if (ch == 13)  /* CR */
     601                 :         {
     602                 :           /* ignore the line break; also ignore the next byte if it's LF */
     603               3 :           reader->substate = 1;
     604               3 :           return PDF_OK;
     605                 :         }
     606               9 :         else if (ch >= 48 && ch <= 48+7)  /* digits '0'--'7' */
     607                 :           {
     608                 :             /* starting an octal escape; we'll read three digits even if the
     609                 :              * first is '4'--'7' (and calculate the final char modulo 256),
     610                 :              * since the PDF/PS specs say to ignore high-order overflow */
     611               2 :             reader->substate = 3;
     612               2 :             reader->charparam = (ch-48);
     613               2 :             return PDF_OK;
     614                 :           }
     615                 : 
     616                 :         /* for any other character, including '(', ')', and '\\',
     617                 :          * store the same character (dropping the leading backslash) */
     618              12 :         return store_char_grow (reader, ch);
     619                 : 
     620                 :       case 3:  /* saw 1 digit of an octal escape */
     621                 :         /* fall through */
     622                 :       case 4:  /* saw 2 digits of an octal escape */
     623               4 :         if (ch < 48 || ch > 48+7)  /* not digits '0'--'7' */
     624                 :           {
     625               1 :             rv = store_char_grow (reader, reader->charparam);
     626               1 :             if (rv != PDF_OK) return rv;
     627                 : 
     628                 :             /* ch isn't part of the escape sequence, so retry */
     629               1 :             reader->substate = 0;
     630                 :             goto start;
     631                 :           }
     632                 : 
     633                 :         /* ch is a digit from '0'--'7' */
     634               3 :         reader->charparam = ((reader->charparam & 0x1f) << 3) | (ch - 48);
     635               3 :         if (reader->substate == 4)  /* this was the final digit */
     636                 :           {
     637               1 :             rv = store_char_grow (reader, reader->charparam);
     638               1 :             if (rv != PDF_OK) return rv;
     639                 : 
     640               1 :             reader->substate = 0;
     641               1 :             return PDF_OK;
     642                 :           }
     643                 : 
     644               2 :         reader->substate = 4;
     645               2 :         return PDF_OK;
     646                 : 
     647                 :       default:
     648               0 :         assert (0);
     649                 :     }
     650                 : }
     651                 : 
     652                 : 
     653                 : static INLINE pdf_status_t
     654                 : handle_hexstring_char (pdf_token_reader_t reader,
     655                 :                        pdf_u32_t flags,
     656                 :                        pdf_char_t ch,
     657                 :                        pdf_token_t *token)
     658                 : {
     659                 :   pdf_status_t rv;
     660                 :   /* Process one input byte (ch) while the reader is inside a hexadecimal
                         :    * string, i.e. after an initial '<'.
                         :    *
                         :    * reader->substate values used here:
                         :    *   0 = nothing seen yet after the initial '<'
                         :    *   1 = waiting for the first hex digit of a pair
                         :    *   2 = first digit of a pair is saved in reader->charparam
                         :    *   3 = the closing '>' was seen
                         :    *
                         :    * Returns PDF_OK when the byte was accepted, PDF_EBADFILE when hexval
                         :    * yields 255 for it (presumably "not a hex digit"; hexval is defined
                         :    * outside this chunk), any error from store_char_grow, or whatever
                         :    * exit_state returns when a second '<' or the closing '>' is seen.
                         :    */
     661              46 :   if (reader->substate == 0)
     662                 :     {
     663                 :       /* this is the first character after the initial '<' */
     664               4 :       if (ch == 60)  /* '<' */
     665                 :         {
     666                 :           /* this was actually the start of a dictionary: leave the hex
                         :            * string state, remember the second '<' in charparam, and let
                         :            * exit_state finish up */
     667               1 :           reader->state = PDF_TOKR_STATE_PENDING;
     668               1 :           reader->charparam = ch;
     669               1 :           return exit_state (reader, flags, token);
     670                 :         }
     671                 : 
     672               3 :       reader->substate = 1;
     673                 :     }
     674                 : 
     675              90 :   if (pdf_is_wspace_char (ch))  /* whitespace is accepted and skipped */
     676              12 :     return PDF_OK;
     677                 : 
     678              33 :   if (ch == 62)  /* '>': end of hex string */
     679                 :     {
     680               3 :       if (reader->substate == 2)
     681                 :         {
     682                 :           /* the last digit is missing; assume it's '0', so the saved
                         :            * digit becomes the high nibble of the final byte */
     683               2 :           rv = store_char_grow (reader, reader->charparam << 4);
     684               2 :           if (rv != PDF_OK) return rv;
     685                 :         }
     686                 : 
     687               3 :       reader->substate = 3;  /* saw end of string */
     688               3 :       return exit_state (reader, flags, token);
     689                 :     }
     690                 : 
     691              60 :   if ( (ch = hexval (ch)) == 255 )  /* ch now holds the digit's value */
     692               0 :     return PDF_EBADFILE;
     693                 : 
     694              30 :   if (reader->substate == 1)  /* first character in a pair */
     695                 :     {
     696              16 :       reader->substate = 2;
     697              16 :       reader->charparam = ch;  /* keep it until the second digit arrives */
     698              16 :       return PDF_OK;
     699                 :     }
     700                 : 
     701              14 :   rv = store_char_grow (reader, (reader->charparam << 4) | ch);  /* high nibble | low nibble */
     702              14 :   if (rv == PDF_OK)
     703              14 :     reader->substate = 1;  /* pair complete; expect a new first digit */
     704              14 :   return rv;
     705                 : }
     706                 : 
     707                 : pdf_status_t
     708                 : pdf_token_read (pdf_token_reader_t reader, pdf_u32_t flags, pdf_token_t *token)
     709            1960 : {
     710                 :   pdf_status_t rv;
     711                 :   pdf_char_t ch;
     712            1960 :   pdf_token_t new_token = NULL;
     713                 :   /* Read the next token from reader->stream into *token.
                         :    *
                         :    * Bytes are peeked one at a time and offered to handle_char; a byte
                         :    * is removed from the stream (pdf_stm_read_char) only when
                         :    * handle_char returns PDF_OK for it.
                         :    *
                         :    * Returns:
                         :    *   PDF_OK       - a token was produced and stored in *token
                         :    *   PDF_EBADDATA - reader, reader->stream or token is NULL
                         :    *   PDF_EEOF     - the stream ended and exit_state produced no token
                         :    *   other codes  - passed through from pdf_stm_peek_char, handle_char
                         :    *                  (except PDF_EAGAIN, see the loop) or exit_state
                         :    * *token is written only on the PDF_OK path.
                         :    */
     714            1960 :   if (!reader || !reader->stream || !token)
     715               0 :     return PDF_EBADDATA;
     716                 : 
     717           49612 :   while ( (rv = pdf_stm_peek_char (reader->stream, &ch)) == PDF_OK )
     718                 :     {
     719           49254 :       rv = handle_char (reader, flags, ch, &new_token);
     720           49254 :       if (rv == PDF_OK)
     721                 :         {
     722                 :           /* The character we peeked at was accepted, so get rid of it.
                         :            * NOTE(review): the status returned by pdf_stm_read_char is
                         :            * not checked here. */
     723           47661 :           pdf_stm_read_char (reader->stream, &ch);
     724                 :         }
     725                 : 
     726           49254 :       if (new_token)
     727                 :         {
     728                 :           /* Don't return an error code if we got a valid token.
     729                 :            * We'll probably see the same error on the next call since we
     730                 :            * didn't call read_char. */
     731            1445 :           assert (rv == PDF_OK || rv == PDF_EAGAIN);
     732                 :           goto ret_token;
     733                 :         }
     734           47809 :       else if (rv != PDF_OK && rv != PDF_EAGAIN)  /* error and no token: give up */
     735             157 :         return rv;
     736                 :     }  /* on PDF_EAGAIN without a token the byte was not read; loop and peek again */
     737                 : 
     738                 :   /* peek_char returned an error code (rv); only end-of-file is handled
                         :    * below, anything else is returned to the caller as is */
     739             358 :   if (rv != PDF_EEOF)
     740               0 :     return rv;
     741                 : 
     742             358 :   rv = exit_state (reader, flags, &new_token);  /* finish whatever state was in progress at EOF */
     743             358 :   if (rv != PDF_OK)
     744             176 :     return rv;
     745                 : 
     746             182 :   reader->state = PDF_TOKR_STATE_EOF;
     747             182 :   if (new_token)
     748             179 :     goto ret_token;  /* exit_state produced a final token */
     749                 :   else
     750               3 :     return PDF_EEOF;
     751                 : 
     752            1624 : ret_token:
     753            1624 :   assert (new_token);
     754            1624 :   *token = new_token;
     755            1624 :   return PDF_OK;
     756                 : }
     757                 : 
     758                 : pdf_size_t
     759                 : pdf_token_reader_begin_pos (pdf_token_reader_t reader)
     760            1422 : {  /* Accessor: returns the reader's beg_pos field unchanged.
                         :     * NOTE(review): presumably the stream position at which the most
                         :     * recent token began -- beg_pos is set outside this chunk; confirm.
                         :     * reader is dereferenced without a NULL check. */
     761            1422 :   return reader->beg_pos;
     762                 : }
     763                 : 
     764                 : static INLINE int
     765                 : parse_integer (pdf_buffer_t buffer, int *int_value, int *int_state)
     766                 : {
     767                 :   /* Parse an ASCII decimal integer (an optional leading '+' or '-'
     768                 :    * followed by digits '0'--'9') starting at buffer->rp, possibly leaving
     769                 :    * unread bytes at the end; buffer->rp is left on the first unread byte.
     770                 :    * Return value is 0 on failure (no digits seen), or a bitmask otherwise:
     771                 :    *  1 = valid integer
     772                 :    *  2 = signed (an explicit '+' or '-' was present)
     773                 :    *  4 = overflowed (no value stored in *int_value)
     774                 :    */
     775                 : 
     776             825 :   int sign = 0, tmpint = 0, overflowed = 0, ret;
     777                 :   /* Integer states (int_state), written through the pointer for the caller:
     778                 :    *   0 = at start (looking for sign or digits)
     779                 :    *   1 = saw sign
     780                 :    *   2 = saw digits
     781                 :    */
     782                 : 
     783             825 :   *int_state = 0;
     784             237 :   for (; buffer->rp < buffer->wp; ++buffer->rp)
     785                 :     {
     786                 :       int chval;
     787             888 :       pdf_char_t ch = buffer->data[buffer->rp];
     788             888 :       if (ch == 43 || ch == 45)  /* '+','-' */
     789                 :         {
     790               0 :           if (*int_state != 0)  /* a sign is only allowed as the first byte */
     791                 :             goto out;
     792                 : 
     793               0 :           *int_state = 1;
     794               0 :           sign = (ch == 43) ? 1 : -1;
     795                 :           continue;
     796                 :         }
     797                 : 
     798             888 :       chval = ch - 48;  /* assume this is a digit */
     799             888 :       if (chval < 0 || chval > 9)
     800                 :         goto out;  /* not a digit: stop here, leaving rp on this byte */
     801                 : 
     802             237 :       *int_state = 2;
     803             237 :       if (overflowed)
     804                 :         continue;  /* keep consuming digits, but stop accumulating */
     805                 : 
     806                 :       /* convert the digits to an integer, if possible; the checks below
                         :        * test whether tmpint*10 + chval would leave the int range before
                         :        * the value is actually computed */
     807             223 :       if (sign < 0)
     808                 :         {
     809               0 :           chval = -chval;  /* negative values are accumulated downwards */
     810               0 :           if ( tmpint < (INT_MIN/10)
     811                 :               || (tmpint == (INT_MIN/10) && chval < (INT_MIN%10)) )
     812                 :             {
     813               0 :               overflowed = 1;  /* would overflow */
     814                 :               continue;
     815                 :             }
     816                 :         }
     817                 :       else
     818                 :         {
     819             223 :           if ( tmpint > (INT_MAX/10)
     820                 :               || (tmpint == (INT_MAX/10) && chval > (INT_MAX%10)) )
     821                 :             {
     822               1 :               overflowed = 1;  /* would overflow */
     823                 :               continue;
     824                 :             }
     825                 :         }
     826                 : 
     827             222 :       tmpint += chval + (tmpint * 9);  /* same result as tmpint = tmpint*10 + chval */
     828                 :     }
     829                 : 
     830             825 : out:
     831             825 :   if (*int_state != 2)
     832             639 :     return 0;  /* never saw any digits */
     833                 : 
     834             186 :   ret = 1;
     835             186 :   if (sign) ret += 2;
     836             186 :   if (overflowed)
     837               1 :     ret += 4;
     838                 :   else
     839             185 :     *int_value = tmpint;  /* the only place *int_value is written */
     840                 : 
     841             186 :   return ret;
     842                 : }
     843                 : 
     844                 : 
     845                 : static INLINE int
     846                 : validate_real (pdf_buffer_t buffer, int int_state)
     847                 : {
     848                 :   /* Determines whether the bytes from buffer->rp to buffer->wp complete a
     849                 :    * valid PS/PDF real number; assumes the initial sign was already read (if
     850                 :    * present), and any data before buffer->rp is a valid integer.
     851                 :    * buffer->rp is advanced; on failure it is left on the offending byte.
     852                 :    * Return value:
     853                 :    *   0 = not a real number
     854                 :    *   1 = valid PDF/PS real
     855                 :    */
     856                 : 
     857             651 :   int seen_point = 0;
     858                 :   /* Integer states (int_state), continued from the caller's value:
     859                 :    *   0 = at start
     860                 :    *   1 = saw sign
     861                 :    *   2 = saw digits
     862                 :    */
     863                 : 
     864              29 :   for (; buffer->rp < buffer->wp; ++buffer->rp)
     865                 :     {
     866             668 :       pdf_char_t ch = buffer->data[buffer->rp];
     867             668 :       if (ch == 46)  /* '.' */
     868                 :         {
     869              12 :           if (!seen_point)
     870              12 :             seen_point = 1;
     871                 :           else
     872               0 :             return 0;  /* a second '.' is not allowed */
     873                 :         }
     874             656 :       else if (ch == 43 || ch == 45)  /* '+','-' */
     875                 :         {  /* NOTE(review): a sign is accepted whenever int_state is 0, even
                         :             * after a '.' -- e.g. the bytes '.', '+', '5' pass validation.
                         :             * That looks unintended (parse_real would then hand such text
                         :             * to strtod); confirm and consider also testing seen_point. */
     876               0 :           if (int_state == 0)
     877               0 :             int_state = 1;
     878                 :           else
     879               0 :             return 0;
     880                 :         }
     881             656 :       else if (ch >= 48+0 && ch <= 48+9)  /* '0'--'9' */
     882              17 :         int_state = 2;
     883                 :       else
     884             639 :         return 0;  /* any other byte disqualifies the number */
     885                 :     }
     886                 : 
     887              12 :   return (int_state == 2);  /* only valid if we saw a digit */
     888                 : }
     889                 : 
     890                 : 
     891                 : /* Given a buffer containing a validated PDF real (in ASCII), convert it to a
     892                 :  * double by translating it to the execution character set, replacing '.' with
     893                 :  * the locale's decimal point, and calling strtod.
                         :  *
                         :  * The whole buffer (bytes 0 .. wp-1) is scanned, using buffer->rp as the
                         :  * cursor.  Returns PDF_OK if strtod consumed the entire translated string,
                         :  * PDF_ENOMEM if the temporary string could not be allocated, and PDF_ERROR
                         :  * otherwise.  *value is assigned whenever strtod is reached, even if the
                         :  * result is then PDF_ERROR. */
     894                 : static INLINE pdf_status_t
     895                 : parse_real (pdf_buffer_t buffer, char *locale_dec_pt, double *value)
     896                 : {
     897                 :   pdf_status_t ret;
     898                 :   size_t tmplen, wpos, ptlen;
     899                 :   char *tmp, *endptr;
     900                 : 
     901              13 :   ptlen = strlen (locale_dec_pt);
     902                 :   /* we may remove 1 byte ('.') and replace it with ptlen bytes.
                         :    * NOTE(review): this bound assumes a '.' is present or ptlen >= 1;
                         :    * with an empty locale_dec_pt and no '.' in the buffer, the last
                         :    * byte would trip the wpos >= tmplen check below -- confirm whether
                         :    * that case can occur. */
     903              13 :   tmplen = buffer->wp - 1 + ptlen;
     904                 : 
     905              13 :   tmp = pdf_alloc (tmplen + 1);  /* +1 for the terminating '\0' */
     906              13 :   if (!tmp)
     907               0 :     return PDF_ENOMEM;
     908                 : 
     909              13 :   wpos = 0;
     910              13 :   ret = PDF_ERROR;  /* nothing should fail if the buffer was validated */
     911              79 :   for (buffer->rp = 0; buffer->rp < buffer->wp; ++buffer->rp)
     912                 :     {
     913              66 :       pdf_char_t ch = buffer->data[buffer->rp];
     914              66 :       if (wpos >= tmplen)  /* no room left in tmp */
     915                 :         goto out;
     916                 : 
     917              66 :       if (ch == 46)  /* '.' */
     918                 :         {
     919              12 :           if (wpos + ptlen > tmplen)  /* decimal point string would not fit */
     920                 :             goto out;
     921                 : 
     922              12 :           memcpy (tmp + wpos, locale_dec_pt, ptlen);
     923              12 :           wpos += ptlen;
     924                 :         }
     925              54 :       else if (ch == 43)  /* '+' */
     926               0 :         tmp[wpos++] = '+';
     927              54 :       else if (ch == 45)  /* '-' */
     928               0 :         tmp[wpos++] = '-';
     929              54 :       else if (ch >= 48+0 && ch <= 48+9)  /* '0'--'9' */
     930              54 :         tmp[wpos++] = '0' + (ch-48);  /* ASCII digit -> execution charset digit */
     931                 :       else
     932                 :         goto out;  /* unexpected byte: report PDF_ERROR */
     933                 :     }
     934                 : 
     935                 :   /* null-terminate the new string, and call strtod to get its value
     936                 :    * (strtof would also work if it's available) */
     937              13 :   tmp[wpos] = '\0';
     938              13 :   *value = strtod (tmp, &endptr);
     939              13 :   if (endptr == tmp + wpos)  /* success only if every character was consumed */
     940              13 :     ret = PDF_OK;
     941                 : 
     942              13 : out:
     943              13 :   pdf_dealloc (tmp);  /* tmp is released on every path that allocated it */
     944              13 :   return ret;
     945                 : }
     946                 : 
     947                 : 
     948                 : /* Classify the contents of buffer (bytes 0 .. wp-1) as a number.
                         :  * buffer->rp is reset to 0 and then advanced by the helpers.
     949                 :  * Return value:
     950                 :  *   0 = not a number
     951                 :  *   1 = integer (stored in *int_value)
     952                 :  *   2 = real (*int_value is not written)
     953                 :  */
     954                 : static int
     955                 : recognise_number (pdf_buffer_t buffer, int *int_value)
     956                 : {
     957             825 :   int rv, tmpint = 0, int_state = 0;
     958                 : 
     959                 :   /* try to parse as an integer */
     960                 : 
     961             825 :   buffer->rp = 0;
     962             825 :   rv = parse_integer (buffer, &tmpint, &int_state);
     963                 : 
     964             825 :   if (buffer->rp < buffer->wp)  /* didn't look at the whole buffer */
     965                 :     {
     966                 :       /* check whether the remaining bytes make it a real; int_state tells
                         :        * validate_real what parse_integer already saw */
     966            1302 :       rv = validate_real (buffer, int_state);
     967             651 :       if (rv == 1)
     968              12 :         return 2;
     969                 : 
     970             639 :       return 0;
     971                 :     }
     972                 : 
     973             174 :   if (!rv) return 0;  /* whole buffer consumed but no digits were seen */
     974             174 :   else if (rv & 4)
     975               1 :     return 2;  /* integer overflowed, but could be read as a real */
     976                 : 
     977             173 :   *int_value = tmpint;
     978             173 :   return 1;
     979                 : }
     980                 : 
     981                 : 
     982                 : /* End of pdf-token-reader.c */

Generated by: LTP GCOV extension version 1.6