1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include <stdio.h> 25 #include <ctype.h> 26 #include <string.h> 27 #include "cppdef.h" 28 #include "cpp.h" 29 30 /*ER evaluate macros to pDefOut */ 31 32 /* 33 * skipnl() skips over input text to the end of the line. 34 * skipws() skips over "whitespace" (spaces or tabs), but 35 * not skip over the end of the line. It skips over 36 * TOK_SEP, however (though that shouldn't happen). 37 * scanid() reads the next token (C identifier) into token[]. 38 * The caller has already read the first character of 39 * the identifier. Unlike macroid(), the token is 40 * never expanded. 41 * macroid() reads the next token (C identifier) into token[]. 42 * If it is a #defined macro, it is expanded, and 43 * macroid() returns TRUE, otherwise, FALSE. 44 * catenate() Does the dirty work of token concatenation, TRUE if it did. 45 * scanstring() Reads a string from the input stream, calling 46 * a user-supplied function for each character. 47 * This function may be output() to write the 48 * string to the output file, or save() to save 49 * the string in the work buffer. 50 * scannumber() Reads a C numeric constant from the input stream, 51 * calling the user-supplied function for each 52 * character. (output() or save() as noted above.) 53 * save() Save one character in the work[] buffer. 54 * savestring() Saves a string in malloc() memory. 55 * getfile() Initialize a new FILEINFO structure, called when 56 * #include opens a new file, or a macro is to be 57 * expanded. 58 * getmem() Get a specified number of bytes from malloc memory. 59 * output() Write one character to stdout (calling PUTCHAR) -- 60 * implemented as a function so its address may be 61 * passed to scanstring() and scannumber(). 62 * lookid() Scans the next token (identifier) from the input 63 * stream. Looks for it in the #defined symbol table. 64 * Returns a pointer to the definition, if found, or NULL 65 * if not present. The identifier is stored in token[]. 66 * defnedel() Define enter/delete subroutine. Updates the 67 * symbol table. 68 * get() Read the next byte from the current input stream, 69 * handling end of (macro/file) input and embedded 70 * comments appropriately. Note that the global 71 * instring is -- essentially -- a parameter to get(). 72 * cget() Like get(), but skip over TOK_SEP. 73 * unget() Push last gotten character back on the input stream. 74 * cerror(), cwarn(), cfatal(), cierror(), ciwarn() 75 * These routines format an print messages to the user. 76 * cerror & cwarn take a format and a single string argument. 77 * cierror & ciwarn take a format and a single int (char) argument. 78 * cfatal takes a format and a single string argument. 79 */ 80 81 /* 82 * This table must be rewritten for a non-Ascii machine. 83 * 84 * Note that several "non-visible" characters have special meaning: 85 * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion. 86 * Hex 1E TOK_SEP -- a delimiter for token concatenation 87 * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation 88 */ 89 #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D 90 << error type table is not correct >> 91 #endif 92 93 #if OK_DOLLAR 94 #define DOL LET 95 #else 96 #define DOL 000 97 #endif 98 99 #ifdef EBCDIC 100 101 char type[256] = { /* Character type codes Hex */ 102 END, 000, 000, 000, 000, SPA, 000, 000, /* 00 */ 103 000, 000, 000, 000, 000, 000, 000, 000, /* 08 */ 104 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ 105 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ 106 000, 000, 000, 000, 000, 000, 000, 000, /* 20 */ 107 000, 000, 000, 000, 000, 000, 000, 000, /* 28 */ 108 000, 000, 000, 000, 000, 000, 000, 000, /* 30 */ 109 000, 000, 000, 000, 000, 000, 000, 000, /* 38 */ 110 SPA, 000, 000, 000, 000, 000, 000, 000, /* 40 */ 111 000, 000, 000, DOT, OP_LT,OP_LPA,OP_ADD, OP_OR, /* 48 .<(+| */ 112 OP_AND, 000, 000, 000, 000, 000, 000, 000, /* 50 & */ 113 000, 000,OP_NOT, DOL,OP_MUL,OP_RPA, 000,OP_XOR, /* 58 !$*);^ */ 114 OP_SUB,OP_DIV, 000, 000, 000, 000, 000, 000, /* 60 -/ */ 115 000, 000, 000, 000,OP_MOD, LET, OP_GT,OP_QUE, /* 68 ,%_>? */ 116 000, 000, 000, 000, 000, 000, 000, 000, /* 70 */ 117 000, 000,OP_COL, 000, 000, QUO, OP_EQ, QUO, /* 78 `:#@'=" */ 118 000, LET, LET, LET, LET, LET, LET, LET, /* 80 abcdefg */ 119 LET, LET, 000, 000, 000, 000, 000, 000, /* 88 hi */ 120 000, LET, LET, LET, LET, LET, LET, LET, /* 90 jklmnop */ 121 LET, LET, 000, 000, 000, 000, 000, 000, /* 98 qr */ 122 000,OP_NOT, LET, LET, LET, LET, LET, LET, /* A0 ~stuvwx */ 123 LET, LET, 000, 000, 000, 000, 000, 000, /* A8 yz [ */ 124 000, 000, 000, 000, 000, 000, 000, 000, /* B0 */ 125 000, 000, 000, 000, 000, 000, 000, 000, /* B8 ] */ 126 000, LET, LET, LET, LET, LET, LET, LET, /* C0 {ABCDEFG */ 127 LET, LET, 000, 000, 000, 000, 000, 000, /* C8 HI */ 128 000, LET, LET, LET, LET, LET, LET, LET, /* D0 }JKLMNOP */ 129 LET, LET, 000, 000, 000, 000, 000, 000, /* D8 QR */ 130 BSH, 000, LET, LET, LET, LET, LET, LET, /* E0 \ STUVWX */ 131 LET, LET, 000, 000, 000, 000, 000, 000, /* E8 YZ */ 132 DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* F0 01234567 */ 133 DIG, DIG, 000, 000, 000, 000, 000, 000, /* F8 89 */ 134 }; 135 136 #else 137 138 char type[256] = { /* Character type codes Hex */ 139 END, 000, 000, 000, 000, 000, 000, 000, /* 00 */ 140 000, SPA, 000, 000, 000, 000, 000, 000, /* 08 */ 141 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ 142 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ 143 SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */ 144 OP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */ 145 DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */ 146 DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */ 147 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */ 148 LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */ 149 LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */ 150 LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */ 151 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */ 152 LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */ 153 LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */ 154 LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */ 155 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 156 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 157 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 158 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 159 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 160 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 161 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 162 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ 163 }; 164 165 #endif 166 167 168 /* 169 * C P P S y m b o l T a b l e s 170 */ 171 172 /* 173 * SBSIZE defines the number of hash-table slots for the symbol table. 174 * It must be a power of 2. 175 */ 176 #ifndef SBSIZE 177 #define SBSIZE 64 178 #endif 179 #define SBMASK (SBSIZE - 1) 180 #if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1) 181 << error, SBSIZE must be a power of 2 >> 182 #endif 183 184 185 static DEFBUF *symtab[SBSIZE]; /* Symbol table queue headers */ 186 187 void InitCpp6() 188 { 189 int i; 190 for( i = 0; i < SBSIZE; i++ ) 191 symtab[ i ] = NULL; 192 } 193 194 195 196 void skipnl() 197 /* 198 * Skip to the end of the current input line. 199 */ 200 { 201 register int c; 202 203 do { /* Skip to newline */ 204 c = get(); 205 } while (c != '\n' && c != EOF_CHAR); 206 } 207 208 int 209 skipws() 210 /* 211 * Skip over whitespace 212 */ 213 { 214 register int c; 215 216 do { /* Skip whitespace */ 217 c = get(); 218 #if COMMENT_INVISIBLE 219 } while (type[c] == SPA || c == COM_SEP); 220 #else 221 } while (type[c] == SPA); 222 #endif 223 return (c); 224 } 225 226 void scanid(int c) 227 /* 228 * Get the next token (an id) into the token buffer. 229 * Note: this code is duplicated in lookid(). 230 * Change one, change both. 231 */ 232 { 233 register char *bp; 234 235 if (c == DEF_MAGIC) /* Eat the magic token */ 236 c = get(); /* undefiner. */ 237 bp = token; 238 do { 239 if (bp < &token[IDMAX]) /* token dim is IDMAX+1 */ 240 *bp++ = (char)c; 241 c = get(); 242 } while (type[c] == LET || type[c] == DIG); 243 unget(); 244 *bp = EOS; 245 } 246 247 int 248 macroid(int c) 249 /* 250 * If c is a letter, scan the id. if it's #defined, expand it and scan 251 * the next character and try again. 252 * 253 * Else, return the character. If type[c] is a LET, the token is in token. 254 */ 255 { 256 register DEFBUF *dp; 257 258 if (infile != NULL && infile->fp != NULL) 259 recursion = 0; 260 while (type[c] == LET && (dp = lookid(c)) != NULL) { 261 expand(dp); 262 c = get(); 263 } 264 return (c); 265 } 266 267 int 268 catenate() 269 /* 270 * A token was just read (via macroid). 271 * If the next character is TOK_SEP, concatenate the next token 272 * return TRUE -- which should recall macroid after refreshing 273 * macroid's argument. If it is not TOK_SEP, unget() the character 274 * and return FALSE. 275 */ 276 { 277 register int c; 278 register char *token1; 279 280 #if OK_CONCAT 281 if (get() != TOK_SEP) { /* Token concatenation */ 282 unget(); 283 return (FALSE); 284 } 285 else { 286 token1 = savestring(token); /* Save first token */ 287 c = macroid(get()); /* Scan next token */ 288 switch(type[c]) { /* What was it? */ 289 case LET: /* An identifier, ... */ 290 if (strlen(token1) + strlen(token) >= NWORK) 291 cfatal("work buffer overflow doing %s #", token1); 292 sprintf(work, "%s%s", token1, token); 293 break; 294 295 case DIG: /* A digit string */ 296 strcpy(work, token1); 297 workp = work + strlen(work); 298 do { 299 save(c); 300 } while ((c = get()) != TOK_SEP); 301 /* 302 * The trailing TOK_SEP is no longer needed. 303 */ 304 save(EOS); 305 break; 306 307 default: /* An error, ... */ 308 #if ! COMMENT_INVISIBLE 309 if (isprint(c)) 310 cierror("Strange character '%c' after #", c); 311 else 312 cierror("Strange character (%d.) after #", c); 313 #endif 314 strcpy(work, token1); 315 unget(); 316 break; 317 } 318 /* 319 * work has the concatenated token and token1 has 320 * the first token (no longer needed). Unget the 321 * new (concatenated) token after freeing token1. 322 * Finally, setup to read the new token. 323 */ 324 free(token1); /* Free up memory */ 325 ungetstring(work); /* Unget the new thing, */ 326 return (TRUE); 327 } 328 #else 329 return (FALSE); /* Not supported */ 330 #endif 331 } 332 333 int 334 scanstring(int delim, 335 #ifndef _NO_PROTO 336 void (*outfun)( int ) /* BP */ /* Output function */ 337 #else 338 void (*outfun)() /* BP */ 339 #endif 340 ) 341 /* 342 * Scan off a string. Warning if terminated by newline or EOF. 343 * outfun() outputs the character -- to a buffer if in a macro. 344 * TRUE if ok, FALSE if error. 345 */ 346 { 347 register int c; 348 349 instring = TRUE; /* Don't strip comments */ 350 (*outfun)(delim); 351 while ((c = get()) != delim 352 && c != '\n' 353 && c != EOF_CHAR) { 354 355 if (c != DEF_MAGIC) 356 (*outfun)(c); 357 if (c == '\\') 358 (*outfun)(get()); 359 } 360 instring = FALSE; 361 if (c == delim) { 362 (*outfun)(c); 363 return (TRUE); 364 } 365 else { 366 cerror("Unterminated string", NULLST); 367 unget(); 368 return (FALSE); 369 } 370 } 371 372 void scannumber(int c, 373 #ifndef _NO_PROTO 374 register void (*outfun)( int ) /* BP */ /* Output/store func */ 375 #else 376 register void (*outfun)() /* BP */ 377 #endif 378 ) 379 /* 380 * Process a number. We know that c is from 0 to 9 or dot. 381 * Algorithm from Dave Conroy's Decus C. 382 */ 383 { 384 register int radix; /* 8, 10, or 16 */ 385 int expseen; /* 'e' seen in floater */ 386 int signseen; /* '+' or '-' seen */ 387 int octal89; /* For bad octal test */ 388 int dotflag; /* TRUE if '.' was seen */ 389 390 expseen = FALSE; /* No exponent seen yet */ 391 signseen = TRUE; /* No +/- allowed yet */ 392 octal89 = FALSE; /* No bad octal yet */ 393 radix = 10; /* Assume decimal */ 394 if ((dotflag = (c == '.')) != FALSE) { /* . something? */ 395 (*outfun)('.'); /* Always out the dot */ 396 if (type[(c = get())] != DIG) { /* If not a float numb, */ 397 unget(); /* Rescan strange char */ 398 return; /* All done for now */ 399 } 400 } /* End of float test */ 401 else if (c == '0') { /* Octal or hex? */ 402 (*outfun)(c); /* Stuff initial zero */ 403 radix = 8; /* Assume it's octal */ 404 c = get(); /* Look for an 'x' */ 405 if (c == 'x' || c == 'X') { /* Did we get one? */ 406 radix = 16; /* Remember new radix */ 407 (*outfun)(c); /* Stuff the 'x' */ 408 c = get(); /* Get next character */ 409 } 410 } 411 for (;;) { /* Process curr. char. */ 412 /* 413 * Note that this algorithm accepts "012e4" and "03.4" 414 * as legitimate floating-point numbers. 415 */ 416 if (radix != 16 && (c == 'e' || c == 'E')) { 417 if (expseen) /* Already saw 'E'? */ 418 break; /* Exit loop, bad nbr. */ 419 expseen = TRUE; /* Set exponent seen */ 420 signseen = FALSE; /* We can read '+' now */ 421 radix = 10; /* Decimal exponent */ 422 } 423 else if (radix != 16 && c == '.') { 424 if (dotflag) /* Saw dot already? */ 425 break; /* Exit loop, two dots */ 426 dotflag = TRUE; /* Remember the dot */ 427 radix = 10; /* Decimal fraction */ 428 } 429 else if (c == '+' || c == '-') { /* 1.0e+10 */ 430 if (signseen) /* Sign in wrong place? */ 431 break; /* Exit loop, not nbr. */ 432 /* signseen = TRUE; */ /* Remember we saw it */ 433 } 434 else { /* Check the digit */ 435 switch (c) { 436 case '8': case '9': /* Sometimes wrong */ 437 octal89 = TRUE; /* Do check later */ 438 case '0': case '1': case '2': case '3': 439 case '4': case '5': case '6': case '7': 440 break; /* Always ok */ 441 442 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 443 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 444 if (radix == 16) /* Alpha's are ok only */ 445 break; /* if reading hex. */ 446 default: /* At number end */ 447 goto done; /* Break from for loop */ 448 } /* End of switch */ 449 } /* End general case */ 450 (*outfun)(c); /* Accept the character */ 451 signseen = TRUE; /* Don't read sign now */ 452 c = get(); /* Read another char */ 453 } /* End of scan loop */ 454 /* 455 * When we break out of the scan loop, c contains the first 456 * character (maybe) not in the number. If the number is an 457 * integer, allow a trailing 'L' for long and/or a trailing 'U' 458 * for unsigned. If not those, push the trailing character back 459 * on the input stream. Floating point numbers accept a trailing 460 * 'L' for "long double". 461 */ 462 done: if (dotflag || expseen) { /* Floating point? */ 463 if (c == 'l' || c == 'L') { 464 (*outfun)(c); 465 c = get(); /* Ungotten later */ 466 } 467 } 468 else { /* Else it's an integer */ 469 /* 470 * We know that dotflag and expseen are both zero, now: 471 * dotflag signals "saw 'L'", and 472 * expseen signals "saw 'U'". 473 */ 474 for (;;) { 475 switch (c) { 476 case 'l': 477 case 'L': 478 if (dotflag) 479 goto nomore; 480 dotflag = TRUE; 481 break; 482 483 case 'u': 484 case 'U': 485 if (expseen) 486 goto nomore; 487 expseen = TRUE; 488 break; 489 490 default: 491 goto nomore; 492 } 493 (*outfun)(c); /* Got 'L' or 'U'. */ 494 c = get(); /* Look at next, too. */ 495 } 496 } 497 nomore: unget(); /* Not part of a number */ 498 if (octal89 && radix == 8) 499 cwarn("Illegal digit in octal number", NULLST); 500 } 501 502 void save(int c) 503 { 504 if (workp >= &work[NWORK]) { 505 work[NWORK-1] = '\0'; 506 cfatal("Work buffer overflow: %s", work); 507 } 508 else *workp++ = (char)c; 509 } 510 511 char * 512 savestring(char* text) 513 /* 514 * Store a string into free memory. 515 */ 516 { 517 register char *result; 518 519 result = getmem(strlen(text) + 1); 520 strcpy(result, text); 521 return (result); 522 } 523 524 FILEINFO * 525 getfile(int bufsize, char* name) 526 /* 527 * Common FILEINFO buffer initialization for a new file or macro. 528 */ 529 { 530 register FILEINFO *file; 531 register int size; 532 533 size = strlen(name); /* File/macro name */ 534 file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size); 535 file->parent = infile; /* Chain files together */ 536 file->fp = NULL; /* No file yet */ 537 file->filename = savestring(name); /* Save file/macro name */ 538 file->progname = NULL; /* No #line seen yet */ 539 file->unrecur = 0; /* No macro fixup */ 540 file->bptr = file->buffer; /* Initialize line ptr */ 541 file->buffer[0] = EOS; /* Force first read */ 542 file->line = 0; /* (Not used just yet) */ 543 if (infile != NULL) /* If #include file */ 544 infile->line = line; /* Save current line */ 545 infile = file; /* New current file */ 546 line = 1; /* Note first line */ 547 return (file); /* All done. */ 548 } 549 550 char * 551 getmem(int size) 552 /* 553 * Get a block of free memory. 554 */ 555 { 556 register char *result; 557 558 if ((result = malloc((unsigned) size)) == NULL) 559 cfatal("Out of memory", NULLST); 560 return (result); 561 } 562 563 564 DEFBUF * 565 lookid(int c) 566 /* 567 * Look for the next token in the symbol table. Returns token in "token". 568 * If found, returns the table pointer; Else returns NULL. 569 */ 570 { 571 register int nhash; 572 register DEFBUF *dp; 573 register char *np; 574 int temp = 0; 575 int isrecurse; /* For #define foo foo */ 576 577 np = token; 578 nhash = 0; 579 if (0 != (isrecurse = (c == DEF_MAGIC))) /* If recursive macro */ 580 c = get(); /* hack, skip DEF_MAGIC */ 581 do { 582 if (np < &token[IDMAX]) { /* token dim is IDMAX+1 */ 583 *np++ = (char)c; /* Store token byte */ 584 nhash += c; /* Update hash value */ 585 } 586 c = get(); /* And get another byte */ 587 } while (type[c] == LET || type[c] == DIG); 588 unget(); /* Rescan terminator */ 589 *np = EOS; /* Terminate token */ 590 if (isrecurse) /* Recursive definition */ 591 return (NULL); /* undefined just now */ 592 nhash += (np - token); /* Fix hash value */ 593 dp = symtab[nhash & SBMASK]; /* Starting bucket */ 594 while (dp != (DEFBUF *) NULL) { /* Search symbol table */ 595 if (dp->hash == nhash /* Fast precheck */ 596 && (temp = strcmp(dp->name, token)) >= 0) 597 break; 598 dp = dp->link; /* Nope, try next one */ 599 } 600 return ((temp == 0) ? dp : NULL); 601 } 602 603 DEFBUF * 604 defendel(char* name, int delete) 605 /* 606 * Enter this name in the lookup table (delete = FALSE) 607 * or delete this name (delete = TRUE). 608 * Returns a pointer to the define block (delete = FALSE) 609 * Returns NULL if the symbol wasn't defined (delete = TRUE). 610 */ 611 { 612 register DEFBUF *dp; 613 register DEFBUF **prevp; 614 register char *np; 615 int nhash; 616 int temp; 617 int size; 618 619 for (nhash = 0, np = name; *np != EOS;) 620 nhash += *np++; 621 size = (np - name); 622 nhash += size; 623 prevp = &symtab[nhash & SBMASK]; 624 while ((dp = *prevp) != (DEFBUF *) NULL) { 625 if (dp->hash == nhash 626 && (temp = strcmp(dp->name, name)) >= 0) { 627 if (temp > 0) 628 dp = NULL; /* Not found */ 629 else { 630 *prevp = dp->link; /* Found, unlink and */ 631 if (dp->repl != NULL) /* Free the replacement */ 632 free(dp->repl); /* if any, and then */ 633 free((char *) dp); /* Free the symbol */ 634 } 635 break; 636 } 637 prevp = &dp->link; 638 } 639 if (!delete) { 640 dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size); 641 dp->link = *prevp; 642 *prevp = dp; 643 dp->hash = nhash; 644 dp->repl = NULL; 645 dp->nargs = 0; 646 strcpy(dp->name, name); 647 } 648 return (dp); 649 } 650 651 #if OSL_DEBUG_LEVEL > 1 652 653 void dumpdef(char *why) 654 { 655 register DEFBUF *dp; 656 register DEFBUF **syp; 657 FILE *pRememberOut = NULL; 658 659 if ( bDumpDefs ) /*ER */ 660 { 661 pRememberOut = pCppOut; 662 pCppOut = pDefOut; 663 } 664 fprintf( pCppOut, "CPP symbol table dump %s\n", why); 665 for (syp = symtab; syp < &symtab[SBSIZE]; syp++) { 666 if ((dp = *syp) != (DEFBUF *) NULL) { 667 fprintf( pCppOut, "symtab[%d]\n", (syp - symtab)); 668 do { 669 dumpadef((char *) NULL, dp); 670 } while ((dp = dp->link) != (DEFBUF *) NULL); 671 } 672 } 673 if ( bDumpDefs ) 674 { 675 fprintf( pCppOut, "\n"); 676 pCppOut = pRememberOut; 677 } 678 } 679 680 void dumpadef(char *why, register DEFBUF *dp) 681 { 682 register char *cp; 683 register int c; 684 FILE *pRememberOut = NULL; 685 686 /*ER dump #define's to pDefOut */ 687 if ( bDumpDefs ) 688 { 689 pRememberOut = pCppOut; 690 pCppOut = pDefOut; 691 } 692 fprintf( pCppOut, " \"%s\" [%d]", dp->name, dp->nargs); 693 if (why != NULL) 694 fprintf( pCppOut, " (%s)", why); 695 if (dp->repl != NULL) { 696 fprintf( pCppOut, " => "); 697 for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) { 698 #ifdef SOLAR 699 if (c == DEL) { 700 c = *cp++ & 0xFF; 701 if( c == EOS ) break; 702 fprintf( pCppOut, "<%%%d>", c - MAC_PARM); 703 } 704 #else 705 if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC)) 706 fprintf( pCppOut, "<%%%d>", c - MAC_PARM); 707 #endif 708 else if (isprint(c) || c == '\n' || c == '\t') 709 PUTCHAR(c); 710 else if (c < ' ') 711 fprintf( pCppOut, "<^%c>", c + '@'); 712 else 713 fprintf( pCppOut, "<\\0%o>", c); 714 } 715 /*ER evaluate macros to pDefOut */ 716 #ifdef EVALDEFS 717 if ( bDumpDefs && !bIsInEval && dp->nargs <= 0 ) 718 { 719 FILEINFO *infileSave = infile; 720 char *tokenSave = savestring( token ); 721 char *workSave = savestring( work ); 722 int lineSave = line; 723 int wronglineSave = wrongline; 724 int recursionSave = recursion; 725 FILEINFO *file; 726 EVALTYPE valEval; 727 728 bIsInEval = 1; 729 infile = NULL; /* start from scrap */ 730 line = 0; 731 wrongline = 0; 732 *token = EOS; 733 *work = EOS; 734 recursion = 0; 735 file = getfile( strlen( dp->repl ), dp->name ); 736 strcpy( file->buffer, dp->repl ); 737 fprintf( pCppOut, " ===> "); 738 nEvalOff = 0; 739 cppmain(); /* get() frees also *file */ 740 valEval = 0; 741 if ( 0 == evaluate( EvalBuf, &valEval ) ) 742 { 743 #ifdef EVALFLOATS 744 if ( valEval != (EVALTYPE)((long)valEval ) ) 745 fprintf( pCppOut, " ==eval=> %f", valEval ); 746 else 747 #endif 748 fprintf( pCppOut, " ==eval=> %ld", (long)valEval ); 749 } 750 recursion = recursionSave; 751 wrongline = wronglineSave; 752 line = lineSave; 753 strcpy( work, workSave ); 754 free( workSave ); 755 strcpy( token, tokenSave ); 756 free( tokenSave ); 757 infile = infileSave; 758 bIsInEval = 0; 759 } 760 #endif 761 } 762 else { 763 fprintf( pCppOut, ", no replacement."); 764 } 765 PUTCHAR('\n'); 766 if ( bDumpDefs ) 767 pCppOut = pRememberOut; 768 } 769 #endif 770 771 /* 772 * G E T 773 */ 774 775 int 776 get() 777 /* 778 * Return the next character from a macro or the current file. 779 * Handle end of file from #include files. 780 */ 781 { 782 register int c; 783 register FILEINFO *file; 784 register int popped; /* Recursion fixup */ 785 786 popped = 0; 787 get_from_file: 788 if ((file = infile) == NULL) 789 return (EOF_CHAR); 790 newline: 791 #if 0 792 fprintf( pCppOut, "get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n", 793 file->filename, recursion, line, 794 file->bptr - file->buffer, file->buffer); 795 #endif 796 /* 797 * Read a character from the current input line or macro. 798 * At EOS, either finish the current macro (freeing temp. 799 * storage) or read another line from the current input file. 800 * At EOF, exit the current file (#include) or, at EOF from 801 * the cpp input file, return EOF_CHAR to finish processing. 802 */ 803 if ((c = *file->bptr++ & 0xFF) == EOS) { 804 /* 805 * Nothing in current line or macro. Get next line (if 806 * input from a file), or do end of file/macro processing. 807 * In the latter case, jump back to restart from the top. 808 */ 809 if (file->fp == NULL) { /* NULL if macro */ 810 popped++; 811 recursion -= file->unrecur; 812 if (recursion < 0) 813 recursion = 0; 814 infile = file->parent; /* Unwind file chain */ 815 } 816 else { /* Else get from a file */ 817 if ((file->bptr = fgets(file->buffer, NBUFF, file->fp)) 818 != NULL) { 819 #if OSL_DEBUG_LEVEL > 1 820 if (debug > 1) { /* Dump it to stdout */ 821 fprintf( pCppOut, "\n#line %d (%s), %s", 822 line, file->filename, file->buffer); 823 } 824 #endif 825 goto newline; /* process the line */ 826 } 827 else { 828 if( file->fp != stdin ) 829 fclose(file->fp); /* Close finished file */ 830 if ((infile = file->parent) != NULL) { 831 /* 832 * There is an "ungotten" newline in the current 833 * infile buffer (set there by doinclude() in 834 * cpp1.c). Thus, we know that the mainline code 835 * is skipping over blank lines and will do a 836 * #line at its convenience. 837 */ 838 wrongline = TRUE; /* Need a #line now */ 839 } 840 } 841 } 842 /* 843 * Free up space used by the (finished) file or macro and 844 * restart input from the parent file/macro, if any. 845 */ 846 free(file->filename); /* Free name and */ 847 if (file->progname != NULL) /* if a #line was seen, */ 848 free(file->progname); /* free it, too. */ 849 free((char *) file); /* Free file space */ 850 if (infile == NULL) /* If at end of file */ 851 return (EOF_CHAR); /* Return end of file */ 852 line = infile->line; /* Reset line number */ 853 goto get_from_file; /* Get from the top. */ 854 } 855 /* 856 * Common processing for the new character. 857 */ 858 if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete */ 859 goto newline; /* from a file */ 860 if (file->parent != NULL) { /* Macro or #include */ 861 if (popped != 0) 862 file->parent->unrecur += popped; 863 else { 864 recursion -= file->parent->unrecur; 865 if (recursion < 0) 866 recursion = 0; 867 file->parent->unrecur = 0; 868 } 869 } 870 #if (HOST == SYS_UNIX) 871 /*ER*/ if (c == '\r') 872 /*ER*/ return get(); /* DOS fuck */ 873 #endif 874 if (c == '\n') /* Maintain current */ 875 ++line; /* line counter */ 876 if (instring) /* Strings just return */ 877 return (c); /* the character. */ 878 else if (c == '/') { /* Comment? */ 879 instring = TRUE; /* So get() won't loop */ 880 /*MM c++ comments */ 881 /*MM*/ c = get(); 882 /*MM*/ if ((c != '*') && (c != '/')) { /* Next byte '*'? */ 883 instring = FALSE; /* Nope, no comment */ 884 unget(); /* Push the char. back */ 885 return ('/'); /* Return the slash */ 886 } 887 if (keepcomments) { /* If writing comments */ 888 PUTCHAR('/'); /* Write out the */ 889 /* initializer */ 890 /*MM*/ if( '*' == c ) 891 PUTCHAR('*'); 892 /*MM*/ else 893 /*MM*/ PUTCHAR('/'); 894 895 } 896 /*MM*/ if( '*' == c ){ 897 for (;;) { /* Eat a comment */ 898 c = get(); 899 test: if (keepcomments && c != EOF_CHAR) 900 cput(c); 901 switch (c) { 902 case EOF_CHAR: 903 cerror("EOF in comment", NULLST); 904 return (EOF_CHAR); 905 906 case '/': 907 if ((c = get()) != '*') /* Don't let comments */ 908 goto test; /* Nest. */ 909 #ifdef STRICT_COMMENTS 910 cwarn("Nested comments", NULLST); 911 #endif 912 /* Fall into * stuff */ 913 case '*': 914 if ((c = get()) != '/') /* If comment doesn't */ 915 goto test; /* end, look at next */ 916 instring = FALSE; /* End of comment, */ 917 if (keepcomments) { /* Put out the comment */ 918 cput(c); /* terminator, too */ 919 } 920 /* 921 * A comment is syntactically "whitespace" -- 922 * however, there are certain strange sequences 923 * such as 924 * #define foo(x) (something) 925 * foo|* comment *|(123) 926 * these are '/' ^ ^ 927 * where just returning space (or COM_SEP) will cause 928 * problems. This can be "fixed" by overwriting the 929 * '/' in the input line buffer with ' ' (or COM_SEP) 930 * but that may mess up an error message. 931 * So, we peek ahead -- if the next character is 932 * "whitespace" we just get another character, if not, 933 * we modify the buffer. All in the name of purity. 934 */ 935 if (*file->bptr == '\n' 936 || type[*file->bptr & 0xFF] == SPA) 937 goto newline; 938 #if COMMENT_INVISIBLE 939 /* 940 * Return magic (old-fashioned) syntactic space. 941 */ 942 return ((file->bptr[-1] = COM_SEP)); 943 #else 944 return ((file->bptr[-1] = ' ')); 945 #endif 946 947 case '\n': /* we'll need a #line */ 948 if (!keepcomments) 949 wrongline = TRUE; /* later... */ 950 default: /* Anything else is */ 951 break; /* Just a character */ 952 } /* End switch */ 953 } /* End comment loop */ 954 } 955 else{ /* c++ comment */ 956 /*MM c++ comment*/ 957 for (;;) { /* Eat a comment */ 958 c = get(); 959 if (keepcomments && c != EOF_CHAR) 960 cput(c); 961 if( EOF_CHAR == c ) 962 return (EOF_CHAR); 963 else if( '\n' == c ){ 964 instring = FALSE; /* End of comment, */ 965 return( c ); 966 } 967 } 968 } 969 } /* End if in comment */ 970 else if (!inmacro && c == '\\') { /* If backslash, peek */ 971 if ((c = get()) == '\n') { /* for a <nl>. If so, */ 972 wrongline = TRUE; 973 goto newline; 974 } 975 else { /* Backslash anything */ 976 unget(); /* Get it later */ 977 return ('\\'); /* Return the backslash */ 978 } 979 } 980 else if (c == '\f' || c == VT) /* Form Feed, Vertical */ 981 c = ' '; /* Tab are whitespace */ 982 else if (c == 0xef) /* eat up UTF-8 BOM */ 983 { 984 if((c = get()) == 0xbb) 985 { 986 if((c = get()) == 0xbf) 987 { 988 c = get(); 989 return c; 990 } 991 else 992 { 993 unget(); 994 unget(); 995 return 0xef; 996 } 997 } 998 else 999 { 1000 unget(); 1001 return 0xef; 1002 } 1003 } 1004 return (c); /* Just return the char */ 1005 } 1006 1007 void unget() 1008 /* 1009 * Backup the pointer to reread the last character. Fatal error 1010 * (code bug) if we backup too far. unget() may be called, 1011 * without problems, at end of file. Only one character may 1012 * be ungotten. If you need to unget more, call ungetstring(). 1013 */ 1014 { 1015 register FILEINFO *file; 1016 1017 if ((file = infile) == NULL) 1018 return; /* Unget after EOF */ 1019 if (--file->bptr < file->buffer) 1020 cfatal("Too much pushback", NULLST); 1021 if (*file->bptr == '\n') /* Ungetting a newline? */ 1022 --line; /* Unget the line number, too */ 1023 } 1024 1025 void ungetstring(char* text) 1026 /* 1027 * Push a string back on the input stream. This is done by treating 1028 * the text as if it were a macro. 1029 */ 1030 { 1031 register FILEINFO *file; 1032 #ifndef ZTC /* BP */ 1033 extern FILEINFO *getfile(); 1034 #endif 1035 file = getfile(strlen(text) + 1, ""); 1036 strcpy(file->buffer, text); 1037 } 1038 1039 int 1040 cget() 1041 /* 1042 * Get one character, absorb "funny space" after comments or 1043 * token concatenation 1044 */ 1045 { 1046 register int c; 1047 1048 do { 1049 c = get(); 1050 #if COMMENT_INVISIBLE 1051 } while (c == TOK_SEP || c == COM_SEP); 1052 #else 1053 } while (c == TOK_SEP); 1054 #endif 1055 return (c); 1056 } 1057 1058 /* 1059 * Error messages and other hacks. The first byte of severity 1060 * is 'S' for string arguments and 'I' for int arguments. This 1061 * is needed for portability with machines that have int's that 1062 * are shorter than char *'s. 1063 */ 1064 1065 static void domsg(char* severity, char* format, void* arg) 1066 /* 1067 * Print filenames, macro names, and line numbers for error messages. 1068 */ 1069 { 1070 register char *tp; 1071 register FILEINFO *file; 1072 1073 fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]); 1074 if (*severity == 'S') 1075 fprintf(stderr, format, (char *)arg); 1076 else 1077 fprintf(stderr, format, *((int *)arg) ); 1078 putc('\n', stderr); 1079 if ((file = infile) == NULL) 1080 return; /* At end of file */ 1081 if (file->fp != NULL) { 1082 tp = file->buffer; /* Print current file */ 1083 fprintf(stderr, "%s", tp); /* name, making sure */ 1084 if (tp[strlen(tp) - 1] != '\n') /* there's a newline */ 1085 putc('\n', stderr); 1086 } 1087 while ((file = file->parent) != NULL) { /* Print #includes, too */ 1088 if (file->fp == NULL) 1089 fprintf(stderr, "from macro %s\n", file->filename); 1090 else { 1091 tp = file->buffer; 1092 fprintf(stderr, "from file %s, line %d:\n%s", 1093 (file->progname != NULL) 1094 ? file->progname : file->filename, 1095 file->line, tp); 1096 if (tp[strlen(tp) - 1] != '\n') 1097 putc('\n', stderr); 1098 } 1099 } 1100 } 1101 1102 void cerror(char* format, char* sarg) 1103 /* 1104 * Print a normal error message, string argument. 1105 */ 1106 { 1107 domsg("SError", format, sarg); 1108 errors++; 1109 } 1110 1111 void cierror(char* format, int narg) 1112 /* 1113 * Print a normal error message, numeric argument. 1114 */ 1115 { 1116 domsg("IError", format, &narg); 1117 errors++; 1118 } 1119 1120 void cfatal(char* format, char* sarg) 1121 /* 1122 * A real disaster 1123 */ 1124 { 1125 domsg("SFatal error", format, sarg); 1126 exit(IO_ERROR); 1127 } 1128 1129 void cwarn(char* format, char* sarg) 1130 /* 1131 * A non-fatal error, string argument. 1132 */ 1133 { 1134 domsg("SWarning", format, sarg); 1135 } 1136 1137 void ciwarn(char* format, int narg) 1138 /* 1139 * A non-fatal error, numeric argument. 1140 */ 1141 { 1142 domsg("IWarning", format, &narg); 1143 } 1144 1145