Scanner.cpp
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* */
3 /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */
4 /* Copyright (c) 2005-2009 Rexx Language Association. All rights reserved. */
5 /* */
6 /* This program and the accompanying materials are made available under */
7 /* the terms of the Common Public License v1.0 which accompanies this */
8 /* distribution. A copy is also available at the following address: */
9 /* http://www.oorexx.org/license.html */
10 /* */
11 /* Redistribution and use in source and binary forms, with or */
12 /* without modification, are permitted provided that the following */
13 /* conditions are met: */
14 /* */
15 /* Redistributions of source code must retain the above copyright */
16 /* notice, this list of conditions and the following disclaimer. */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in */
19 /* the documentation and/or other materials provided with the distribution. */
20 /* */
21 /* Neither the name of Rexx Language Association nor the names */
22 /* of its contributors may be used to endorse or promote products */
23 /* derived from this software without specific prior written permission. */
24 /* */
25 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
26 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
27 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS */
28 /* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
29 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
30 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
31 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, */
32 /* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
33 /* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */
34 /* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */
35 /* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* */
37 /*----------------------------------------------------------------------------*/
38 /******************************************************************************/
39 /* REXX Kernel */
40 /* */
41 /* Scanner portion of the REXX Source File Class */
42 /* */
43 /******************************************************************************/
44 #include <ctype.h>
45 #include <string.h>
46 #include "RexxCore.h"
47 #include "StringClass.hpp"
48 #include "ArrayClass.hpp"
49 #include "SourceFile.hpp"
50 
51 #define HIGHEST_PRECEDENCE 100 // For abuttal inside symbol: 1+2i is parsed as 1+(2i) instead of (1+2)i
52 
54  RexxToken *token) /* target token */
55 /******************************************************************************/
56 /* Function: Determine a token's operator precedence */
/* */
/* A non-zero token->precedence wins over the table below; otherwise the */
/* rank is selected from token->subclass, and any subclass without a case */
/* of its own yields 0. Judging by the per-case remarks, a larger value */
/* means the operator binds more tightly. */
/* */
/* NOTE(review): the line carrying the function name and return type is not */
/* present in this listing, and the embedded numbering jumps 77 -> 81 -> 95 */
/* and 98 -> 100, so some operator cases are not visible here. */
57 /******************************************************************************/
58 {
59  if (token->precedence != 0) return token->precedence; // the default precedence has been overridden (ex: abuttal inside symbol)
60 
61  switch (token->subclass)
62  { /* process based on subclass */
63 
64  default:
65  return 0; /* this is the bottom of the heap */
66  break;
67 
68  case OPERATOR_OR:
69  case OPERATOR_XOR:
70  return 1; /* various OR types are next */
71  break;
72 
73  case OPERATOR_AND:
74  return 2; /* AND operator ahead of ORs */
75  break;
76 
77  case OPERATOR_EQUAL: /* comparisons are all together */
81  case OPERATOR_LESSTHAN:
95  return 3; /* comparisons rank just above AND */
96  break;
97 
98  case OPERATOR_ABUTTAL:
100  case OPERATOR_BLANK:
101  return 4; /* concatenates are next */
102  break;
103 
104  case OPERATOR_PLUS:
105  case OPERATOR_SUBTRACT:
106  return 5; /* plus and minus next */
107  break;
108 
109  case OPERATOR_MULTIPLY:
110  case OPERATOR_DIVIDE:
111  case OPERATOR_INTDIV:
112  case OPERATOR_REMAINDER:
113  return 6; /* multiply and divide after the additive operators */
114  break;
115 
116  case OPERATOR_POWER:
117  return 7; /* almost the top of the heap */
118  break;
119 
120  case OPERATOR_BACKSLASH:
121  return 8; /* NOT is the top honcho */
122  break;
123  }
124 }
125 
126 /*********************************************************************
127 * The following table detects alphanumeric characters and *
128 * special characters that can be part of a REXX symbol. *
129 * The table also converts lower case letters to upper case. *
130 *********************************************************************/
132 #ifdef EBCDIC
133  // This table was built using the IBM-1047 code page. It should be
134  // universal across all EBCDIC code pages!
135  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
136  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
137  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
138  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
139  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
140  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
141  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
142  0, 0, 0, 0, 74, 75, 0, 0, 0, 0, /* ¢ */
143  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
144  90, 91, 0, 0, 0, 0, 0, 0, 0, 0, /* !$ */
145  0, 0, 0, 0, 0, 0, 0, 0, 0, 109, /* _ */
146  0, 111, 0, 0, 0, 0, 0, 0, 0, 0, /* ? */
147  0, 0, 0, 123, 124, 0, 0, 0, 0, 129, /* #@ a */
148 130, 131, 132, 133, 134, 135, 136, 137, 0, 0, /* bcdefghi */
149  0, 0, 0, 0, 0, 145, 146, 147, 148, 149, /* jklmn */
150 150, 151, 152, 153, 0, 0, 0, 0, 0, 0, /* opqr */
151  0, 0, 162, 163, 164, 165, 166, 167, 168, 169, /* stuvwxyz */
152  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
153  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
154  0, 0, 0, 193, 194, 195, 196, 197, 198, 199, /* ABCDEFG */
155 200, 201, 0, 0, 0, 0, 0, 0, 0, 209, /* HI J */
156 210, 211, 212, 213, 214, 215, 216, 217, 0, 0, /* KLMNOPQR */
157  0, 0, 0, 0, 0, 0, 226, 227, 228, 229, /* STUV */
158 230, 231, 232, 233, 0, 0, 0, 0, 0, 0, /* WXYZ */
159 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, /* 0123456789 */
160  0, 0, 0, 0, 0, 0 /* */
161 #else
162  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 9 */
163  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 19 */
164  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 29 */
165  0, 0, 0,33, 0, 35,36, 0, 0, 0, /* 30 - 39 (33 is ! 35 is # 36 is $) */
166  0, 0, 0, 0, 0, 0,46, 0,48,49, /* 40 - 49 (46 is . 48 is 0) */
167  50,51,52,53,54, 55,56,57, 0, 0, /* 50 - 59 (57 is 9) */
168  0, 0, 0,63,64, 65,66,67,68,69, /* 60 - 69 (63 is ? 64 is @ 65 is A) */
169  70,71,72,73,74, 75,76,77,78,79, /* 70 - 79 */
170  80,81,82,83,84, 85,86,87,88,89, /* 80 - 89 */
171  90, 0, 0, 0, 0, 95, 0,65,66,67, /* 90 - 99 (95 is _ 97 is a and */
172  /* becomes A) */
173  68,69,70,71,72, 73,74,75,76,77, /* 100 - 109 */
174  78,79,80,81,82, 83,84,85,86,87, /* 110 - 119 */
175  88,89,90, 0, 0, 0, 0, 0, 0, 0, /* 120 - 129 */
176  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 130 - 139 */
177  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 140 - 149 */
178  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 150 - 159 */
179  0, 0,162,0, 0, 0, 0, 0, 0, 0, /* 160 - 169 (162 is ¢) */
180  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 170 - 179 */
181  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 180 - 189 */
182  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 190 - 199 */
183  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 200 - 209 */
184  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 210 - 219 */
185  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 220 - 229 */
186  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 230 - 239 */
187  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 - 249 */
188  0, 0, 0, 0, 0, 0 /* 250 - 255 */
189 #endif
190 };
191 
192  /* some macros for commonly coded */
193  /* scanning operations...mostly to */
194  /* save some keystrokes and make */
195  /* things a little more readable */
 /* All four expand to expressions that */
 /* use "this", so they only work inside */
 /* a member function. */
 /* GETCHAR: the byte at line_offset in the current line, as unsigned char. */
 /* It performs no bounds check of its own. */
196 #define GETCHAR() ((unsigned char)(this->current[size_v(this->line_offset)]))
 /* MORELINE: true while line_offset is still short of current_length. */
197 #define MORELINE() (this->line_offset < this->current_length)
 /* OPERATOR(op): calls clause->newToken for a TOKEN_OPERATOR, pasting op */
 /* onto OPERATOR_ and OREF_. It also needs a variable named "location" */
 /* to be in scope at the point of use. */
198 #define OPERATOR(op) (this->clause->newToken(TOKEN_OPERATOR, OPERATOR_##op, (RexxString *)OREF_##op, location))
 /* CHECK_ASSIGNMENT(op, token): calls token->checkAssignment with this and */
 /* OREF_ASSIGNMENT_<op>. "token" is not parenthesized in the expansion, */
 /* so pass a plain identifier. */
199 #define CHECK_ASSIGNMENT(op, token) (token->checkAssignment(this, (RexxString *)OREF_ASSIGNMENT_##op))
200 
202  SourceLocation &location ) /* token location information (updated in place) */
203 /****************************************************************************/
204 /* Function: Record a token's starting location */
/* */
/* Stores the scanner's current line_number and line_offset as the start */
/* of the caller's location object. */
/* */
/* NOTE(review): the line carrying the function name is not present in */
/* this listing (embedded numbering jumps 199 -> 202). */
205 /****************************************************************************/
206 {
207  // copy the start line location
208  location.setStart(line_number, line_offset);
209 }
210 
212  SourceLocation &location ) /* token location information (updated in place) */
213 /****************************************************************************/
214 /* Function: Record a token's ending location */
/* */
/* Stores the scanner's current line_number and line_offset as the end */
/* of the caller's location object. */
/* */
/* NOTE(review): the line carrying the function name is not present in */
/* this listing (embedded numbering jumps 209 -> 212). */
215 /****************************************************************************/
216 {
217  // copy the end line location
218  location.setEnd(line_number, line_offset);
219 }
220 
222  unsigned int target, /* desired target character */
223  SourceLocation &location ) /* token location information */
224 /****************************************************************************/
225 /* Function: Find the next special character and verify against a target */
/* */
/* Advances to the next token position with locateToken(OREF_NULL). If */
/* that is neither end-of-file nor end-of-line and the character found */
/* there equals target, the character is consumed, the end of location is */
/* moved to the new offset, and true is returned. In every other case */
/* false is returned and location is left alone; the scan position stays */
/* wherever locateToken put it. */
/* */
/* NOTE(review): the line carrying the function name and return type is */
/* not present in this listing (embedded numbering jumps 219 -> 222). */
226 /****************************************************************************/
227 {
228  unsigned int inch = this->locateToken(OREF_NULL); /* find the next token */
229  /* have something else on this line? */
230  if (inch != CLAUSEEND_EOF && inch != CLAUSEEND_EOL)
231  {
232  if (GETCHAR() == target)
233  { /* is the next character a match? */
234  this->line_offset++; /* step over the next */
235  this->endLocation(location); /* update the end location part */
236  return true; /* got what we need! */
237  }
238  }
239  return false; // didn't find the one we're looking for
240 }
241 
243 /****************************************************************************/
244 /* Function: Scan source to skip over a nest of comments */
/* */
/* Entered with line_offset on the two-character comment opener. Keeps a */
/* nesting level that starts at 1, goes up for every opener met and down */
/* for every closer, and returns once it reaches 0. Moves to the next */
/* line whenever the current one is exhausted. */
/* */
/* NOTE(review): the line carrying the function name is not present in */
/* this listing, and the embedded numbering skips 262 and 265-266 inside */
/* the end-of-source branch, so the statement that reports the */
/* unterminated comment is not visible here. */
245 /****************************************************************************/
246 {
247  int level = 1; /* start the comment nesting */
248  this->line_offset += 2; /* step over the comment start */
249  size_t startline = this->line_number; /* remember the starting position */
250  while (level > 0)
251  { /* while still in a comment nest */
252  /* hit the end of a line? */
253  if (this->line_offset >= this->current_length)
254  {
255  this->nextLine(); /* need to go to the next line */
256  /* no more lines? */
257  if (this->line_number > this->line_count)
258  {
259  /* record current position in clause */
260  this->clause->setEnd(this->line_count, this->line_offset);
261  // update the error information
263  /* error, must report */
264  // The comment can be multiline, so must limit the amount of lines displayed in the error message
267  }
268  continue; /* go loop around */
269  }
270  unsigned int inch = GETCHAR(); /* get the next character */
271  this->line_offset++; /* step past the character */
 /* NOTE(review): the lookahead GETCHAR() calls below read at the advanced */
 /* offset without comparing it to current_length first - confirm the line */
 /* buffer tolerates a read at index current_length. */
272  /* is this the end delimiter? */
273  if (inch == '*' && GETCHAR() == '/')
274  {
275  level--; /* reduce the nesting level */
276  this->line_offset++; /* step the pointer over the close */
277  }
278  /* start of a new comment? */
279  else if (inch == '/' && GETCHAR() == '*')
280  {
281  level++; /* increment the level */
282  this->line_offset++; /* step the pointer over new start */
283  }
284  }
285 }
286 
288  RexxToken *previous ) /* previous token */
289 /****************************************************************************/
290 /* Function: Locate next significant token in source, skipping extra */
291 /* blanks and comments. */
/* */
/* Returns: */
/* CLAUSEEND_EOF - line_number is already beyond line_count */
/* CLAUSEEND_EOL - the scan position is at (or ran off) the end of line */
/* TOKEN_BLANK - a blank or tab was found and blanks are significant, */
/* or a continuation moved the scan to the next line */
/* while blanks are significant */
/* otherwise - the character found at the new scan position, which */
/* is left unconsumed */
/* */
/* Blanks are significant only when previous is non-null and is a symbol, */
/* literal, source literal, right paren or right square bracket token. */
/* */
/* A ',' or '-' is treated as a continuation when only blanks and comments */
/* follow it on the line and another line exists; otherwise the scan */
/* position is put back on it and it is returned as an ordinary character. */
/* A "--" sequence skips the rest of the line. */
/* */
/* NOTE(review): the line carrying the function name and return type is */
/* not present in this listing (embedded numbering jumps 285 -> 288). */
292 /****************************************************************************/
293 {
294  size_t startline; /* backward reset line number */
295  sizeB_t startoffset; /* backward reset offset */
296 
297  bool blanks = false; /* are blanks significant? */
298 
299  unsigned int character = 0; /* no specific character type yet */
300  /* check if blanks should be returned*/
301  if (previous != OREF_NULL && /* have a previous token, and */
302  /* it is a symbol, literal, right */
303  /* paren or right square bracket */
304  (previous->classId == TOKEN_SYMBOL ||
305  previous->classId == TOKEN_LITERAL ||
306  previous->classId == TOKEN_SOURCE_LITERAL ||
307  previous->classId == TOKEN_RIGHT ||
308  previous->classId == TOKEN_SQRIGHT))
309  {
310  blanks = true; /* blanks are significant here */
311  }
312 
313  /* no more lines in file? */
314  if (this->line_number > this->line_count)
315  {
316  character = CLAUSEEND_EOF; /* return an end-of-file */
317  }
318  else if (!MORELINE()) /* reached the line end? */
319  {
320  character = CLAUSEEND_EOL; /* return an end-of-line */
321  }
322  else
323  {
324  /* while more program to scan */
325  while (this->line_offset < this->current_length)
326  {
327  unsigned int inch = GETCHAR(); /* get the next character */
328  if (inch==' ' || inch=='\t')
329  { /* blank or tab? */
330  if (blanks)
331  { /* is this significant? */
332  character = TOKEN_BLANK; /* have a blank character */
333  break; /* got what we need */
334  }
335  else
336  {
337  this->line_offset++; /* step the position */
338  continue; /* go around again */
339  }
340  }
341  /* possible continuation character? */
342  else if (inch == ',' || inch == '-')
343  {
344  /* line comment? */
345  if (inch == '-' && this->line_offset + 1 < this->current_length &&
346  this->current[size_v(this->line_offset + 1)] == '-')
347  {
348  this->line_offset = this->current_length;
349  break;
350  }
351 
352  character = inch; /* assume for now real character */
353  /* we check for EOL (possibly following blanks and comments) */
354  startoffset = this->line_offset;/* remember the location */
355  startline = this->line_number; /* remember the line position */
356  this->line_offset++; /* step the position */
357 
358  /* skip blanks and comments */
359  while (this->line_offset < this->current_length)
360  {
361  unsigned int inch2 = GETCHAR(); /* pick up the next character */
362  /* comment level start? */
363  if (inch2 == '/' && (this->line_offset + 1 < this->current_length) &&
364  this->current[size_v(this->line_offset + 1)] == '*')
365  {
366  this->comment(); /* go skip over the comment */
367  continue; /* and continue scanning */
368  }
369  /* line comment? */
370  if (inch2 == '-' && (this->line_offset + 1 < this->current_length) &&
371  this->current[size_v(this->line_offset + 1)] == '-')
372  {
373  /* go skip over to the end of line */
374  this->line_offset = this->current_length;
375  break;
376  }
377  /* non-blank outside comment */
378  if (inch2 != ' ' && inch2 != '\t')
379  {
380  break; /* done scanning */
381  }
382  this->line_offset++; /* step over this character */
383  }
384  /* found an EOL? */
385  if (this->line_offset >= this->current_length)
386  {
387  /* more lines in file? */
388  if (this->line_number < this->line_count)
389  {
390  this->nextLine(); /* step to the next line */
391  if (blanks)
392  { /* blanks allowed? */
393  character = TOKEN_BLANK; /* make this a blank token */
394  break; /* finished here */
395  }
396  }
397  }
398  else
399  { /* reset to the starting position */
400  this->position(startline, startoffset);
401  character = inch; /* this is a real character */
402  break; /* other non-blank, done scanning */
403  }
404  }
405  /* comment level start? */
406  else if (inch == '/' && (this->line_offset + 1 < this->current_length) &&
407  this->current[size_v(this->line_offset + 1)] == '*')
408  {
409  this->comment(); /* go skip over the comment */
410  }
411  else
412  { /* got the character */
413  character = inch; /* this is a good character */
414  break; /* done looping */
415  }
416  }
 /* this overrides whatever was chosen in the loop above */
417  if (!MORELINE()) /* fallen off the end of the line? */
418  {
419  character = CLAUSEEND_EOL; /* this is an end of clause */
420  }
421  }
422  return character; /* return the character */
423 }
424 
426  sizeB_t start, /* start of the literal in line */
427  sizeB_t length, /* length of the literal to reduce */
428  int type ) /* type of literal to process */
429 /****************************************************************************/
430 /* Function: Convert and check a hex or binary constant, packing it down */
431 /* into a string object. */
/* */
/* The literal text is read from the current line, beginning at offset */
/* start and running for length characters. type is compared against */
/* LITERAL_HEX and LITERAL_BIN. */
/* */
/* Pass 1 checks blank placement only: a blank in the first position, a */
/* trailing blank, or (once past the first group) a group whose digit */
/* count is odd for hex or not a multiple of four for binary is rejected. */
/* It also counts the non-blank characters. */
/* */
/* Pass 2 packs the digits. A hex literal with an odd digit count puts a */
/* single nibble in its first byte; a binary literal whose digit count is */
/* not a multiple of eight is padded with leading zero bits in its first */
/* byte. Characters that are not valid digits are only detected here and */
/* are reported through syntaxError. */
/* */
/* Returns OREF_NULLSTRING for a zero-length literal, otherwise the packed */
/* string after it has been passed through commonString. */
/* */
/* NOTE(review): the line carrying the function name is not present in */
/* this listing, and the embedded numbering skips 483, 487, 491, 517, 521, */
/* 525, 574 and 621, so the statements that report the grouping errors */
/* (and that set the error location) are not visible here. */
432 /****************************************************************************/
433 {
434  int _first; /* switch to mark first group */
435  int blanks; /* switch to say if scanning blanks */
436  int count; /* count for group */
437  sizeB_t i; /* loop counter */
438  size_t j; /* loop counter */
439  sizeB_t k; /* loop counter */
440  sizeB_t m; /* temporary integer */
441  int byte; /* individual byte of literal */
442  int nibble; /* individual nibble of literal */
443  sizeB_t oddhex; /* odd number of characters in first */
444  sizeB_t inpointer; /* current input position */
445  int outpointer; /* current output pointer */
446  RexxString *value; /* reduced value */
447  sizeB_t real_length; /* real number of digits in string */
448  char error_output[2]; /* used for formatting error */
449 
450  _first = true; /* initialize group flags and */
451  count = 0; /* counters */
452  blanks = false;
453  error_output[1] = '\0'; /* terminate string */
454  /* set initial input/output positions*/
455  inpointer = start; /* get initial starting position */
456 
457  if (length == 0) /* hex or binary null string? */
458  {
459  value = OREF_NULLSTRING; /* this is a null string */
460  }
461  else
462  { /* data to reduce */
463  /* first scan is to check REXX rules for validity of grouping */
464  /* and to remove blanks */
465 
466  real_length = length; /* pick up the string length */
467  for (i = 0; i < length; i++)
468  { /* loop through entire string */
469  /* got a blank? */
470  if (this->current[size_v(inpointer)] == ' ' || this->current[size_v(inpointer)] == '\t')
471  {
472  blanks = true; /* remember scanning blanks */
473  /* don't like initial blanks or groups after the first */
474  /* which are not in twos (hex) or fours (binary) */
475  if (i == 0 || /* if at the beginning */
476  (!_first && /* or past first group and not the */
477  /* correct size */
478  (((count&1) && type == LITERAL_HEX) ||
479  ((count&3) && type == LITERAL_BIN))))
480  {
481  m = i+1; /* place holder for new_integer invocation */
482  // update the error information
484  if (type == LITERAL_HEX) /* hex string? */
485  {
486  /* report correct error */
488  }
489  else /* need the binary message */
490  {
492  }
493  }
494  count = 0; /* this starts a new group */
495  real_length--; /* this shortens the value */
496 
497  }
498  else
499  {
500  if (blanks) /* had a blank group? */
501  {
502  _first = false; /* no longer on the lead grouping */
503  }
504  blanks = false; /* not processing blanks now */
505  count++; /* count this significant character */
506  }
507  inpointer++; /* step the input position */
508  }
509 
510  if (blanks || /* trailing blanks or */
511  (!_first && /* last group isn't correct count? */
512  (((count&1) && type == LITERAL_HEX) ||
513  ((count&3) && type == LITERAL_BIN))))
514  {
515  m = i-1; /* place holder for new_integer invocation */
516  // update the error information
518  if (type == LITERAL_HEX) /* hex string? */
519  {
520  /* report correct error */
522  }
523  else /* need the binary message */
524  {
526  }
527  }
528 
529  /* second scan is to create the string value determined by the */
530  /* hex or binary constant. */
531 
532  i = real_length; /* get the adjusted length */
533  /* reset the scan pointers */
534  inpointer = start; /* reset the scan pointer */
535  outpointer = 0; /* set the position at the start */
536  if (type == LITERAL_HEX)
537  { /* hex literal? */
538  oddhex = i&1; /* get any odd count */
539  i >>= 1; /* divide by 2 ... and */
540  i += oddhex; /* add in the odd count */
541  value = raw_string(i); /* get the final value */
542 
543  for (j = 0; j < i; j++)
544  { /* loop for the appropriate count */
545  byte = 0; /* current byte is zero */
 /* k starts at oddhex, so the first byte takes one nibble when odd */
546  for (k = oddhex; k < 2; k++)
547  { /* loop either 1 or 2 times */
548  /* get the next nibble */
549  nibble = this->current[size_v(inpointer)];
550  inpointer++; /* step to the next character */
551  while (nibble == ' ' || nibble == '\t')
552  { /* step over any inter-nibble blanks */
553  /* get the next nibble */
554  nibble = this->current[size_v(inpointer)];
555  inpointer++; /* step to the next character */
556  }
557  /* real digit? */
558  if (nibble >= '0' && nibble <= '9')
559  nibble -= '0'; /* make base zero */
560  /* lowercase hex digit? */
561  else if (nibble >= 'a' && nibble <= 'f')
562  {
563  nibble -= 'a'; /* subtract lowest and */
564  nibble += 10; /* add 10 to digit */
565  } /* uppercase hex digit? */
566  else if (nibble >= 'A' && nibble <= 'F')
567  {
568  nibble -= 'A'; /* subtract lowest and */
569  nibble += 10; /* add 10 to digit */
570  }
571  else
572  {
573  // update the error information
575  error_output[0] = nibble; /* copy the error character */
576  /* report the invalid character */
577  syntaxError(Error_Invalid_hex_invhex, new_string(&error_output[0]));
578  }
579  byte <<= 4; /* shift the last nibble over */
580  byte += nibble; /* add in the next nibble */
581  }
582  oddhex = 0; /* remainder are full bytes */
583  value->putCharB(outpointer, byte);/* store this in the output position */
584  outpointer++; /* step to the next position */
585  }
586  value = this->commonString(value); /* now force to a common string */
587  }
588  else
589  { /* convert to binary */
590  oddhex = i&7; /* get the leading byte count */
591  if (oddhex != 0) /* incomplete byte? */
592  {
593  oddhex = 8 - oddhex; /* get the padding count */
594  }
595  i += oddhex; /* and add that into total */
596  i >>= 3; /* get the byte count */
597  value = raw_string(i); /* get the final value */
598 
599  for (j = 0; j < i; j++)
600  { /* loop through the entire string */
601  byte = 0; /* zero the byte */
 /* k starts at the padding count, leaving the high bits of byte one zero */
602  for (k = oddhex; k < 8; k++)
603  { /* loop through each byte segment */
604  /* get the next bit */
605  nibble = this->current[size_v(inpointer)];
606  inpointer++; /* step to the next character */
607  while (nibble == ' ' || nibble == '\t')
608  { /* step over any inter-nibble blanks */
609  /* get the next nibble */
610  nibble = this->current[size_v(inpointer)];
611  inpointer++; /* step to the next character */
612  }
613  byte <<= 1; /* shift the accumulator */
614  if (nibble == '1') /* got a one bit? */
615  {
616  byte++; /* add in the bit */
617  }
618  else if (nibble != '0')
619  { /* not a '0' either? */
620  // update the error information
622  error_output[0] = nibble; /* copy the error character */
623  /* report the invalid character */
624  syntaxError(Error_Invalid_hex_invbin, new_string(&error_output[0]));
625  }
626  }
627  oddhex = 0; /* use 8 bits for the remaining group*/
628  value->putCharB(outpointer, byte);/* store this in the output position */
629  outpointer++; /* step to the next position */
630  }
631  value = this->commonString(value); /* now force to a common string */
632  }
633  }
634  return value; /* return newly created string */
635 }
636 
/****************************************************************************/
/* Function: Scan a source literal up to its closing '}' and return it as */
/* a single TOKEN_SOURCE_LITERAL token. */
/* */
/* clause_free - value written back to clause->free before the token is */
/* created, discarding the tokens scanned along the way */
/* location - token location; taken by value, so the caller's object */
/* is not modified */
/* */
/* Each pass of the loop first asks nextSpecial for a '}'; when that fails */
/* one token is scanned with sourceNextToken and the loop repeats. The */
/* token's string value comes from extract(location, true) followed by */
/* commonString. */
/* */
/* NOTE(review): the embedded numbering skips 655 and 657-658 inside the */
/* end-of-source branch, so the statement that reports the unterminated */
/* literal is not visible in this listing. */
/****************************************************************************/
637 RexxToken *RexxSource::sourceLiteral(size_t clause_free, SourceLocation location)
638 {
639  sizeB_t start = this->line_offset; /* save the starting point */
640  size_t startline = this->line_number; /* remember the starting position */
641  sizeB_t sourceLiteralEnd; /* end of source literal */
642  sizeB_t length; /* length of extracted token */
643  RexxToken *previous = OREF_NULL;
644  for (;;)
645  { /* spin through the source literal */
646  if (this->nextSpecial('}', location))
647  {
 /* nextSpecial has already stepped past the '}' */
648  sourceLiteralEnd = this->line_offset - 1; /* remember end location */
649  break;
650  }
651  RexxToken *token = this->sourceNextToken(previous);
652  if (token == OREF_NULL)
653  { /* reached the end of the source? */
654  this->clause->setEnd(this->line_number, this->line_offset);
656  // The source literal can be multiline, so must limit the amount of lines displayed in the error message
659  }
660  previous = token;
661  }
662  this->clause->setEnd(this->line_number, this->line_offset);
663  length = sourceLiteralEnd - start; /* get length of literal data */
664  RexxString *value = this->extract(location, true);
665  value = this->commonString(value);
666  location.setLimitedTrace(true); /* don't do that before this->extract */
667  this->endLocation(location); /* record the end position */
668  this->clause->free = clause_free; /* all the tokens scanned for the source literal are replaced by the source literal token */
669  return this->clause->newToken(TOKEN_SOURCE_LITERAL, 0, value, location);
670 }
671 
673  RexxToken *previous ) /* previous token scanned off */
674 /*********************************************************************/
675 /* Extract a token from the source and create a new token object. */
676 /* The token type and sub-type are set in the token, and any string */
677 /* value extracted. */
678 /*********************************************************************/
679 {
680  RexxToken *token = OREF_NULL; /* working token */
681  RexxString *value; /* associate string value */
682  unsigned int inch; /* working input character */
683  sizeB_t eoffset; /* location of exponential */
684  int state; /* state of symbol scanning */
685  sizeB_t start; /* scan start location */
686  sizeB_t litend; /* end of literal data */
687  sizeB_t length; /* length of extracted token */
688  int dot_count; /* count of periods in symbol */
689  unsigned int literal_delimiter; /* literal string delimiter */
690  int type; /* type of literal token */
691  sizeB_t i; /* loop counter */
692  sizeB_t j; /* loop counter */
693  int subclass; /* sub type of the token */
694  int numeric; /* numeric type flag */
695  SourceLocation location; /* token location information */
696  char tran; /* translated character */
697  char badchar[4]; /* working buffer for errors */
698  char hexbadchar[4]; /* working buffer for errors */
699 
700  /* definitions of states of exponential numeric scan */
701 #define EXP_START 0
702 #define EXP_EXCLUDED 1
703 #define EXP_DIGIT 2
704 #define EXP_SPOINT 3
705 #define EXP_POINT 4
706 #define EXP_E 5
707 #define EXP_ESIGN 6
708 #define EXP_EDIGIT 7
709 // When parsing a symbol having the form <number><after number>, stop immediately
710 // after number, where number is such that datatype(number) = "NUM".
711 // Ex: 2a is the number 2 followed by the symbol A.
712 #define AFTER_INTEGER 8
713 #define AFTER_NUMBER 9
714 
715  if (this->clause->cachedToken != OREF_NULL)
716  {
717  token = this->clause->cachedToken;
718  this->clause->cachedToken = OREF_NULL;
719  return token;
720  }
721 
722  for (;;)
723  { /* loop until we find a significant */
724  /* token */
725  inch = this->locateToken(previous);/* locate the next token position */
726 
727  // record a starting location.
729 
730  if (inch == CLAUSEEND_EOF)
731  { /* reach the end of the source? */
732  token = OREF_NULL; /* no token to return */
733  break; /* finished */
734  }
735  else if (inch == CLAUSEEND_EOL)
736  { /* some other end-of-clause */
737  /* make end the end of the line */
738  location.setEndOffset(current_length);
739  /* return a clause terminator */
740  token = this->clause->newToken(TOKEN_EOC, CLAUSEEND_EOL, OREF_NULL, location);
741  this->nextLine(); /* step to the next line */
742  break; /* have something to return */
743  }
744  else if (inch == TOKEN_BLANK )
745  { /* some sort of white space? */
746  /* now go ahead to the next token */
747  inch = this->locateToken(OREF_NULL);
748  /* is this blank significant? */
749  if (inch != CLAUSEEND_EOL && /* not at the end */
750  (isSymbolCharacter(inch) || /* and next is a symbol token */
751  inch == '\"' || /* or start of a " quoted literal */
752  inch == '\'' || /* or start of a ' quoted literal */
753  inch == '(' || /* or a left parenthesis */
754  inch == '[' || /* or a left square bracket */
755  inch == '{' )) /* or a left curly bracket */
756  {
757  /* return blank token */
758  token = this->clause->newToken(TOKEN_BLANK, OPERATOR_BLANK, (RexxString *)OREF_BLANK, location);
759  }
760  else /* non-significant blank */
761  {
762  continue; /* just loop around again */
763  }
764  }
765  else
766  { /* non-special token type */
767  /* process different token types */
768  tran = translateChar(inch); /* do the table mapping */
769  if (tran != 0)
770  { /* have a symbol character? */
771  state = EXP_START; /* in a clean state now */
772  eoffset = 0; /* no exponential sign yet */
773  start = this->line_offset; /* remember token start position */
774  dot_count = 0; /* no periods yet */
775  for (;;)
776  { /* loop through the token */
777  if (inch == '.') /* have a period? */
778  {
779  dot_count++; /* remember we saw this one */
780  }
781 
782  /* finite state machine to establish numeric constant (with possible */
783  /* included sign in exponential form) */
784 
785  switch (state)
786  { /* process based on current state */
787 
788  case EXP_START: /* beginning of scan */
789  /* have a digit at the start? */
790  if (inch >= '0' && inch <= '9')
791  {
792  state = EXP_DIGIT; /* now scanning digits */ // 0..9 ==> EXP_DIGIT
793  }
794  else if (inch == '.') /* start with a decimal point? */
795  {
796  state = EXP_SPOINT; /* now scanning after the decimal */ // . ==> EXP_SPOINT
797  }
798  else /* must be a non-numeric character */
799  {
800  state = EXP_EXCLUDED; /* no longer a number */ // (neither 0..9 nor .) ==> EXP_EXCLUDED
801  }
802  break; /* go process the next character */
803 
804  case EXP_DIGIT: /* have at least one digit mantissa */
805  if (inch=='.') /* decimal point? */
806  {
807  state = EXP_POINT; /* we've hit a decimal point */ // (0..9)+ . ==> EXP_POINT
808  }
809  else if (tran=='E') /* start of exponential? */
810  {
811  state = EXP_E; /* remember we've had the 'E' form */ // (0..9)+ e|E ==> EXP_E
812  }
813  /* non-digit? */
814  else if (inch < '0' || inch > '9')
815  {
816  state = AFTER_INTEGER; // EXP_EXCLUDED; /* no longer scanning a number */ // (0..9)+ (not 0..9) ==> AFTER_NUMBER
817  }
818  /* a digit leaves the state unchanged at EXP_DIGIT */
819  break; /* go get the next character */ // (0..9)+ (0..9) ==> EXP_DIGIT
820 
821  case EXP_SPOINT: /* leading decimal point */
822  /* not a digit? */
823  if (inch < '0' || inch > '9')
824  {
825  state = EXP_EXCLUDED; /* not a number */ // . (not 0..9) ==> EXP_EXCLUDED
826  }
827  else /* digit character */
828  {
829  state = EXP_POINT; /* processing a decimal number */ // . (0..9) ==> EXP_POINT
830  }
831  break; /* go process the next character */
832 
833  case EXP_POINT: /* have a decimal point */
834  if (tran == 'E') /* found the exponential? */
835  {
836  state = EXP_E; /* set exponent state */ // (0..9)+ . (0..9)* e|E ==> EXP_E (yes, can have ZERO digits after decimal point)
837  // . (0..9)+ e|E ==> EXP_E
838  }
839  /* non-digit found? */
840  else if (inch < '0' || inch > '9')
841  {
842  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */ // (0..9)+ . (0..9)* (not 0..9) ==> AFTER_NUMBER (yes, can have ZERO digits after deciaml point)
843  // . (0..9)+ (not 0..9) ==> AFTER_NUMBER
844  }
845  /* a digit leaves the state unchanged at EXP_POINT */
846  break; /* go get another character */ // (0..9)+ . (0..9)* (0..9) ==> EXP_POINT
847  // . (0..9)+ (0..9) ==> EXP_POINT
848  case EXP_E: /* just had an exponent */
849  /* next one a digit? */
850  if (inch >= '0' && inch <= '9')
851  {
852  state = EXP_EDIGIT; /* now looking for exponent digits */ // (0..9)+ e|E (0..9) ==> EXP_EDIGIT
853  // (0..9)+ . (0..9)* e|E (0..9) ==> EXP_EDIGIT
854  // . (0..9)+ e|E (0..9) ==> EXP_EDIGIT
855  }
856 
857  /* a sign will be collected by the apparent end of symbol code below */
858  break; /* finished */
859 
860  case EXP_ESIGN: /* just had a signed exponent */
861  /* got a digit? */
862  if (inch >= '0' && inch <= '9')
863  {
864  state = EXP_EDIGIT; /* now looking for the exponent */ // (0..9)+ e|E +|- (0..9) ==> EXP_EDIGIT
865  // (0..9)+ . (0..9)* e|E +|- (0..9) ==> EXP_EDIGIT
866  // . (0..9)+ e|E +|- (0..9) ==> EXP_EDIGIT
867  }
868  else
869  {
870  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */ // (0..9)+ e|E +|- (not 0..9) ==> AFTER_NUMBER
871  // (0..9)+ . (0..9)* e|E +|- (not 0..9) ==> AFTER_NUMBER
872  // . (0..9)+ e|E +|- (not 0..9) ==> AFTER_NUMBER
873  }
874  break; /* go get the next digits */
875 
876  case EXP_EDIGIT: /* processing the exponent digits */
877  /* not a digit? */
878  if (inch < '0' || inch > '9')
879  {
880  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */ // (0..9)+ e|E (0..9)+ (not 0..9) ==> AFTER_NUMBER
881  // (0..9)+ . (0..9)* e|E (0..9)+ (not 0..9) ==> AFTER_NUMBER
882  // . (0..9)+ e|E (0..9)+ (not 0..9) ==> AFTER_NUMBER
883  // (0..9)+ e|E +|- (0..9)+ (not 0..9) ==> AFTER_NUMBER
884  // (0..9)+ . (0..9)* e|E +|- (0..9)+ (not 0..9) ==> AFTER_NUMBER
885  // . (0..9)+ e|E +|- (0..9)+ (not 0..9) ==> AFTER_NUMBER
886  }
887  break; /* go get the next character */ // (0..9)+ e|E (0..9)+ (0..9) ==> EXP_EDIGIT
888  // (0..9)+ . (0..9)* e|E (0..9)+ (0..9) ==> EXP_EDIGIT
889  // . (0..9)+ e|E (0..9)+ (0..9) ==> EXP_EDIGIT
890  // (0..9)+ e|E +|- (0..9)+ (0..9) ==> EXP_EDIGIT
891  // (0..9)+ . (0..9)* e|E +|- (0..9)+ (0..9) ==> EXP_EDIGIT
892  // . (0..9)+ e|E +|- (0..9)+ (0..9) ==> EXP_EDIGIT
893 
894  /* once EXP_EXCLUDED is reached the state doesn't change */
895  }
896 
897  if (state == AFTER_INTEGER || state == AFTER_NUMBER)
898  {
899  break;
900  }
901 
902  if (state == EXP_E && eoffset == 0)
903  {
904  eoffset = this->line_offset; // remember current position BEFORE skipping e|E : in case of bad exponent, I don't want to include e|E in the number.
905  }
906 
907  this->line_offset++; /* step the source pointer */
908 
909  if (state == EXP_EDIGIT)
910  {
911  eoffset = this->line_offset; // any digit after e|E is part of the number
912  }
913 
914  /* had a bad exponent part? */
915  //if (eoffset != 0 && state == EXP_EXCLUDED)
916  //{
917  // /* back up the scan pointer */
918  // this->line_offset = eoffset;
919  // break; /* and we're finished with this */
920  //}
921 
922  if (!MORELINE()) /* reached the end of the line? */
923  {
924  break; /* done processing */
925  }
926 
927  inch = GETCHAR(); /* get the next character */
928  tran = translateChar(inch); /* translate the next character */
929  if (tran != 0) /* good symbol character? */
930  {
931  continue; /* loop through the state machine */
932  }
933  /* check for sign in correct state */
934  if (state == EXP_E && (inch == '+' || inch == '-'))
935  {
936  // /* remember current position */
937  // eoffset = this->line_offset;
938  state = EXP_ESIGN; /* now looking for the exponent */ // (0..9)+ e|E +|- ==> EXP_ESIGN
939  // (0..9)+ . (0..9)* e|E +|- ==> EXP_ESIGN
940  // . (0..9)+ e|E +|- ==> EXP_ESIGN
941  this->line_offset++; /* step past the sign */
942  if (!MORELINE())
943  { /* reached the end of the line? */
944  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */
945  break; /* quit looping */
946  }
947  inch = GETCHAR(); /* get the next character */
948  tran = translateChar(inch);/* translate the next character */
949  if (tran != 0) /* good character? */
950  {
951  continue; /* loop around */
952  }
953  else
954  { /* bad character */
955  state = AFTER_NUMBER; // EXP_EXCLUDED; /* not a number */
956  break; /* break out of here */
957  }
958  }
959  else
960  {
961  break; /* reached a non-symbol character */
962  }
963  }
964  /* this must be the end of the symbol - check whether we have too much */
965  /* need to step backward? */
966  if (eoffset != 0 && state != EXP_EDIGIT)
967  {
968  this->line_offset = eoffset; /* restore the source pointer */
969  }
970  /* get the token length */
971  length = this->line_offset - start;
972  value = raw_string(length); /* get the final value */
973  numeric = 0; /* not a numeric constant yet */
974  for (i = 0; i < length; i++)
975  { /* copy over and translate the value */
976  /* copy over the symbol value */
977  /* (translating to uppercase) */
978  /* get the next character */
979  inch = this->current[size_v(start + i)]; // todo m17n
980  if (isSymbolCharacter(inch)) /* normal symbol character (not +/-) */
981  {
982  inch = translateChar(inch); /* translate to uppercase */
983  }
984  value->putCharB(i, inch);
985  }
986  value->setUpperOnly(); /* only contains uppercase */
987  /* now force to a common string */
988  value = this->commonString(value);
989  /* record current position in clause */
990  this->clause->setEnd(this->line_number, this->line_offset);
991  if (length > (size_t)MAX_SYMBOL_LENGTH)/* result too long? */
992  {
993  // update the error information
995  /* report the error */
997  }
998  inch = this->current[size_v(start)]; /* get the first character */
999  if (length == 1 && inch == '.')/* have a solo period? */
1000  {
1001  subclass = SYMBOL_DUMMY; /* this is the place holder */
1002  }
1003  /* have a digit? */
1004  else if (inch >= '0' && inch <= '9')
1005  {
1006  subclass = SYMBOL_CONSTANT; /* have a constant symbol */
1007  /* can we optimize to an integer? */
1008  if ((state == EXP_DIGIT || state == AFTER_INTEGER) && length < Numerics::DEFAULT_DIGITS)
1009  {
1010  /* no leading zero or only zero? */
1011  if (inch != '0' || length == 1)
1012  {
1013  /* we can make this an integer object*/
1014  numeric = INTEGER_CONSTANT;
1015  }
1016  }
1017  }
1018  else if (inch == '.')
1019  {
1020  /* this is an environment symbol */
1021  subclass = SYMBOL_DOTSYMBOL;
1022  }
1023  else
1024  { /* variable type symbol */
1025  /* set the default extended type */
1026  subclass = SYMBOL_VARIABLE;
1027  if (dot_count > 0)
1028  { /* have a period in the name? */
1029  /* end in a dot? */
1030  if (dot_count == 1 && value->getCharB(length-1) == '.')
1031  {
1032  /* this is a stem variable */
1033  subclass = SYMBOL_STEM;
1034  }
1035  else /* have a compound variable */
1036  {
1037  subclass = SYMBOL_COMPOUND;
1038  }
1039  }
1040  }
1041  this->endLocation(location); /* record the end position */
1042  /* get a symbol token */
1043  token = this->clause->newToken(TOKEN_SYMBOL, subclass, value, location);
1044  token->setNumeric(numeric); /* record any numeric side info */
1045 
1046  if (state == AFTER_INTEGER || state == AFTER_NUMBER)
1047  {
1048  // The tokenizer has split a symbol of the form <number><after number> into two distinct tokens.
1049  // An abuttal operator is inserted to re-concatenate <number> with <after number>.
1050  // In this context, the precedence of this abuttal operator is very high, to ensure both tokens are always linked together.
1051  SourceLocation location;
1052  this->startLocation(location); // The token abuttal starts at current source position
1053  this->endLocation(location); // The token abuttal ends at current source position (empty string)
1054  // Creates the token of the abuttal operator.
1055  RexxToken *token = this->clause->newToken(TOKEN_OPERATOR, OPERATOR_ABUTTAL, OREF_NULLSTRING, location);
1056  token->precedence = HIGHEST_PRECEDENCE;
1057  this->clause->cachedToken = token; // Will be returned on next call
1058  }
1059  }
1060  /* start of a quoted string? */
1061  else if (inch=='\'' || inch=='\"')
1062  {
1063  literal_delimiter = inch; /* save the starting character */
1064  start = this->line_offset + 1; /* save the starting point */
1065  dot_count = 0; /* no doubled quotes yet */
1066  type = 0; /* working with a straight literal */
1067  for (;;)
1068  { /* spin through the string */
1069  this->line_offset++; /* step the pointer */
1070  if (!MORELINE())
1071  { /* reached the end of the line? */
1072  /* record current position in clause */
1073  this->clause->setEnd(this->line_number, this->line_offset);
1074  // update the error information
1076  if (literal_delimiter == '\'')
1077  {
1078  /* raise the appropriate error */
1080  }
1081  else
1082  {
1083  /* must be a double quote */
1085  }
1086  }
1087  inch = GETCHAR(); /* get the next character */
1088  /* is this the delimiter? */
1089  if (literal_delimiter == inch)
1090  {
1091  /* remember end location */
1092  litend = this->line_offset - 1;
1093  this->line_offset++; /* step to the next character */
1094  if (!MORELINE()) /* end of the line? */
1095  {
1096  break; /* we're finished */
1097  }
1098  inch = GETCHAR(); /* get the next character */
1099  /* not a doubled quote? */
1100  if (inch != literal_delimiter)
1101  {
1102  break; /* got the end */
1103  }
1104  dot_count++; /* remember count of doubled quotes */
1105  }
1106  }
1107  if (MORELINE())
1108  { /* have more on this line? */
1109  inch = GETCHAR(); /* get the next character */
1110  /* potentially a hex string? */
1111  if (inch == 'x' || inch == 'X')
1112  {
1113  this->line_offset++; /* step to the next character */
1114  /* the end of the line, or */
1115  /* have another symbol character */
1116  if (MORELINE() && isSymbolCharacter(GETCHAR()))
1117  {
1118  this->line_offset--; /* step back to the X */
1119  }
1120  else
1121  {
1122  type = LITERAL_HEX; /* set the appropriate type */
1123  }
1124  }
1125  /* potentially a binary string? */
1126  else if (inch == 'b' || inch == 'B')
1127  {
1128  this->line_offset++; /* step to the next character */
1129  /* the end of the line, or */
1130  /* have another symbol character */
1131  if (MORELINE() && isSymbolCharacter(GETCHAR()))
1132  {
1133  this->line_offset--; /* step back to the B */
1134  }
1135  else
1136  {
1137  type = LITERAL_BIN; /* set the appropriate type */
1138  }
1139  }
1140  }
1141  length = litend - start + 1; /* calculate the literal length */
1142  /* record current position in clause */
1143  this->clause->setEnd(this->line_number, this->line_offset);
1144  if (type) /* need to pack a literal? */
1145  {
1146  /* compress into packed form */
1147  value = this->packLiteral(start, litend - start + 1, type) ;
1148  }
1149  else
1150  {
1151  length = litend - start + 1; /* get length of literal data */
1152  /* get the final value string */
1153  value = raw_string(length - dot_count);
1154  /* copy over and translate the value */
1155  for (i = 0, j = 0; j < length; i++, j++)
1156  {
1157  /* get the next character */
1158  inch = this->current[size_v(start + j)];
1159  /* same as our delimiter? */
1160  if (inch == literal_delimiter)
1161  {
1162  j++; /* step one extra */
1163  }
1164  value->putCharB(i, inch); /* copy over the literal data */
1165  }
1166  /* now force to a common string */
1167  value = this->commonString(value);
1168  }
1169  this->endLocation(location); /* record the end position */
1170  /* get a string token */
1171  token = this->clause->newToken(TOKEN_LITERAL, 0, value, location);
1172  }
1173  else
1174  { /* other special character */
1175  this->line_offset++; /* step past it */
1176 
1177  switch (inch)
1178  { /* process operators and punctuation */
1179 
1180  case ')': /* right parenthesis? */
1181  /* this is a special character class */
1182  token = this->clause->newToken(TOKEN_RIGHT, 0, OREF_NULL, location);
1183  break;
1184 
1185  case ']': /* right square bracket */
1186  /* this is a special character class */
1187  token = this->clause->newToken(TOKEN_SQRIGHT, 0, OREF_NULL, location);
1188  break;
1189 
1190  case '}':
1191  /* end of source literal, but when seen here, it's an error */
1193 
1194  case '(': /* left parenthesis */
1195  /* this is a special character class */
1196  token = this->clause->newToken(TOKEN_LEFT, 0, OREF_NULL, location);
1197  break;
1198 
1199  case '[': /* left square bracket */
1200  /* this is a special character class */
1201  token = this->clause->newToken(TOKEN_SQLEFT, 0, OREF_NULL, location);
1202  break;
1203 
1204  case '{':
1205  /* start of source literal */
1206  token = this->sourceLiteral(this->clause->free, location);
1207  break;
1208 
1209  case ',': /* comma */
1210  /* this is a special character class */
1211  token = this->clause->newToken(TOKEN_COMMA, 0, OREF_NULL, location);
1212  break;
1213 
1214  case ';': /* semicolon */
1215  /* this is a special character class */
1216  token = this->clause->newToken(TOKEN_EOC, CLAUSEEND_SEMICOLON, OREF_NULL, location);
1217  break;
1218 
1219  case ':': /* colon */
1220  /* next one a colon also? */
1221  if (this->nextSpecial(':', location))
1222  {
1223  /* this is a special character class */
1224  token = this->clause->newToken(TOKEN_DCOLON, 0, OREF_NULL, location);
1225  }
1226  else
1227  {
1228  /* this is a special character class */
1229  token = this->clause->newToken(TOKEN_COLON, 0, OREF_NULL, location);
1230  }
1231  break;
1232 
1233  case '~': /* message send? */
1234  /* next one a tilde also? */
1235  if (this->nextSpecial('~', location))
1236  /* this is a special character class */
1237  token = this->clause->newToken(TOKEN_DTILDE, 0, OREF_NULL, location);
1238  else
1239  /* this is a special character class */
1240  token = this->clause->newToken(TOKEN_TILDE, 0, OREF_NULL, location);
1241  break;
1242 
1243  case '+': /* plus sign */
1244  /* addition operator */
1245  token = OPERATOR(PLUS); /* this is an operator class */
1246  CHECK_ASSIGNMENT(PLUS, token); // this is allowed as an assignment shortcut
1247  break;
1248 
1249  case '-': /* minus sign */
1250  /* subtraction operator */
1251  token = OPERATOR(SUBTRACT); /* this is an operator class */
1252  CHECK_ASSIGNMENT(SUBTRACT, token); // this is allowed as an assignment shortcut
1253  break;
1254 
1255  case '%': /* percent sign */
1256  /* integer divide operator */
1257  token = OPERATOR(INTDIV); /* this is an operator class */
1258  CHECK_ASSIGNMENT(INTDIV, token); // this is allowed as an assignment shortcut
1259  break;
1260 
1261  case '/': /* forward slash */
1262  /* this is division */
1263  /* next one a slash also? */
1264  if (this->nextSpecial('/', location))
1265  {
1266 
1267  token = OPERATOR(REMAINDER);
1268  CHECK_ASSIGNMENT(REMAINDER, token); // this is allowed as an assignment shortcut
1269  }
1270  /* this is an operator class */
1271  else
1272  {
1273  token = OPERATOR(DIVIDE); /* this is an operator class */
1274  CHECK_ASSIGNMENT(DIVIDE, token); // this is allowed as an assignment shortcut
1275  }
1276  break;
1277 
1278  case '*': /* asterisk? */
1279  /* this is multiply */
1280  /* next one a star also? */
1281  if (this->nextSpecial('*', location))
1282  {
1283  token = OPERATOR(POWER); /* this is an operator class */
1284  CHECK_ASSIGNMENT(POWER, token); // this is allowed as an assignment shortcut
1285  }
1286  else /* this is an operator class */
1287  {
1288 
1289  token = OPERATOR(MULTIPLY);
1290  CHECK_ASSIGNMENT(MULTIPLY, token); // this is allowed as an assignment shortcut
1291  }
1292  break;
1293 
1294  case '&': /* ampersand? */
1295  /* this is the and operator */
1296  /* next one an ampersand also? */
1297  if (this->nextSpecial('&', location))
1298  {
1299 
1300  token = OPERATOR(XOR); /* this is an operator class */
1301  CHECK_ASSIGNMENT(XOR, token); // this is allowed as an assignment shortcut
1302  }
1303  else /* this is an operator class */
1304  {
1305  token = OPERATOR(AND);
1306  CHECK_ASSIGNMENT(AND, token); // this is allowed as an assignment shortcut
1307  }
1308  break;
1309 
1310  case '|': /* vertical bar? */
1311  /* this is an or operator */
1312  /* next one a vertical bar also? */
1313  if (this->nextSpecial('|', location))
1314  {
1315  /* this is a concatenation */
1316  token = OPERATOR(CONCATENATE);
1317  CHECK_ASSIGNMENT(CONCATENATE, token); // this is allowed as an assignment shortcut
1318  }
1319  else /* this is an operator class */
1320  {
1321 
1322  token = OPERATOR(OR); /* this is the OR operator */
1323  CHECK_ASSIGNMENT(OR, token); // this is allowed as an assignment shortcut
1324  }
1325  break;
1326 
1327  case '=': /* equal sign? */
1328  /* set this as an equal */
1329  /* next one an equal sign also? */
1330  if (this->nextSpecial('=', location))
1331  {
1332  /* this is an operator class */
1333  token = OPERATOR(STRICT_EQUAL);
1334  }
1335  else /* this is an operator class */
1336  {
1337  token = OPERATOR(EQUAL);
1338  }
1339  break;
1340 
1341  case '<': /* less than sign? */
1342  /* next one a less than also? */
1343  if (this->nextSpecial('<', location))
1344  {
1345  /* have an equal sign after that? */
1346  if (this->nextSpecial('=', location))
1347  {
1348  /* this is an operator class */
1349  token = OPERATOR(STRICT_LESSTHAN_EQUAL);
1350  }
1351  else /* this is an operator class */
1352  {
1353  token = OPERATOR(STRICT_LESSTHAN);
1354  }
1355  }
1356  /* next one an equal sign? */
1357  else if (this->nextSpecial('=', location))
1358  {
1359  /* this is the <= operator */
1360  token = OPERATOR(LESSTHAN_EQUAL);
1361  }
1362  /* next one a greater than sign? */
1363  else if (this->nextSpecial('>', location))
1364  {
1365  /* this is the <> operator */
1366  token = OPERATOR(LESSTHAN_GREATERTHAN);
1367  }
1368  else /* this simply the < operator */
1369  {
1370  token = OPERATOR(LESSTHAN);
1371  }
1372  break;
1373 
1374  case '>': /* greater than sign? */
1375  /* next one a greater than also? */
1376  if (this->nextSpecial('>', location))
1377  {
1378  /* have an equal sign after that? */
1379  if (this->nextSpecial('=', location))
1380  {
1381  /* this is the >>= operator */
1382  token = OPERATOR(STRICT_GREATERTHAN_EQUAL);
1383  }
1384  else /* this is the >> operator */
1385  {
1386  token = OPERATOR(STRICT_GREATERTHAN);
1387  }
1388  }
1389  /* next one an equal sign? */
1390  else if (this->nextSpecial('=', location))
1391  {
1392  /* this is the >= operator */
1393  token = OPERATOR(GREATERTHAN_EQUAL);
1394  }
1395  /* next one a less than sign? */
1396  else if (this->nextSpecial('<', location))
1397  {
1398  /* this is the >< operator */
1399  token = OPERATOR(GREATERTHAN_LESSTHAN);
1400  }
1401  else /* this simply the > operator */
1402  {
1403  token = OPERATOR(GREATERTHAN);
1404  }
1405  break;
1406 
1407  case '\\': /* backslash */
1408  /* next one an equal sign? */
1409  if (this->nextSpecial('=', location))
1410  {
1411  /* have an equal sign after that? */
1412  if (this->nextSpecial('=', location))
1413  {
1414  /* this is the \== operator */
1415  token = OPERATOR(STRICT_BACKSLASH_EQUAL);
1416  }
1417  else /* this is the \= operator */
1418  {
1419  token = OPERATOR(BACKSLASH_EQUAL);
1420  }
1421  }
1422  /* next one a greater than sign? */
1423  else if (this->nextSpecial('>', location))
1424  {
1425  /* have another greater than next? */
1426  if (this->nextSpecial('>', location))
1427  {
1428  /* this is the \>> operator */
1429  token = OPERATOR(STRICT_BACKSLASH_GREATERTHAN);
1430  }
1431  else /* this is the > operator */
1432  {
1433  token = OPERATOR(BACKSLASH_GREATERTHAN);
1434  }
1435  }
1436  /* next one a less than sign? */
1437  else if (this->nextSpecial('<', location))
1438  {
1439  /* have another less than next? */
1440  if (this->nextSpecial('<', location))
1441  {
1442  /* this is the \<< operator */
1443  token = OPERATOR(STRICT_BACKSLASH_LESSTHAN);
1444  }
1445  else /* this is the < operator */
1446  {
1447  token = OPERATOR(BACKSLASH_LESSTHAN);
1448  }
1449  }
1450  else /* this is just the NOT operator */
1451  {
1452  token = OPERATOR(BACKSLASH);
1453  }
1454  break;
1455 
1456  // we accept either of these as alternatives
1457  case (unsigned char)0xAA: /* logical not (need unsigned cast) */
1458  case (unsigned char)0xAC: /* logical not (need unsigned cast) */
1459  /* next one an equal sign? */
1460  if (this->nextSpecial('=', location))
1461  {
1462  /* have an equal sign after that? */
1463  if (this->nextSpecial('=', location))
1464  {
1465  /* this is the \== operator */
1466  token = OPERATOR(STRICT_BACKSLASH_EQUAL);
1467  }
1468  else /* this is the \= operator */
1469  {
1470  token = OPERATOR(BACKSLASH_EQUAL);
1471  }
1472  }
1473  /* next one a greater than sign? */
1474  else if (this->nextSpecial('>', location))
1475  {
1476  /* have another greater than next? */
1477  if (this->nextSpecial('>', location))
1478  {
1479  /* this is the \>> operator */
1480  token = OPERATOR(STRICT_BACKSLASH_GREATERTHAN);
1481  }
1482  else /* this is the > operator */
1483  {
1484  token = OPERATOR(BACKSLASH_GREATERTHAN);
1485  }
1486  }
1487  /* next one a less than sign? */
1488  else if (this->nextSpecial('<', location))
1489  {
1490  /* have another less than next? */
1491  if (this->nextSpecial('<', location))
1492  {
1493  /* this is the \<< operator */
1494  token = OPERATOR(STRICT_BACKSLASH_LESSTHAN);
1495  }
1496  else /* this is the < operator */
1497  {
1498  token = OPERATOR(BACKSLASH_LESSTHAN);
1499  }
1500  } /* this is just the BACKSLASH operator */
1501  else
1502  {
1503  token = OPERATOR(BACKSLASH);
1504  }
1505  break;
1506 
1507  default: /* something else found */
1508  /* record current position in clause */
1509  this->clause->setEnd(this->line_number, this->line_offset);
1510  // update the error information
1512  sprintf(badchar, "%c", inch);
1513  sprintf(hexbadchar, "%2.2X", inch);
1514  /* report the error */
1516  break;
1517  }
1518  }
1519  }
1520  break; /* have a token now */
1521  }
1522  return token; /* return the next token */
1523 }
RexxInteger * new_integer(wholenumber_t v)
#define OREF_NULL
Definition: RexxCore.h:60
const int MAX_SYMBOL_LENGTH
Definition: RexxCore.h:74
#define Error_Unmatched_quote_comment
#define Error_Unexpected_curly_bracket
#define Error_Unmatched_quote_single
#define Error_Invalid_character_char
#define Error_Name_too_long_name
#define Error_Invalid_hex_binblank
#define Error_Unmatched_parenthesis_curly
#define Error_Invalid_hex_invbin
#define Error_Invalid_hex_hexblank
#define Error_Unmatched_quote_double
#define Error_Invalid_hex_invhex
#define EXP_EDIGIT
#define OPERATOR(op)
Definition: Scanner.cpp:198
#define GETCHAR()
Definition: Scanner.cpp:196
#define EXP_EXCLUDED
#define MORELINE()
Definition: Scanner.cpp:197
#define AFTER_NUMBER
#define EXP_E
#define HIGHEST_PRECEDENCE
Definition: Scanner.cpp:51
#define EXP_START
#define CHECK_ASSIGNMENT(op, token)
Definition: Scanner.cpp:199
#define AFTER_INTEGER
#define EXP_POINT
#define EXP_DIGIT
#define EXP_SPOINT
#define EXP_ESIGN
RexxString * new_string(const char *s, stringsizeB_t bl, sizeC_t cl=-1)
RexxString * raw_string(stringsizeB_t bl, stringsizeC_t cl=-1)
#define OPERATOR_DIVIDE
Definition: Token.hpp:113
#define TOKEN_LITERAL
Definition: Token.hpp:79
#define OPERATOR_SUBTRACT
Definition: Token.hpp:111
#define OPERATOR_INTDIV
Definition: Token.hpp:114
#define TOKEN_COLON
Definition: Token.hpp:87
#define TOKEN_OPERATOR
Definition: Token.hpp:80
#define OPERATOR_BACKSLASH
Definition: Token.hpp:141
#define OPERATOR_EQUAL
Definition: Token.hpp:120
#define TOKEN_LEFT
Definition: Token.hpp:84
#define SYMBOL_CONSTANT
Definition: Token.hpp:98
#define OPERATOR_XOR
Definition: Token.hpp:140
#define OPERATOR_STRICT_GREATERTHAN_EQUAL
Definition: Token.hpp:134
#define OPERATOR_POWER
Definition: Token.hpp:116
#define TOKEN_BLANK
Definition: Token.hpp:77
#define OPERATOR_BACKSLASH_EQUAL
Definition: Token.hpp:121
#define SYMBOL_DOTSYMBOL
Definition: Token.hpp:104
#define OPERATOR_PLUS
Definition: Token.hpp:110
#define TOKEN_DCOLON
Definition: Token.hpp:92
#define CLAUSEEND_SEMICOLON
Definition: Token.hpp:259
#define LITERAL_HEX
Definition: Token.hpp:106
#define OPERATOR_ABUTTAL
Definition: Token.hpp:117
#define OPERATOR_STRICT_EQUAL
Definition: Token.hpp:128
#define OPERATOR_LESSTHAN_EQUAL
Definition: Token.hpp:127
#define TOKEN_COMMA
Definition: Token.hpp:82
#define OPERATOR_STRICT_BACKSLASH_EQUAL
Definition: Token.hpp:129
#define OPERATOR_GREATERTHAN_LESSTHAN
Definition: Token.hpp:137
#define SYMBOL_COMPOUND
Definition: Token.hpp:101
#define OPERATOR_GREATERTHAN_EQUAL
Definition: Token.hpp:126
#define OPERATOR_BACKSLASH_LESSTHAN
Definition: Token.hpp:125
#define SYMBOL_STEM
Definition: Token.hpp:102
#define OPERATOR_CONCATENATE
Definition: Token.hpp:118
#define OPERATOR_MULTIPLY
Definition: Token.hpp:112
#define OPERATOR_OR
Definition: Token.hpp:139
#define SYMBOL_VARIABLE
Definition: Token.hpp:99
#define OPERATOR_LESSTHAN_GREATERTHAN
Definition: Token.hpp:136
#define OPERATOR_LESSTHAN
Definition: Token.hpp:124
#define TOKEN_SQRIGHT
Definition: Token.hpp:91
#define OPERATOR_STRICT_LESSTHAN_EQUAL
Definition: Token.hpp:135
#define OPERATOR_AND
Definition: Token.hpp:138
#define OPERATOR_GREATERTHAN
Definition: Token.hpp:122
#define CLAUSEEND_EOL
Definition: Token.hpp:260
#define CLAUSEEND_EOF
Definition: Token.hpp:258
#define OPERATOR_STRICT_GREATERTHAN
Definition: Token.hpp:130
#define OPERATOR_REMAINDER
Definition: Token.hpp:115
#define TOKEN_DTILDE
Definition: Token.hpp:89
#define TOKEN_EOC
Definition: Token.hpp:81
#define OPERATOR_STRICT_LESSTHAN
Definition: Token.hpp:132
#define TOKEN_RIGHT
Definition: Token.hpp:85
#define TOKEN_SQLEFT
Definition: Token.hpp:90
#define OPERATOR_BACKSLASH_GREATERTHAN
Definition: Token.hpp:123
#define TOKEN_TILDE
Definition: Token.hpp:88
#define OPERATOR_STRICT_BACKSLASH_LESSTHAN
Definition: Token.hpp:133
#define INTEGER_CONSTANT
Definition: Token.hpp:105
#define TOKEN_SYMBOL
Definition: Token.hpp:78
#define OPERATOR_BLANK
Definition: Token.hpp:119
#define OPERATOR_STRICT_BACKSLASH_GREATERTHAN
Definition: Token.hpp:131
#define SYMBOL_DUMMY
Definition: Token.hpp:103
#define LITERAL_BIN
Definition: Token.hpp:107
#define TOKEN_SOURCE_LITERAL
Definition: Token.hpp:95
static const size_t DEFAULT_DIGITS
Definition: Numerics.hpp:66
RexxToken * newToken(int, int, RexxString *, SourceLocation &)
Definition: Clause.cpp:143
void setEnd(size_t, sizeB_t)
Definition: Clause.cpp:106
size_t free
Definition: Clause.hpp:84
RexxToken * cachedToken
Definition: Clause.hpp:92
const SourceLocation & getLocation()
Definition: Clause.hpp:72
sizeB_t line_offset
Definition: SourceFile.hpp:468
RexxToken * sourceNextToken(RexxToken *)
Definition: Scanner.cpp:672
unsigned int locateToken(RexxToken *)
Definition: Scanner.cpp:287
RexxClause * clause
Definition: SourceFile.hpp:461
static int precedence(RexxToken *)
Definition: Scanner.cpp:53
void startLocation(SourceLocation &)
Definition: Scanner.cpp:201
void syntaxError(int errorcode, RexxInstruction *i)
Definition: SourceFile.hpp:319
size_t line_number
Definition: SourceFile.hpp:467
SourceLocation clauseLocation
Definition: SourceFile.hpp:462
RexxToken * sourceLiteral(size_t, SourceLocation)
Definition: Scanner.cpp:637
bool nextSpecial(unsigned int, SourceLocation &)
Definition: Scanner.cpp:221
static bool isSymbolCharacter(codepoint_t ch)
Definition: SourceFile.hpp:386
void syntaxErrorAt(int errorcode, RexxToken *token)
Definition: SourceFile.hpp:321
size_t line_count
Definition: SourceFile.hpp:466
void endLocation(SourceLocation &)
Definition: Scanner.cpp:211
void comment()
Definition: Scanner.cpp:242
RexxString * packLiteral(sizeB_t, sizeB_t, int)
Definition: Scanner.cpp:425
sizeB_t current_length
Definition: SourceFile.hpp:465
RexxString * commonString(RexxString *)
RexxString * extract(SourceLocation &, bool=false)
const char * current
Definition: SourceFile.hpp:460
void position(size_t, sizeB_t)
Definition: SourceFile.cpp:518
static int translateChar(codepoint_t ch)
Definition: SourceFile.hpp:393
static int characterTable[]
Definition: SourceFile.hpp:546
void nextLine()
Definition: SourceFile.cpp:504
char putCharB(sizeB_t p, char c)
char getCharB(sizeB_t p)
void setUpperOnly()
void setNumeric(int v)
Definition: Token.hpp:441
int precedence
Definition: Token.hpp:451
int classId
Definition: Token.hpp:448
int subclass
Definition: Token.hpp:449
void setEnd(SourceLocation &l)
void setStart(SourceLocation &l)
void setEndOffset(sizeB_t l)
void setLocation(size_t line, sizeB_t offset, size_t end, sizeB_t end_offset, bool limited_trace=false)
void setLimitedTrace(bool b)
int type
Definition: cmdparse.cpp:383
#define size_v(X)
Definition: rexx.h:237
stringsizeB_t sizeB_t
Definition: rexx.h:248