Scanner.cpp
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* */
3 /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */
4 /* Copyright (c) 2005-2009 Rexx Language Association. All rights reserved. */
5 /* */
6 /* This program and the accompanying materials are made available under */
7 /* the terms of the Common Public License v1.0 which accompanies this */
8 /* distribution. A copy is also available at the following address: */
9 /* http://www.oorexx.org/license.html */
10 /* */
11 /* Redistribution and use in source and binary forms, with or */
12 /* without modification, are permitted provided that the following */
13 /* conditions are met: */
14 /* */
15 /* Redistributions of source code must retain the above copyright */
16 /* notice, this list of conditions and the following disclaimer. */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in */
19 /* the documentation and/or other materials provided with the distribution. */
20 /* */
21 /* Neither the name of Rexx Language Association nor the names */
22 /* of its contributors may be used to endorse or promote products */
23 /* derived from this software without specific prior written permission. */
24 /* */
25 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
26 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
27 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS */
28 /* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
29 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
30 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
31 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, */
32 /* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
33 /* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */
34 /* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */
35 /* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* */
37 /*----------------------------------------------------------------------------*/
38 /******************************************************************************/
39 /* REXX Kernel */
40 /* */
41 /* Scanner portion of the REXX Source File Class */
42 /* */
43 /******************************************************************************/
44 #include <ctype.h>
45 #include <string.h>
46 #include "RexxCore.h"
47 #include "StringClass.hpp"
48 #include "ArrayClass.hpp"
49 #include "SourceFile.hpp"
50 
51 #define HIGHEST_PRECEDENCE 100 // For abuttal inside symbol: 1+2i is parsed as 1+(2i) instead of (1+2)i
52 
54  RexxToken *token) /* target token */
55 /******************************************************************************/
56 /* Fucntion: Determine a token's operator precedence */
57 /******************************************************************************/
58 {
59  if (token->precedence != 0) return token->precedence; // If default precedence has been overriden (ex: abuttal inside symbol)
60 
61  switch (token->subclass)
62  { /* process based on subclass */
63 
64  default:
65  return 0; /* this is the bottom of the heap */
66  break;
67 
68  case OPERATOR_OR:
69  case OPERATOR_XOR:
70  return 1; /* various OR types are next */
71  break;
72 
73  case OPERATOR_AND:
74  return 2; /* AND operator ahead of ORs */
75  break;
76 
77  case OPERATOR_EQUAL: /* comparisons are all together */
81  case OPERATOR_LESSTHAN:
95  return 3; /* concatenates are next */
96  break;
97 
98  case OPERATOR_ABUTTAL:
100  case OPERATOR_BLANK:
101  return 4; /* concatenates are next */
102  break;
103 
104  case OPERATOR_PLUS:
105  case OPERATOR_SUBTRACT:
106  return 5; /* plus and minus next */
107  break;
108 
109  case OPERATOR_MULTIPLY:
110  case OPERATOR_DIVIDE:
111  case OPERATOR_INTDIV:
112  case OPERATOR_REMAINDER:
113  return 6; /* mulitiply and divide afer simples */
114  break;
115 
116  case OPERATOR_POWER:
117  return 7; /* almost the top of the heap */
118  break;
119 
120  case OPERATOR_BACKSLASH:
121  return 8; /* NOT is the top honcho */
122  break;
123  }
124 }
125 
126 /*********************************************************************
127 * The following table detects alphanumeric characters and *
128 * special characters that can be part of an REXX symbol. *
129 * The table also convert lower case letters to upper case. *
130 *********************************************************************/
132 #ifdef EBCDIC
133  // This table was built using the IBM-1047 code page. It should be
134  // universal across all EBCDIC code pages!
135  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
136  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
137  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
138  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
139  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
140  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
141  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
142  0, 0, 0, 0, 74, 75, 0, 0, 0, 0, /* ¢ */
143  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
144  90, 91, 0, 0, 0, 0, 0, 0, 0, 0, /* !$ */
145  0, 0, 0, 0, 0, 0, 0, 0, 0, 109, /* _ */
146  0, 111, 0, 0, 0, 0, 0, 0, 0, 0, /* ? */
147  0, 0, 0, 123, 124, 0, 0, 0, 0, 129, /* #@ a */
148 130, 131, 132, 133, 134, 135, 136, 137, 0, 0, /* bcdefghi */
149  0, 0, 0, 0, 0, 145, 146, 147, 148, 149, /* jklmn */
150 150, 151, 152, 153, 0, 0, 0, 0, 0, 0, /* opqr */
151  0, 0, 162, 163, 164, 165, 166, 167, 168, 169, /* stuvwxyz */
152  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
153  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */
154  0, 0, 0, 193, 194, 195, 196, 197, 198, 199, /* ABCDEFG */
155 200, 201, 0, 0, 0, 0, 0, 0, 0, 209, /* HI J */
156 210, 211, 212, 213, 214, 215, 216, 217, 0, 0, /* KLMNOPQR */
157  0, 0, 0, 0, 0, 0, 226, 227, 228, 229, /* STUV */
158 230, 231, 232, 233, 0, 0, 0, 0, 0, 0, /* WXYZ */
159 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, /* 0123456789 */
160  0, 0, 0, 0, 0, 0 /* */
161 #else
162  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 9 */
163  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 19 */
164  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 29 */
165  0, 0, 0,33, 0, 35,36, 0, 0, 0, /* 30 - 39 (33 is ! 35 is # 36 is $) */
166  0, 0, 0, 0, 0, 0,46, 0,48,49, /* 40 - 49 (46 is . 48 is 0) */
167  50,51,52,53,54, 55,56,57, 0, 0, /* 50 - 59 (57 is 9) */
168  0, 0, 0,63,64, 65,66,67,68,69, /* 60 - 69 (63 is ? 64 is @ 65 is A) */
169  70,71,72,73,74, 75,76,77,78,79, /* 70 - 79 */
170  80,81,82,83,84, 85,86,87,88,89, /* 80 - 89 */
171  90, 0, 0, 0, 0, 95, 0,65,66,67, /* 90 - 99 (95 is _ 97 is a and */
172  /* becomes A) */
173  68,69,70,71,72, 73,74,75,76,77, /* 100 - 109 */
174  78,79,80,81,82, 83,84,85,86,87, /* 110 - 119 */
175  88,89,90, 0, 0, 0, 0, 0, 0, 0, /* 120 - 129 */
176  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 130 - 139 */
177  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 140 - 149 */
178  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 150 - 159 */
179  0, 0,162,0, 0, 0, 0, 0, 0, 0, /* 160 - 169 (162 is ¢) */
180  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 170 - 179 */
181  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 180 - 189 */
182  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 190 - 199 */
183  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 200 - 209 */
184  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 210 - 219 */
185  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 220 - 229 */
186  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 230 - 239 */
187  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 - 249 */
188  0, 0, 0, 0, 0, 0 /* 250 - 255 */
189 #endif
190 };
191 
192  /* some macros for commonly coded */
193  /* scanning operations...mostly to */
194  /* save some keystrokes and make */
195  /* things a little more readable */
196 #define GETCHAR() ((unsigned char)(this->current[this->line_offset]))
197 #define MORELINE() (this->line_offset < this->current_length)
198 #define OPERATOR(op) (this->clause->newToken(TOKEN_OPERATOR, OPERATOR_##op, (RexxString *)OREF_##op, location))
199 #define CHECK_ASSIGNMENT(op, token) (token->checkAssignment(this, (RexxString *)OREF_ASSIGNMENT_##op))
200 
202  SourceLocation &location ) /* token location information */
203 /****************************************************************************/
204 /* Function: Record a tokens starting location */
205 /****************************************************************************/
206 {
207  // copy the start line location
208  location.setStart(line_number, line_offset);
209 }
210 
212  SourceLocation &location ) /* token location information */
213 /****************************************************************************/
214 /* Function: Record a tokens ending location */
215 /****************************************************************************/
216 {
217  // copy the end line location
218  location.setEnd(line_number, line_offset);
219 }
220 
222  unsigned int target, /* desired target character */
223  SourceLocation &location, /* token location information */
224  bool advance)
225 /****************************************************************************/
226 /* Function: Find the next special character and verify against a target */
227 /****************************************************************************/
228 {
229  unsigned int inch = this->locateToken(OREF_NULL); /* find the next token */
230  /* have something else on this line? */
231  if (inch != CLAUSEEND_EOF && inch != CLAUSEEND_EOL)
232  {
233  if (GETCHAR() == target)
234  { /* is the next character a match? */
235  if (advance)
236  {
237  this->line_offset++; /* step over the next */
238  this->endLocation(location); /* update the end location part */
239  }
240  return true; /* got what we need! */
241  }
242  }
243  return false; // didn't find the one we're looking for
244 }
245 
247 /****************************************************************************/
248 /* Function: Scan source to skip over a nest of comments */
249 /****************************************************************************/
250 {
251  int level = 1; /* start the comment nesting */
252  this->line_offset += 2; /* step over the comment start */
253  size_t startline = this->line_number; /* remember the starting position */
254  while (level > 0)
255  { /* while still in a comment nest */
256  /* hit the end of a line? */
257  if (this->line_offset >= this->current_length)
258  {
259  this->nextLine(); /* need to go to the next line */
260  /* no more lines? */
261  if (this->line_number > this->line_count)
262  {
263  /* record current position in clause */
264  this->clause->setEnd(this->line_count, this->line_offset);
265  // update the error information
267  /* error, must report */
268  // The comment can be multiline, so must limit the amount of lines displayed in the error message
271  }
272  continue; /* go loop around */
273  }
274  unsigned int inch = GETCHAR(); /* get the next character */
275  this->line_offset++; /* step past the character */
276  /* is this the end delimeter? */
277  if (inch == '*' && GETCHAR() == '/')
278  {
279  level--; /* reduce the nesting level */
280  this->line_offset++; /* step the pointer over the close */
281  }
282  /* start of a new comment? */
283  else if (inch == '/' && GETCHAR() == '*')
284  {
285  level++; /* increment the level */
286  this->line_offset++; /* step the pointer over new start */
287  }
288  }
289 }
290 
292  RexxToken *previous ) /* previous token */
293 /****************************************************************************/
294 /* Function: Locate next significant token in source, skipping extra */
295 /* blanks and comments. */
296 /****************************************************************************/
297 {
298  size_t startline; /* backward reset line number */
299  size_t startoffset; /* backward reset offset */
300 
301  bool blanks = false; /* are blanks significant? */
302 
303  unsigned int character = 0; /* no specific character type yet */
304  /* check if blanks should be returned*/
305  if (previous != OREF_NULL && /* no previous token, or */
306  /* have a symbol, literal, right */
307  /* paren or right square bracket */
308  (previous->classId == TOKEN_SYMBOL ||
309  previous->classId == TOKEN_LITERAL ||
310  previous->classId == TOKEN_SOURCE_LITERAL ||
311  previous->classId == TOKEN_RIGHT ||
312  previous->classId == TOKEN_SQRIGHT))
313  {
314  blanks = true; /* blanks are significant here */
315  }
316 
317  /* no more lines in file? */
318  if (this->line_number > this->line_count)
319  {
320  character = CLAUSEEND_EOF; /* return an end-of-file */
321  }
322  else if (!MORELINE()) /* reached the line end? */
323  {
324  character = CLAUSEEND_EOL; /* return an end-of-line */
325  }
326  else
327  {
328  /* while more program to scan */
329  while (this->line_offset < this->current_length)
330  {
331  unsigned int inch = GETCHAR(); /* get the next character */
332  if (inch==' ' || inch=='\t')
333  { /* blank or tab? */
334  if (blanks)
335  { /* is this significant? */
336  character = TOKEN_BLANK; /* have a blank character */
337  break; /* got what we need */
338  }
339  else
340  {
341  this->line_offset++; /* step the position */
342  continue; /* go around again */
343  }
344  }
345  /* possible continuation character? */
346  else if (inch == ',' || inch == '-')
347  {
348  /* line comment? */
349  if (inch == '-' && this->line_offset + 1 < this->current_length &&
350  this->current[this->line_offset + 1] == '-')
351  {
352  this->line_offset = this->current_length;
353  break;
354  }
355 
356  character = inch; /* assume for now real character */
357  /* we check for EOL (possibly following blanks and comments) */
358  startoffset = this->line_offset;/* remember the location */
359  startline = this->line_number; /* remember the line position */
360  this->line_offset++; /* step the position */
361 
362  /* skip blanks and comments */
363  while (this->line_offset < this->current_length)
364  {
365  unsigned int inch2 = GETCHAR(); /* pick up the next character */
366  /* comment level start? */
367  if (inch2 == '/' && (this->line_offset + 1 < this->current_length) &&
368  this->current[this->line_offset + 1] == '*')
369  {
370  this->comment(); /* go skip over the comment */
371  continue; /* and continue scanning */
372  }
373  /* line comment? */
374  if (inch2 == '-' && (this->line_offset + 1 < this->current_length) &&
375  this->current[this->line_offset + 1] == '-')
376  {
377  /* go skip overto the end of line */
378  this->line_offset = this->current_length;
379  break;
380  }
381  /* non-blank outside comment */
382  if (inch2 != ' ' && inch2 != '\t')
383  {
384  break; /* done scanning */
385  }
386  this->line_offset++; /* step over this character */
387  }
388  /* found an EOL? */
389  if (this->line_offset >= this->current_length)
390  {
391  /* more lines in file? */
392  if (this->line_number < this->line_count)
393  {
394  this->nextLine(); /* step to the next line */
395  if (blanks)
396  { /* blanks allowed? */
397  character = TOKEN_BLANK; /* make this a blank token */
398  break; /* finished here */
399  }
400  }
401  }
402  else
403  { /* reset to the starting position */
404  this->position(startline, startoffset);
405  character = inch; /* this is a real character */
406  break; /* other non-blank, done scanning */
407  }
408  }
409  /* comment level start? */
410  else if (inch == '/' && (this->line_offset + 1 < this->current_length) &&
411  this->current[this->line_offset + 1] == '*')
412  {
413  this->comment(); /* go skip over the comment */
414  }
415  else
416  { /* got the character */
417  character = inch; /* this is a good character */
418  break; /* done looping */
419  }
420  }
421  if (!MORELINE()) /* fallen off the end of the line? */
422  {
423  character = CLAUSEEND_EOL; /* this is an end of clause */
424  }
425  }
426  return character; /* return the character */
427 }
428 
430  size_t start, /* start of the literal in line */
431  size_t length, /* length of the literal to reduce */
432  int type ) /* type of literal to process */
433 /****************************************************************************/
434 /* Function: Convert and check a hex or binary constant, packing it down */
435 /* into a string object. */
436 /****************************************************************************/
437 {
438  int _first; /* switch to mark first group */
439  int blanks; /* switch to say if scanning blanks */
440  int count; /* count for group */
441  size_t i; /* loop counter */
442  size_t j; /* loop counter */
443  size_t k; /* loop counter */
444  size_t m; /* temporary integer */
445  int byte; /* individual byte of literal */
446  int nibble; /* individual nibble of literal */
447  size_t oddhex; /* odd number of characters in first */
448  size_t inpointer; /* current input position */
449  int outpointer; /* current output pointer */
450  RexxString *value; /* reduced value */
451  size_t real_length; /* real number of digits in string */
452  char error_output[2]; /* used for formatting error */
453 
454  _first = true; /* initialize group flags and */
455  count = 0; /* counters */
456  blanks = false;
457  error_output[1] = '\0'; /* terminate string */
458  /* set initial input/output positions*/
459  inpointer = start; /* get initial starting position */
460 
461  if (length == 0) /* hex or binary null string? */
462  {
463  value = OREF_NULLSTRING; /* this is a null string */
464  }
465  else
466  { /* data to reduce */
467  /* first scan is to check REXX rules for validity of grouping */
468  /* and to remove blanks */
469 
470  real_length = length; /* pick up the string length */
471  for (i = 0; i < length; i++)
472  { /* loop through entire string */
473  /* got a blank? */
474  if (this->current[inpointer] == ' ' || this->current[inpointer] == '\t')
475  {
476  blanks = true; /* remember scanning blanks */
477  /* don't like initial blanks or groups after the first */
478  /* which are not in twos (hex) or fours (binary) */
479  if (i == 0 || /* if at the beginning */
480  (!_first && /* or past first group and not the */
481  /* correct size */
482  (((count&1) && type == LITERAL_HEX) ||
483  ((count&3) && type == LITERAL_BIN))))
484  {
485  m = i+1; /* place holder for new_integer invocation */
486  // update the error information
488  if (type == LITERAL_HEX) /* hex string? */
489  {
490  /* report correct error */
492  }
493  else /* need the binary message */
494  {
496  }
497  }
498  count = 0; /* this starts a new group */
499  real_length--; /* this shortens the value */
500 
501  }
502  else
503  {
504  if (blanks) /* had a blank group? */
505  {
506  _first = false; /* no longer on the lead grouping */
507  }
508  blanks = false; /* not processing blanks now */
509  count++; /* count this significant character */
510  }
511  inpointer++; /* step the input position */
512  }
513 
514  if (blanks || /* trailing blanks or */
515  (!_first && /* last group isn't correct count? */
516  (((count&1) && type == LITERAL_HEX) ||
517  ((count&3) && type == LITERAL_BIN))))
518  {
519  m = i-1; /* place holder for new_integer invocation */
520  // update the error information
522  if (type == LITERAL_HEX) /* hex string? */
523  {
524  /* report correct error */
526  }
527  else /* need the binary message */
528  {
530  }
531  }
532 
533  /* second scan is to create the string value determined by the */
534  /* hex or binary constant. */
535 
536  i = real_length; /* get the adjusted length */
537  /* reset the scan pointers */
538  inpointer = start; /* reset the scan pointer */
539  outpointer = 0; /* set the position a start */
540  if (type == LITERAL_HEX)
541  { /* hex literal? */
542  oddhex = i&1; /* get any odd count */
543  i >>= 1; /* divide by 2 ... and */
544  i += oddhex; /* add in the odd count */
545  value = raw_string(i); /* get the final value */
546 
547  for (j = 0; j < i; j++)
548  { /* loop for the appropriate count */
549  byte = 0; /* current byte is zero */
550  for (k = oddhex; k < 2; k++)
551  { /* loop either 1 or 2 times */
552  /* get the next nibble */
553  nibble = this->current[inpointer];
554  inpointer++; /* step to the next character */
555  while (nibble == ' ' || nibble == '\t')
556  { /* step over any inter-nibble blanks */
557  /* get the next nibble */
558  nibble = this->current[inpointer];
559  inpointer++; /* step to the next character */
560  }
561  /* real digit? */
562  if (nibble >= '0' && nibble <= '9')
563  nibble -= '0'; /* make base zero */
564  /* lowercase hex digit? */
565  else if (nibble >= 'a' && nibble <= 'f')
566  {
567  nibble -= 'a'; /* subtract lowest and */
568  nibble += 10; /* add 10 to digit */
569  } /* uppercase hex digit? */
570  else if (nibble >= 'A' && nibble <= 'F')
571  {
572  nibble -= 'A'; /* subtract lowest and */
573  nibble += 10; /* add 10 to digit */
574  }
575  else
576  {
577  // update the error information
579  error_output[0] = nibble; /* copy the error character */
580  /* report the invalid character */
581  syntaxError(Error_Invalid_hex_invhex, new_string(&error_output[0]));
582  }
583  byte <<= 4; /* shift the last nibble over */
584  byte += nibble; /* add in the next nibble */
585  }
586  oddhex = 0; /* remainder are full bytes */
587  value->putChar(outpointer, byte);/* store this in the output position */
588  outpointer++; /* step to the next position */
589  }
590  value = this->commonString(value); /* now force to a common string */
591  }
592  else
593  { /* convert to binary */
594  oddhex = i&7; /* get the leading byte count */
595  if (oddhex != 0) /* incomplete byte? */
596  {
597  oddhex = 8 - oddhex; /* get the padding count */
598  }
599  i += oddhex; /* and add that into total */
600  i >>= 3; /* get the byte count */
601  value = raw_string(i); /* get the final value */
602 
603  for (j = 0; j < i; j++)
604  { /* loop through the entire string */
605  byte = 0; /* zero the byte */
606  for (k = oddhex; k < 8; k++)
607  { /* loop through each byte segment */
608  /* get the next bit */
609  nibble = this->current[inpointer];
610  inpointer++; /* step to the next character */
611  while (nibble == ' ' || nibble == '\t')
612  { /* step over any inter-nibble blanks */
613  /* get the next nibble */
614  nibble = this->current[inpointer];
615  inpointer++; /* step to the next character */
616  }
617  byte <<= 1; /* shift the accumulator */
618  if (nibble == '1') /* got a one bit? */
619  {
620  byte++; /* add in the bit */
621  }
622  else if (nibble != '0')
623  { /* not a '0' either? */
624  // update the error information
626  error_output[0] = nibble; /* copy the error character */
627  /* report the invalid character */
628  syntaxError(Error_Invalid_hex_invbin, new_string(&error_output[0]));
629  }
630  }
631  oddhex = 0; /* use 8 bits for the remaining group*/
632  value->putChar(outpointer, byte);/* store this in the output position */
633  outpointer++; /* step to the next position */
634  }
635  value = this->commonString(value); /* now force to a common string */
636  }
637  }
638 #if 0
639  if (value != OREF_NULLSTRING)
640  {
641  // Declare that this string is byte encoded
642  ProtectedObject pvalue(value); // The GC can be triggered by messageSend. Already protected by this->commonString(value), but...
643  ProtectedObject result;
644  RexxObject *args[1];
645  args[0] = OREF_BYTE; // positional argument
646  bool messageUnderstood = value->messageSend(OREF_SETENCODING, args, 1, 0, result, false);
647  // OREF_SETENCODING do that: value~!setEncoding(OREF_BYTE); // don't touch value->text (currently OREF_NULL). Maybe will be converted to RexxText during evaluation.
648  }
649 #endif
650  return value; /* return newly created string */
651 }
652 
653 RexxToken *RexxSource::sourceLiteral(size_t clause_free, SourceLocation location)
654 {
655  size_t start = this->line_offset; /* save the starting point */
656  size_t startline = this->line_number; /* remember the starting position */
657  size_t sourceLiteralEnd; /* end of source literal */
658  size_t length; /* length of extracted token */
659  RexxToken *previous = OREF_NULL;
660  for (;;)
661  { /* spin through the source literal */
662  if (this->nextSpecial('}', location))
663  {
664  sourceLiteralEnd = this->line_offset - 1; /* remember end location */
665  break;
666  }
667  RexxToken *token = this->sourceNextToken(previous);
668  if (token == OREF_NULL)
669  { /* reached the end of the source? */
670  this->clause->setEnd(this->line_number, this->line_offset);
672  // The source literal can be multiline, so must limit the amount of lines displayed in the error message
675  }
676  previous = token;
677  }
678  this->clause->setEnd(this->line_number, this->line_offset);
679  length = sourceLiteralEnd - start; /* get length of literal data */
680  RexxString *value = this->extract(location, true);
681  value = this->commonString(value);
682  location.setLimitedTrace(true); /* don't do that before this->extract */
683  this->endLocation(location); /* record the end position */
684  this->clause->free = clause_free; /* all the tokens scanned for the source literal are replaced by the source literal token */
685  return this->clause->newToken(TOKEN_SOURCE_LITERAL, 0, value, location);
686 }
687 
689  RexxToken *previous ) /* previous token scanned off */
690 /*********************************************************************/
691 /* Extract a token from the source and create a new token object. */
692 /* The token type and sub-type are set in the token, and any string */
693 /* value extracted. */
694 /*********************************************************************/
695 {
696  RexxToken *token = OREF_NULL; /* working token */
697  RexxString *value; /* associate string value */
698  unsigned int inch; /* working input character */
699  size_t eoffset; /* location of exponential */
700  int state; /* state of symbol scanning */
701  size_t start; /* scan start location */
702  size_t litend; /* end of literal data */
703  size_t length; /* length of extracted token */
704  int dot_count; /* count of periods in symbol */
705  unsigned int literal_delimiter; /* literal string delimiter */
706  int type; /* type of literal token */
707  size_t i; /* loop counter */
708  size_t j; /* loop counter */
709  int subclass; /* sub type of the token */
710  int numeric; /* numeric type flag */
711  SourceLocation location; /* token location information */
712  char tran; /* translated character */
713  char badchar[4]; /* working buffer for errors */
714  char hexbadchar[4]; /* working buffer for errors */
715 
716  /* definitions of states of exponential numeric scan */
717 #define EXP_START 0
718 #define EXP_EXCLUDED 1
719 #define EXP_DIGIT 2
720 #define EXP_SPOINT 3
721 #define EXP_POINT 4
722 #define EXP_E 5
723 #define EXP_ESIGN 6
724 #define EXP_EDIGIT 7
725 // When parsing a symbol having the form <number><after number>, stop immediatly
726 // after number, where number is such as datatype(number) = "NUM".
727 // Ex: 2a is the number 2 followed by the symbol A.
728 #define AFTER_INTEGER 8
729 #define AFTER_NUMBER 9
730 
731  if (this->clause->cachedToken != OREF_NULL)
732  {
733  token = this->clause->cachedToken;
734  this->clause->cachedToken = OREF_NULL;
735  return token;
736  }
737 
738  for (;;)
739  { /* loop until we find a significant */
740  /* token */
741  inch = this->locateToken(previous);/* locate the next token position */
742 
743  // record a starting location.
745 
746  if (inch == CLAUSEEND_EOF)
747  { /* reach the end of the source? */
748  token = OREF_NULL; /* no token to return */
749  break; /* finished */
750  }
751  else if (inch == CLAUSEEND_EOL)
752  { /* some other end-of-clause */
753  /* make end the end of the line */
754  location.setEndOffset(current_length);
755  /* return a clause terminator */
756  token = this->clause->newToken(TOKEN_EOC, CLAUSEEND_EOL, OREF_NULL, location);
757  this->nextLine(); /* step to the next line */
758  break; /* have something to return */
759  }
760  else if (inch == TOKEN_BLANK )
761  { /* some sort of white space? */
762  /* now go ahead to the next token */
763  inch = this->locateToken(OREF_NULL);
764  /* is this blank significant? */
765  if (inch != CLAUSEEND_EOL && /* not at the end */
766  (isSymbolCharacter(inch) || /* and next is a symbol token */
767  inch == '\"' || /* or start of a " quoted literal */
768  inch == '\'' || /* or start of a ' quoted literal */
769  inch == '(' || /* or a left parenthesis */
770  inch == '[' || /* or a left square bracket */
771  inch == '{' )) /* or a left curly bracket */
772  {
773  /* return blank token */
774  token = this->clause->newToken(TOKEN_BLANK, OPERATOR_BLANK, (RexxString *)OREF_BLANK, location);
775  }
776  else /* non-significant blank */
777  {
778  continue; /* just loop around again */
779  }
780  }
781  else
782  { /* non-special token type */
783  /* process different token types */
784  tran = translateChar(inch); /* do the table mapping */
785  if (tran != 0)
786  { /* have a symbol character? */
787  state = EXP_START; /* in a clean state now */
788  eoffset = 0; /* no exponential sign yet */
789  start = this->line_offset; /* remember token start position */
790  dot_count = 0; /* no periods yet */
791  for (;;)
792  { /* loop through the token */
793  if (inch == '.') /* have a period? */
794  {
795  dot_count++; /* remember we saw this one */
796  }
797 
798  /* finite state machine to establish numeric constant (with possible */
799  /* included sign in exponential form) */
800 
801  switch (state)
802  { /* process based on current state */
803 
804  case EXP_START: /* beginning of scan */
805  /* have a digit at the start? */
806  if (inch >= '0' && inch <= '9')
807  {
808  state = EXP_DIGIT; /* now scanning digits */ // 0..9 ==> EXP_DIGIT
809  }
810  else if (inch == '.') /* start with a decimal point? */
811  {
812  state = EXP_SPOINT; /* now scanning after the decimal */ // . ==> EXP_SPOINT
813  }
814  else /* must be a non-numeric character */
815  {
816  state = EXP_EXCLUDED; /* no longer a number */ // (neither 0..9 nor .) ==> EXP_EXCLUDED
817  }
818  break; /* go process the next character */
819 
820  case EXP_DIGIT: /* have at least one digit mantissa */
821  if (inch=='.') /* decimal point? */
822  {
823  state = EXP_POINT; /* we've hit a decimal point */ // (0..9)+ . ==> EXP_POINT
824  }
825  else if (tran=='E') /* start of exponential? */
826  {
827  state = EXP_E; /* remember we've had the 'E' form */ // (0..9)+ e|E ==> EXP_E
828  }
829  /* non-digit? */
830  else if (inch < '0' || inch > '9')
831  {
832  state = AFTER_INTEGER; // EXP_EXCLUDED; /* no longer scanning a number */ // (0..9)+ (not 0..9) ==> AFTER_NUMBER
833  }
834  /* a digit leaves the state unchanged at EXP_DIGIT */
835  break; /* go get the next character */ // (0..9)+ (0..9) ==> EXP_DIGIT
836 
837  case EXP_SPOINT: /* leading decimal point */
838  /* not a digit? */
839  if (inch < '0' || inch > '9')
840  {
841  state = EXP_EXCLUDED; /* not a number */ // . (not 0..9) ==> EXP_EXCLUDED
842  }
843  else /* digit character */
844  {
845  state = EXP_POINT; /* processing a decimal number */ // . (0..9) ==> EXP_POINT
846  }
847  break; /* go process the next character */
848 
849  case EXP_POINT: /* have a decimal point */
850  if (tran == 'E') /* found the exponential? */
851  {
852  state = EXP_E; /* set exponent state */ // (0..9)+ . (0..9)* e|E ==> EXP_E (yes, can have ZERO digits after decimal point)
853  // . (0..9)+ e|E ==> EXP_E
854  }
855  /* non-digit found? */
856  else if (inch < '0' || inch > '9')
857  {
858  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */ // (0..9)+ . (0..9)* (not 0..9) ==> AFTER_NUMBER (yes, can have ZERO digits after deciaml point)
859  // . (0..9)+ (not 0..9) ==> AFTER_NUMBER
860  }
861  /* a digit leaves the state unchanged at EXP_POINT */
862  break; /* go get another character */ // (0..9)+ . (0..9)* (0..9) ==> EXP_POINT
863  // . (0..9)+ (0..9) ==> EXP_POINT
864  case EXP_E: /* just had an exponent */
865  /* next one a digit? */
866  if (inch >= '0' && inch <= '9')
867  {
868  state = EXP_EDIGIT; /* now looking for exponent digits */ // (0..9)+ e|E (0..9) ==> EXP_EDIGIT
869  // (0..9)+ . (0..9)* e|E (0..9) ==> EXP_EDIGIT
870  // . (0..9)+ e|E (0..9) ==> EXP_EDIGIT
871  }
872 
873  /* a sign will be collected by the apparent end of symbol code below */
874  break; /* finished */
875 
876  case EXP_ESIGN: /* just had a signed exponent */
877  /* got a digit? */
878  if (inch >= '0' && inch <= '9')
879  {
880  state = EXP_EDIGIT; /* now looking for the exponent */ // (0..9)+ e|E +|- (0..9) ==> EXP_EDIGIT
881  // (0..9)+ . (0..9)* e|E +|- (0..9) ==> EXP_EDIGIT
882  // . (0..9)+ e|E +|- (0..9) ==> EXP_EDIGIT
883  }
884  else
885  {
886  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */ // (0..9)+ e|E +|- (not 0..9) ==> AFTER_NUMBER
887  // (0..9)+ . (0..9)* e|E +|- (not 0..9) ==> AFTER_NUMBER
888  // . (0..9)+ e|E +|- (not 0..9) ==> AFTER_NUMBER
889  }
890  break; /* go get the next digits */
891 
892  case EXP_EDIGIT: /* processing the exponent digits */
893  /* not a digit? */
894  if (inch < '0' || inch > '9')
895  {
896  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */ // (0..9)+ e|E (0..9)+ (not 0..9) ==> AFTER_NUMBER
897  // (0..9)+ . (0..9)* e|E (0..9)+ (not 0..9) ==> AFTER_NUMBER
898  // . (0..9)+ e|E (0..9)+ (not 0..9) ==> AFTER_NUMBER
899  // (0..9)+ e|E +|- (0..9)+ (not 0..9) ==> AFTER_NUMBER
900  // (0..9)+ . (0..9)* e|E +|- (0..9)+ (not 0..9) ==> AFTER_NUMBER
901  // . (0..9)+ e|E +|- (0..9)+ (not 0..9) ==> AFTER_NUMBER
902  }
903  break; /* go get the next character */ // (0..9)+ e|E (0..9)+ (0..9) ==> EXP_EDIGIT
904  // (0..9)+ . (0..9)* e|E (0..9)+ (0..9) ==> EXP_EDIGIT
905  // . (0..9)+ e|E (0..9)+ (0..9) ==> EXP_EDIGIT
906  // (0..9)+ e|E +|- (0..9)+ (0..9) ==> EXP_EDIGIT
907  // (0..9)+ . (0..9)* e|E +|- (0..9)+ (0..9) ==> EXP_EDIGIT
908  // . (0..9)+ e|E +|- (0..9)+ (0..9) ==> EXP_EDIGIT
909 
910  /* once EXP_EXCLUDED is reached the state doesn't change */
911  }
912 
913  if (state == AFTER_INTEGER || state == AFTER_NUMBER)
914  {
915  break;
916  }
917 
918  if (state == EXP_E && eoffset == 0)
919  {
920  eoffset = this->line_offset; // remember current position BEFORE skipping e|E : in case of bad exponent, I don't want to include e|E in the number.
921  }
922 
923  this->line_offset++; /* step the source pointer */
924 
925  if (state == EXP_EDIGIT)
926  {
927  eoffset = this->line_offset; // any digit after e|E is part of the number
928  }
929 
930  /* had a bad exponent part? */
931  //if (eoffset != 0 && state == EXP_EXCLUDED)
932  //{
933  // /* back up the scan pointer */
934  // this->line_offset = eoffset;
935  // break; /* and we're finished with this */
936  //}
937 
938  if (!MORELINE()) /* reached the end of the line? */
939  {
940  break; /* done processing */
941  }
942 
943  inch = GETCHAR(); /* get the next character */
944  tran = translateChar(inch); /* translate the next character */
945  if (tran != 0) /* good symbol character? */
946  {
947  continue; /* loop through the state machine */
948  }
949  /* check for sign in correct state */
950  if (state == EXP_E && (inch == '+' || inch == '-'))
951  {
952  // /* remember current position */
953  // eoffset = this->line_offset;
954  state = EXP_ESIGN; /* now looking for the exponent */ // (0..9)+ e|E +|- ==> EXP_ESIGN
955  // (0..9)+ . (0..9)* e|E +|- ==> EXP_ESIGN
956  // . (0..9)+ e|E +|- ==> EXP_ESIGN
957  this->line_offset++; /* step past the sign */
958  if (!MORELINE())
959  { /* reached the end of the line? */
960  state = AFTER_NUMBER; // EXP_EXCLUDED; /* can't be a number */
961  break; /* quit looping */
962  }
963  inch = GETCHAR(); /* get the next character */
964  tran = translateChar(inch);/* translate the next character */
965  if (tran != 0) /* good character? */
966  {
967  continue; /* loop around */
968  }
969  else
970  { /* bad character */
971  state = AFTER_NUMBER; // EXP_EXCLUDED; /* not a number */
972  break; /* break out of here */
973  }
974  }
975  else
976  {
977  break; /* reached a non-symbol character */
978  }
979  }
980  /* this must be the end of the symbol - check whether we have too much */
981  /* need to step backward? */
982  if (eoffset != 0 && state != EXP_EDIGIT)
983  {
984  this->line_offset = eoffset; /* restore the source pointer */
985  }
986  /* get the token length */
987  length = this->line_offset - start;
988  value = raw_string(length); /* get the final value */
989  numeric = 0; /* not a numeric constant yet */
990  for (i = 0; i < length; i++)
991  { /* copy over and translate the value */
992  /* copy over the symbol value */
993  /* (translating to uppercase */
994  /* get the next character */
995  inch = this->current[start + i];
996  if (isSymbolCharacter(inch)) /* normal symbol character (not +/-) */
997  {
998  inch = translateChar(inch); /* translate to uppercase */
999  }
1000  value->putChar(i, inch);
1001  }
1002  value->setUpperOnly(); /* only contains uppercase */
1003  /* now force to a common string */
1004  value = this->commonString(value);
1005  /* record current position in clause */
1006  this->clause->setEnd(this->line_number, this->line_offset);
1007  if (length > (size_t)MAX_SYMBOL_LENGTH)/* result too long? */
1008  {
1009  // update the error information
1011  /* report the error */
1013  }
1014  inch = this->current[start]; /* get the first character */
1015  if (length == 1 && inch == '.')/* have a solo period? */
1016  {
1017  subclass = SYMBOL_DUMMY; /* this is the place holder */
1018  }
1019  /* have a digit? */
1020  else if (inch >= '0' && inch <= '9')
1021  {
1022  subclass = SYMBOL_CONSTANT; /* have a constant symbol */
1023  /* can we optimize to an integer? */
1024  if ((state == EXP_DIGIT || state == AFTER_INTEGER) && length < Numerics::DEFAULT_DIGITS)
1025  {
1026  /* no leading zero or only zero? */
1027  if (inch != '0' || length == 1)
1028  {
1029  /* we can make this an integer object*/
1030  numeric = INTEGER_CONSTANT;
1031  }
1032  }
1033  }
1034  else if (inch == '.')
1035  {
1036  /* this is an environment symbol */
1037  subclass = SYMBOL_DOTSYMBOL;
1038  }
1039  else
1040  { /* variable type symbol */
1041  /* set the default extended type */
1042  subclass = SYMBOL_VARIABLE;
1043  if (dot_count > 0)
1044  { /* have a period in the name? */
1045  /* end in a dot? */
1046  if (dot_count == 1 && value->getChar(length-1) == '.')
1047  {
1048  /* this is a stem variable */
1049  subclass = SYMBOL_STEM;
1050  }
1051  else /* have a compound variable */
1052  {
1053  subclass = SYMBOL_COMPOUND;
1054  }
1055  }
1056  }
1057  this->endLocation(location); /* record the end position */
1058  /* get a symbol token */
1059  token = this->clause->newToken(TOKEN_SYMBOL, subclass, value, location);
1060  token->setNumeric(numeric); /* record any numeric side info */
1061 
1062  if (state == AFTER_INTEGER || state == AFTER_NUMBER)
1063  {
1064  // The tokenizer has splitted a symbol of the form <number><after number> in two distinct tokens.
1065  // An abuttal operator is inserted to re-concatenate <number> with <after number>.
1066  // In this context, the precedence of this abuttal operator is very high, to ensure both tokens are always linked together.
1067  SourceLocation location;
1068  this->startLocation(location); // The token abuttal starts at current source position
1069  this->endLocation(location); // The token abuttal ends at current source position (empty string)
1070  // Creates the token of the abuttal operator.
1071  RexxToken *token = this->clause->newToken(TOKEN_OPERATOR, OPERATOR_ABUTTAL, OREF_NULLSTRING, location);
1072  token->precedence = HIGHEST_PRECEDENCE;
1073  this->clause->cachedToken = token; // Will be returned on next call
1074  }
1075  }
1076  /* start of a quoted string? */
1077  else if (inch=='\'' || inch=='\"')
1078  {
1079  literal_delimiter = inch; /* save the starting character */
1080  start = this->line_offset + 1; /* save the starting point */
1081  dot_count = 0; /* no doubled quotes yet */
1082  type = 0; /* working with a straight literal */
1083  for (;;)
1084  { /* spin through the string */
1085  this->line_offset++; /* step the pointer */
1086  if (!MORELINE())
1087  { /* reached the end of the line? */
1088  /* record current position in clause */
1089  this->clause->setEnd(this->line_number, this->line_offset);
1090  // update the error information
1092  if (literal_delimiter == '\'')
1093  {
1094  /* raise the appropriate error */
1096  }
1097  else
1098  {
1099  /* must be a double quote */
1101  }
1102  }
1103  inch = GETCHAR(); /* get the next character */
1104  /* is this the delimiter? */
1105  if (literal_delimiter == inch)
1106  {
1107  /* remember end location */
1108  litend = this->line_offset - 1;
1109  this->line_offset++; /* step to the next character */
1110  if (!MORELINE()) /* end of the line? */
1111  {
1112  break; /* we're finished */
1113  }
1114  inch = GETCHAR(); /* get the next character */
1115  /* not a doubled quote? */
1116  if (inch != literal_delimiter)
1117  {
1118  break; /* got the end */
1119  }
1120  dot_count++; /* remember count of doubled quotes */
1121  }
1122  }
1123  if (MORELINE())
1124  { /* have more on this line? */
1125  inch = GETCHAR(); /* get the next character */
1126  /* potentially a hex string? */
1127  if (inch == 'x' || inch == 'X')
1128  {
1129  this->line_offset++; /* step to the next character */
1130  /* the end of the line, or */
1131  /* have another symbol character */
1132  if (MORELINE() && isSymbolCharacter(GETCHAR()))
1133  {
1134  this->line_offset--; /* step back to the X */
1135  }
1136  else
1137  {
1138  type = LITERAL_HEX; /* set the appropriate type */
1139  }
1140  }
1141  /* potentially a binary string? */
1142  else if (inch == 'b' || inch == 'B')
1143  {
1144  this->line_offset++; /* step to the next character */
1145  /* the end of the line, or */
1146  /* have another symbol character */
1147  if (MORELINE() && isSymbolCharacter(GETCHAR()))
1148  {
1149  this->line_offset--; /* step back to the B */
1150  }
1151  else
1152  {
1153  type = LITERAL_BIN; /* set the appropriate type */
1154  }
1155  }
1156  }
1157  length = litend - start + 1; /* calculate the literal length */
1158  /* record current position in clause */
1159  this->clause->setEnd(this->line_number, this->line_offset);
1160  if (type) /* need to pack a literal? */
1161  {
1162  /* compress into packed form */
1163  value = this->packLiteral(start, litend - start + 1, type) ;
1164  }
1165  else
1166  {
1167  length = litend - start + 1; /* get length of literal data */
1168  /* get the final value string */
1169  value = raw_string(length - dot_count);
1170  /* copy over and translate the value */
1171  for (i = 0, j = 0; j < length; i++, j++)
1172  {
1173  /* get the next character */
1174  inch = this->current[start + j];
1175  /* same as our delimiter? */
1176  if (inch == literal_delimiter)
1177  {
1178  j++; /* step one extra */
1179  }
1180  value->putChar(i, inch); /* copy over the literal data */
1181  }
1182  /* now force to a common string */
1183  value = this->commonString(value);
1184  }
1185  this->endLocation(location); /* record the end position */
1186  /* get a string token */
1187  token = this->clause->newToken(TOKEN_LITERAL, 0, value, location);
1188  }
1189  else
1190  { /* other special character */
1191  this->line_offset++; /* step past it */
1192 
1193  // Negation '¬' in UTF-8 (C2AC)
1194  if (inch == (unsigned char)0xC2 && this->nextSpecial((unsigned char)0xAC, location, false))
1195  {
1196  this->nextSpecial((unsigned char)0xAC, location); // skip AC
1197  inch = '\\';
1198  }
1199 
1200  switch (inch)
1201  { /* process operators and punctuation */
1202 
1203  case ')': /* right parenthesis? */
1204  /* this is a special character class */
1205  token = this->clause->newToken(TOKEN_RIGHT, 0, OREF_NULL, location);
1206  break;
1207 
1208  case ']': /* right square bracket */
1209  /* this is a special character class */
1210  token = this->clause->newToken(TOKEN_SQRIGHT, 0, OREF_NULL, location);
1211  break;
1212 
1213  case '}':
1214  /* end of source literal, but when seen here, it's an error */
1216 
1217  case '(': /* left parenthesis */
1218  /* this is a special character class */
1219  token = this->clause->newToken(TOKEN_LEFT, 0, OREF_NULL, location);
1220  break;
1221 
1222  case '[': /* left square bracket */
1223  /* this is a special character class */
1224  token = this->clause->newToken(TOKEN_SQLEFT, 0, OREF_NULL, location);
1225  break;
1226 
1227  case '{':
1228  /* start of source literal */
1229  token = this->sourceLiteral(this->clause->free, location);
1230  break;
1231 
1232  case ',': /* comma */
1233  /* this is a special character class */
1234  token = this->clause->newToken(TOKEN_COMMA, 0, OREF_NULL, location);
1235  break;
1236 
1237  case ';': /* semicolon */
1238  /* this is a special character class */
1239  token = this->clause->newToken(TOKEN_EOC, CLAUSEEND_SEMICOLON, OREF_NULL, location);
1240  break;
1241 
1242  case ':': /* colon */
1243  /* next one a colon also? */
1244  if (this->nextSpecial(':', location))
1245  {
1246  /* this is a special character class */
1247  token = this->clause->newToken(TOKEN_DCOLON, 0, OREF_NULL, location);
1248  }
1249  else
1250  {
1251  /* this is a special character class */
1252  token = this->clause->newToken(TOKEN_COLON, 0, OREF_NULL, location);
1253  }
1254  break;
1255 
1256  case '~': /* message send? */
1257  /* next one a tilde also? */
1258  if (this->nextSpecial('~', location))
1259  /* this is a special character class */
1260  token = this->clause->newToken(TOKEN_DTILDE, 0, OREF_NULL, location);
1261  else
1262  /* this is a special character class */
1263  token = this->clause->newToken(TOKEN_TILDE, 0, OREF_NULL, location);
1264  break;
1265 
1266  case '+': /* plus sign */
1267  /* addition operator */
1268  token = OPERATOR(PLUS); /* this is an operator class */
1269  CHECK_ASSIGNMENT(PLUS, token); // this is allowed as an assignment shortcut
1270  break;
1271 
1272  case '-': /* minus sign */
1273  /* subtraction operator */
1274  token = OPERATOR(SUBTRACT); /* this is an operator class */
1275  CHECK_ASSIGNMENT(SUBTRACT, token); // this is allowed as an assignment shortcut
1276  break;
1277 
1278  case '%': /* percent sign */
1279  /* integer divide operator */
1280  token = OPERATOR(INTDIV); /* this is an operator class */
1281  CHECK_ASSIGNMENT(INTDIV, token); // this is allowed as an assignment shortcut
1282  break;
1283 
1284  case '/': /* forward slash */
1285  /* this is division */
1286  /* next one a slash also? */
1287  if (this->nextSpecial('/', location))
1288  {
1289 
1290  token = OPERATOR(REMAINDER);
1291  CHECK_ASSIGNMENT(REMAINDER, token); // this is allowed as an assignment shortcut
1292  }
1293  // The operators /= and /== are supported in TSO/E REXX as alternatives to \= and \==, respectively.
1294  else if (this->nextSpecial('=', location))
1295  {
1296  if (this->nextSpecial('=', location)) token = OPERATOR(STRICT_BACKSLASH_EQUAL); // /== is equvalent to \==
1297  else token = OPERATOR(BACKSLASH_EQUAL); // /= is equvalent to \=
1298  }
1299 
1300  /* this is an operator class */
1301  else
1302  {
1303  token = OPERATOR(DIVIDE); /* this is an operator class */
1304  CHECK_ASSIGNMENT(DIVIDE, token); // this is allowed as an assignment shortcut
1305  }
1306  break;
1307 
1308  case '*': /* asterisk? */
1309  /* this is multiply */
1310  /* next one a star also? */
1311  if (this->nextSpecial('*', location))
1312  {
1313  token = OPERATOR(POWER); /* this is an operator class */
1314  CHECK_ASSIGNMENT(POWER, token); // this is allowed as an assignment shortcut
1315  }
1316  else /* this is an operator class */
1317  {
1318 
1319  token = OPERATOR(MULTIPLY);
1320  CHECK_ASSIGNMENT(MULTIPLY, token); // this is allowed as an assignment shortcut
1321  }
1322  break;
1323 
1324  case '&': /* ampersand? */
1325  /* this is the and operator */
1326  /* next one an ampersand also? */
1327  if (this->nextSpecial('&', location))
1328  {
1329 
1330  token = OPERATOR(XOR); /* this is an operator class */
1331  CHECK_ASSIGNMENT(XOR, token); // this is allowed as an assignment shortcut
1332  }
1333  else /* this is an operator class */
1334  {
1335  token = OPERATOR(AND);
1336  CHECK_ASSIGNMENT(AND, token); // this is allowed as an assignment shortcut
1337  }
1338  break;
1339 
1340  case '|': /* vertical bar? */
1341  /* this is an or operator */
1342  /* next one a vertical bar also? */
1343  if (this->nextSpecial('|', location))
1344  {
1345  /* this is a concatenation */
1346  token = OPERATOR(CONCATENATE);
1347  CHECK_ASSIGNMENT(CONCATENATE, token); // this is allowed as an assignment shortcut
1348  }
1349  else /* this is an operator class */
1350  {
1351 
1352  token = OPERATOR(OR); /* this is the OR operator */
1353  CHECK_ASSIGNMENT(OR, token); // this is allowed as an assignment shortcut
1354  }
1355  break;
1356 
1357  case '=': /* equal sign? */
1358  /* set this an an equal */
1359  /* next one an equal sign also? */
1360  if (this->nextSpecial('=', location))
1361  {
1362  /* this is an operator class */
1363  token = OPERATOR(STRICT_EQUAL);
1364  }
1365  else /* this is an operator class */
1366  {
1367  token = OPERATOR(EQUAL);
1368  }
1369  break;
1370 
1371  case '<': /* less than sign? */
1372  /* next one a less than also? */
1373  if (this->nextSpecial('<', location))
1374  {
1375  /* have an equal sign after that? */
1376  if (this->nextSpecial('=', location))
1377  {
1378  /* this is an operator class */
1379  token = OPERATOR(STRICT_LESSTHAN_EQUAL);
1380  }
1381  else /* this is an operator class */
1382  {
1383  token = OPERATOR(STRICT_LESSTHAN);
1384  }
1385  }
1386  /* next one an equal sign? */
1387  else if (this->nextSpecial('=', location))
1388  {
1389  /* this is the <= operator */
1390  token = OPERATOR(LESSTHAN_EQUAL);
1391  }
1392  /* next one a greater than sign? */
1393  else if (this->nextSpecial('>', location))
1394  {
1395  /* this is the <> operator */
1396  token = OPERATOR(LESSTHAN_GREATERTHAN);
1397  }
1398  else /* this simply the < operator */
1399  {
1400  token = OPERATOR(LESSTHAN);
1401  }
1402  break;
1403 
1404  case '>': /* greater than sign? */
1405  /* next one a greater than also? */
1406  if (this->nextSpecial('>', location))
1407  {
1408  /* have an equal sign after that? */
1409  if (this->nextSpecial('=', location))
1410  {
1411  /* this is the >>= operator */
1412  token = OPERATOR(STRICT_GREATERTHAN_EQUAL);
1413  }
1414  else /* this is the >> operator */
1415  {
1416  token = OPERATOR(STRICT_GREATERTHAN);
1417  }
1418  }
1419  /* next one an equal sign? */
1420  else if (this->nextSpecial('=', location))
1421  {
1422  /* this is the >= operator */
1423  token = OPERATOR(GREATERTHAN_EQUAL);
1424  }
1425  /* next one a less than sign? */
1426  else if (this->nextSpecial('<', location))
1427  {
1428  /* this is the <> operator */
1429  token = OPERATOR(GREATERTHAN_LESSTHAN);
1430  }
1431  else /* this simply the > operator */
1432  {
1433  token = OPERATOR(GREATERTHAN);
1434  }
1435  break;
1436 
1437  case '\\': /* backslash */
1438 
1439  // we accept either of these as alternatives
1440  case (unsigned char)0xAA: /* logical not (need unsigned cast) */
1441  case (unsigned char)0xAC: /* logical not (need unsigned cast) */
1442 
1443  // extension (supported by Regina)
1444  case '^':
1445 
1446  /* next one an equal sign? */
1447  if (this->nextSpecial('=', location))
1448  {
1449  /* have an equal sign after that? */
1450  if (this->nextSpecial('=', location))
1451  {
1452  /* this is the \== operator */
1453  token = OPERATOR(STRICT_BACKSLASH_EQUAL);
1454  }
1455  else /* this is the \= operator */
1456  {
1457  token = OPERATOR(BACKSLASH_EQUAL);
1458  }
1459  }
1460  /* next one a greater than sign? */
1461  else if (this->nextSpecial('>', location))
1462  {
1463  /* have another greater than next? */
1464  if (this->nextSpecial('>', location))
1465  {
1466  /* this is the >> operator */
1467  token = OPERATOR(STRICT_BACKSLASH_GREATERTHAN);
1468  }
1469  else /* this is the > operator */
1470  {
1471  token = OPERATOR(BACKSLASH_GREATERTHAN);
1472  }
1473  }
1474  /* next one a less than sign? */
1475  else if (this->nextSpecial('<', location))
1476  {
1477  /* have another less than next? */
1478  if (this->nextSpecial('<', location))
1479  {
1480  /* this is the << operator */
1481  token = OPERATOR(STRICT_BACKSLASH_LESSTHAN);
1482  }
1483  else /* this is the < operator */
1484  {
1485  token = OPERATOR(BACKSLASH_LESSTHAN);
1486  }
1487  }
1488  else /* this is just the NOT operator */
1489  {
1490  token = OPERATOR(BACKSLASH);
1491  }
1492  break;
1493 
1494  default: /* something else found */
1495  /* record current position in clause */
1496  this->clause->setEnd(this->line_number, this->line_offset);
1497  // update the error information
1499  snprintf(badchar, sizeof badchar, "%c", inch);
1500  snprintf(hexbadchar, sizeof badchar, "%2.2X", inch);
1501  /* report the error */
1503  break;
1504  }
1505  }
1506  }
1507  break; /* have a token now */
1508  }
1509  return token; /* return the next token */
1510 }
RexxInteger * new_integer(wholenumber_t v)
#define OREF_NULL
Definition: RexxCore.h:61
const int MAX_SYMBOL_LENGTH
Definition: RexxCore.h:77
#define Error_Unmatched_quote_comment
#define Error_Unexpected_curly_bracket
#define Error_Unmatched_quote_single
#define Error_Invalid_character_char
#define Error_Name_too_long_name
#define Error_Invalid_hex_binblank
#define Error_Unmatched_parenthesis_curly
#define Error_Invalid_hex_invbin
#define Error_Invalid_hex_hexblank
#define Error_Unmatched_quote_double
#define Error_Invalid_hex_invhex
#define EXP_EDIGIT
#define OPERATOR(op)
Definition: Scanner.cpp:198
#define GETCHAR()
Definition: Scanner.cpp:196
#define EXP_EXCLUDED
#define MORELINE()
Definition: Scanner.cpp:197
#define AFTER_NUMBER
#define EXP_E
#define HIGHEST_PRECEDENCE
Definition: Scanner.cpp:51
#define EXP_START
#define CHECK_ASSIGNMENT(op, token)
Definition: Scanner.cpp:199
#define AFTER_INTEGER
#define EXP_POINT
#define EXP_DIGIT
#define EXP_SPOINT
#define EXP_ESIGN
RexxString * raw_string(stringsize_t l)
RexxString * new_string(const char *s, stringsize_t l)
#define OPERATOR_DIVIDE
Definition: Token.hpp:113
#define TOKEN_LITERAL
Definition: Token.hpp:79
#define OPERATOR_SUBTRACT
Definition: Token.hpp:111
#define OPERATOR_INTDIV
Definition: Token.hpp:114
#define TOKEN_COLON
Definition: Token.hpp:87
#define TOKEN_OPERATOR
Definition: Token.hpp:80
#define OPERATOR_BACKSLASH
Definition: Token.hpp:141
#define OPERATOR_EQUAL
Definition: Token.hpp:120
#define TOKEN_LEFT
Definition: Token.hpp:84
#define SYMBOL_CONSTANT
Definition: Token.hpp:98
#define OPERATOR_XOR
Definition: Token.hpp:140
#define OPERATOR_STRICT_GREATERTHAN_EQUAL
Definition: Token.hpp:134
#define OPERATOR_POWER
Definition: Token.hpp:116
#define TOKEN_BLANK
Definition: Token.hpp:77
#define OPERATOR_BACKSLASH_EQUAL
Definition: Token.hpp:121
#define SYMBOL_DOTSYMBOL
Definition: Token.hpp:104
#define OPERATOR_PLUS
Definition: Token.hpp:110
#define TOKEN_DCOLON
Definition: Token.hpp:92
#define CLAUSEEND_SEMICOLON
Definition: Token.hpp:259
#define LITERAL_HEX
Definition: Token.hpp:106
#define OPERATOR_ABUTTAL
Definition: Token.hpp:117
#define OPERATOR_STRICT_EQUAL
Definition: Token.hpp:128
#define OPERATOR_LESSTHAN_EQUAL
Definition: Token.hpp:127
#define TOKEN_COMMA
Definition: Token.hpp:82
#define OPERATOR_STRICT_BACKSLASH_EQUAL
Definition: Token.hpp:129
#define OPERATOR_GREATERTHAN_LESSTHAN
Definition: Token.hpp:137
#define SYMBOL_COMPOUND
Definition: Token.hpp:101
#define OPERATOR_GREATERTHAN_EQUAL
Definition: Token.hpp:126
#define OPERATOR_BACKSLASH_LESSTHAN
Definition: Token.hpp:125
#define SYMBOL_STEM
Definition: Token.hpp:102
#define OPERATOR_CONCATENATE
Definition: Token.hpp:118
#define OPERATOR_MULTIPLY
Definition: Token.hpp:112
#define OPERATOR_OR
Definition: Token.hpp:139
#define SYMBOL_VARIABLE
Definition: Token.hpp:99
#define OPERATOR_LESSTHAN_GREATERTHAN
Definition: Token.hpp:136
#define OPERATOR_LESSTHAN
Definition: Token.hpp:124
#define TOKEN_SQRIGHT
Definition: Token.hpp:91
#define OPERATOR_STRICT_LESSTHAN_EQUAL
Definition: Token.hpp:135
#define OPERATOR_AND
Definition: Token.hpp:138
#define OPERATOR_GREATERTHAN
Definition: Token.hpp:122
#define CLAUSEEND_EOL
Definition: Token.hpp:260
#define CLAUSEEND_EOF
Definition: Token.hpp:258
#define OPERATOR_STRICT_GREATERTHAN
Definition: Token.hpp:130
#define OPERATOR_REMAINDER
Definition: Token.hpp:115
#define TOKEN_DTILDE
Definition: Token.hpp:89
#define TOKEN_EOC
Definition: Token.hpp:81
#define OPERATOR_STRICT_LESSTHAN
Definition: Token.hpp:132
#define TOKEN_RIGHT
Definition: Token.hpp:85
#define TOKEN_SQLEFT
Definition: Token.hpp:90
#define OPERATOR_BACKSLASH_GREATERTHAN
Definition: Token.hpp:123
#define TOKEN_TILDE
Definition: Token.hpp:88
#define OPERATOR_STRICT_BACKSLASH_LESSTHAN
Definition: Token.hpp:133
#define INTEGER_CONSTANT
Definition: Token.hpp:105
#define TOKEN_SYMBOL
Definition: Token.hpp:78
#define OPERATOR_BLANK
Definition: Token.hpp:119
#define OPERATOR_STRICT_BACKSLASH_GREATERTHAN
Definition: Token.hpp:131
#define SYMBOL_DUMMY
Definition: Token.hpp:103
#define LITERAL_BIN
Definition: Token.hpp:107
#define TOKEN_SOURCE_LITERAL
Definition: Token.hpp:95
static const size_t DEFAULT_DIGITS
Definition: Numerics.hpp:66
void setEnd(size_t, size_t)
Definition: Clause.cpp:106
RexxToken * newToken(int, int, RexxString *, SourceLocation &)
Definition: Clause.cpp:143
size_t free
Definition: Clause.hpp:84
RexxToken * cachedToken
Definition: Clause.hpp:92
const SourceLocation & getLocation()
Definition: Clause.hpp:72
bool messageSend(RexxString *, RexxObject **, size_t, size_t, ProtectedObject &, bool processUnknown=true, bool dynamicTarget=true)
RexxToken * sourceNextToken(RexxToken *)
Definition: Scanner.cpp:688
unsigned int locateToken(RexxToken *)
Definition: Scanner.cpp:291
RexxClause * clause
Definition: SourceFile.hpp:462
static int precedence(RexxToken *)
Definition: Scanner.cpp:53
void startLocation(SourceLocation &)
Definition: Scanner.cpp:201
void syntaxError(int errorcode, RexxInstruction *i)
Definition: SourceFile.hpp:320
size_t line_number
Definition: SourceFile.hpp:468
SourceLocation clauseLocation
Definition: SourceFile.hpp:463
RexxToken * sourceLiteral(size_t, SourceLocation)
Definition: Scanner.cpp:653
static bool isSymbolCharacter(codepoint_t ch)
Definition: SourceFile.hpp:387
void syntaxErrorAt(int errorcode, RexxToken *token)
Definition: SourceFile.hpp:322
size_t line_count
Definition: SourceFile.hpp:467
void endLocation(SourceLocation &)
Definition: Scanner.cpp:211
size_t current_length
Definition: SourceFile.hpp:466
size_t line_offset
Definition: SourceFile.hpp:469
void comment()
Definition: Scanner.cpp:246
bool nextSpecial(unsigned int, SourceLocation &, bool advance=true)
Definition: Scanner.cpp:221
void position(size_t, size_t)
Definition: SourceFile.cpp:518
RexxString * commonString(RexxString *)
RexxString * extract(SourceLocation &, bool=false)
const char * current
Definition: SourceFile.hpp:461
RexxString * packLiteral(size_t, size_t, int)
Definition: Scanner.cpp:429
static int translateChar(codepoint_t ch)
Definition: SourceFile.hpp:394
static int characterTable[]
Definition: SourceFile.hpp:547
void nextLine()
Definition: SourceFile.cpp:504
char putChar(size_t p, char c)
void setUpperOnly()
char getChar(size_t p)
void setNumeric(int v)
Definition: Token.hpp:442
int precedence
Definition: Token.hpp:452
int classId
Definition: Token.hpp:449
int subclass
Definition: Token.hpp:450
void setEnd(SourceLocation &l)
void setStart(SourceLocation &l)
void setLimitedTrace(bool b)
void setEndOffset(size_t l)
void setLocation(size_t line, size_t offset, size_t end, size_t end_offset, bool limited_trace=false)
int type
Definition: cmdparse.cpp:1888