StringUtil.cpp
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* */
3 /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */
4 /* Copyright (c) 2005-2009 Rexx Language Association. All rights reserved. */
5 /* */
6 /* This program and the accompanying materials are made available under */
7 /* the terms of the Common Public License v1.0 which accompanies this */
8 /* distribution. A copy is also available at the following address: */
9 /* http://www.oorexx.org/license.html */
10 /* */
11 /* Redistribution and use in source and binary forms, with or */
12 /* without modification, are permitted provided that the following */
13 /* conditions are met: */
14 /* */
15 /* Redistributions of source code must retain the above copyright */
16 /* notice, this list of conditions and the following disclaimer. */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in */
19 /* the documentation and/or other materials provided with the distribution. */
20 /* */
21 /* Neither the name of Rexx Language Association nor the names */
22 /* of its contributors may be used to endorse or promote products */
23 /* derived from this software without specific prior written permission. */
24 /* */
25 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
26 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
27 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS */
28 /* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
29 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
30 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
31 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, */
32 /* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
33 /* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */
34 /* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */
35 /* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* */
37 /*----------------------------------------------------------------------------*/
38 /******************************************************************************/
39 /* REXX Kernel */
40 /* */
41 /* String Utilities shared between String class and MutableBuffer class */
42 /* */
43 /******************************************************************************/
44 #include <ctype.h>
45 #include <string.h>
46 
47 #include "RexxCore.h"
48 #include "StringClass.hpp"
49 #include "ProtectedObject.hpp"
50 #include "StringUtil.hpp"
51 #include "QueueClass.hpp"
52 
53 
54 /**
55  * Extract a substring from a data buffer.
56  *
57  * @param string The data buffer.
58  * @param stringLength
59  * The length of the buffer.
60  * @param _position The position argument for the starting position.
61  * @param _length The substring length argument.
62  * @param pad The padding argument.
63  *
64  * @return The extracted substring.
65  */
66 RexxString *StringUtil::substr(const char *string, size_t stringLength, RexxInteger *_position,
67  RexxInteger *_length, RexxString *pad)
68 {
69  size_t position = positionArgument(_position, ARG_ONE) - 1;
70  // assume nothing is pulled from this string
71  size_t length = 0;
72  // is the position within the string bounds?
73  if (stringLength >= position)
74  {
75  // we extract everything from the position to the end (potentially)
76  length = stringLength - position;
77  }
78  // now we process any overrides on this
79  length = optionalLengthArgument(_length, length, ARG_TWO);
80  // get a padding character (blank is default)
81  codepoint_t padChar = optionalPadArgument(pad, ' ', ARG_THREE);
82 
83  // if our target length is zero, we can just return the null string singleton
84  if (length == 0)
85  {
86  return OREF_NULLSTRING;
87  }
88 
89  size_t substrLength = 0;
90  size_t padCount = 0;
91 
92  // starting past the end of the string?
93  // this will be all pad characters
94  if (position > stringLength)
95  {
96  padCount = length;
97  }
98  else
99  {
100  // we have a combination of source string and pad characters
101  substrLength = Numerics::minVal(length, stringLength - position);
102  padCount = length - substrLength;
103  }
104  RexxString *retval = raw_string(length); /* get result string */
105  if (substrLength != 0) /* data to copy? */
106  {
107  // copy over the string portion
108  retval->put(0, string + position, substrLength);
109  }
110  // add any needed padding characters
111  if (padCount != 0)
112  {
113  retval->set(substrLength, (int)padChar, padCount);
114  }
115  // and return the final result
116  return retval;
117 }
118 
119 
120 /**
121  * Locate a string within the designated string buffer.
122  *
123  * @param stringData The stringData to search within.
124  * @param length The length of the string data.
125  * @param needle The needle to search for.
126  * @param pstart The starting position.
127  *
128  * @return An integer object giving the located position.
129  */
130 RexxInteger *StringUtil::posRexx(const char *stringData, size_t length, RexxString *needle, RexxInteger *pstart, RexxInteger *range)
131 {
132  /* force needle to a string */
133  needle = stringArgument(needle, OREF_positional, ARG_ONE);
134  /* get the starting position */
135  size_t _start = optionalPositionArgument(pstart, 1, ARG_TWO);
136  size_t _range = optionalLengthArgument(range, length - _start + 1, ARG_THREE);
137  /* pass on to the primitive function */
138  /* and return as an integer object */
139  size_t result = pos(stringData, length, needle, _start - 1, _range);
140  return new_integer(result);
141 }
142 
143 
144 /**
145  * Primitive level search withint a string buffer.
146  *
147  * @param stringData The maystack buffer.
148  * @param haystack_length
149  * The length of the haystack.
150  * @param needle The search needle.
151  * @param _start The starting position.
152  *
153  * @return The offset of the located needle, or 0 if the needle doesn't exist.
154  */
155 size_t StringUtil::pos(const char *stringData, size_t haystack_length, RexxString *needle, size_t _start, size_t _range)
156 {
157  // get the two working lengths
158  size_t needle_length = needle->getLength();
159  // make sure the range is capped
160  _range = Numerics::minVal(_range, haystack_length - _start);
161 
162  // ok, there are a few quick checks we can perform. If the needle is
163  // bigger than the haystack, or the needle is a null string or
164  // our haystack length after adjusting to the starting position
165  // zero, then we can quickly return zero.
166  if (_start >= haystack_length || needle_length > _range || needle_length == 0)
167  {
168  return 0;
169  }
170 
171  // address the string value
172  const char *haypointer = stringData + _start;
173  const char *needlepointer = needle->getStringData();
174  size_t location = _start + 1; // this is the match location as an index
175  // calculate the number of probes we can make in this string
176  size_t count = _range - needle_length + 1;
177 
178  // now scan
179  while (count-- != 0)
180  {
181  /* get a hit? */
182  if (memcmp(haypointer, needlepointer, needle_length) == 0)
183  {
184  return location;
185  }
186  // step our pointers accordingly
187  location++;
188  haypointer++;
189  }
190  return 0; // we got nothing...
191 }
192 
193 
194 /**
195  * Primitive level search withint a string buffer.
196  *
197  * @param stringData The maystack buffer.
198  * @param haystack_length
199  * The length of the haystack.
200  * @param needle The search needle.
201  * @param _start The starting position.
202  *
203  * @return The offset of the located needle, or 0 if the needle doesn't exist.
204  */
205 size_t StringUtil::caselessPos(const char *stringData, size_t haystack_length, RexxString *needle, size_t _start, size_t _range)
206 {
207  // get the two working lengths
208  size_t needle_length = needle->getLength();
209  // make sure the range is capped
210  _range = Numerics::minVal(_range, haystack_length - _start + 1);
211 
212  // ok, there are a few quick checks we can perform. If the needle is
213  // bigger than the haystack, or the needle is a null string or
214  // our haystack length after adjusting to the starting position
215  // zero, then we can quickly return zero.
216  if (_start > haystack_length || needle_length > _range || needle_length == 0)
217  {
218  return 0;
219  }
220 
221  // address the string value
222  const char *haypointer = stringData + _start;
223  const char *needlepointer = needle->getStringData();
224  size_t location = _start + 1; // this is the match location as an index
225  // calculate the number of probes we can make in this string
226  size_t count = _range - needle_length + 1;
227 
228  // now scan
229  while (count-- != 0)
230  {
231  /* get a hit? */
232  if (caselessCompare(haypointer, needlepointer, needle_length) == 0)
233  {
234  return location;
235  }
236  // step our pointers accordingly
237  location++;
238  haypointer++;
239  }
240  return 0; // we got nothing...
241 }
242 
243 
244 /**
245  * Locate the last positon of a string within the designated
246  * string buffer.
247  *
248  * @param stringData The stringData to search within.
249  * @param length The length of the string data.
250  * @param needle The needle to search for.
251  * @param pstart The starting position.
252  *
253  * @return An integer object giving the located position.
254  */
255 RexxInteger *StringUtil::lastPosRexx(const char *stringData, size_t haystackLen, RexxString *needle, RexxInteger *_start, RexxInteger *_range)
256 {
257  needle = stringArgument(needle, OREF_positional, ARG_ONE);
258  // find out where to start the search. The default is at the very end.
259  size_t startPos = optionalPositionArgument(_start, haystackLen, ARG_TWO);
260  size_t range = optionalLengthArgument(_range, haystackLen, ARG_THREE);
261  // now perform the actual search.
262  size_t result = lastPos(stringData, haystackLen, needle, startPos, range);
263  return new_integer(result);
264 }
265 
266 
267 /**
268  * Primitive level lastpos search within a string buffer.
269  *
270  * @param stringData The maystack buffer.
271  * @param haystack_length
272  * The length of the haystack.
273  * @param needle The search needle.
274  * @param _start The starting position.
275  *
276  * @return The offset of the located needle, or 0 if the needle doesn't exist.
277  */
278 size_t StringUtil::lastPos(const char *stringData, size_t haystackLen, RexxString *needle, size_t _start, size_t range)
279 {
280  size_t needleLen = needle->getLength(); /* and get the length too */
281 
282  // no match possible if either string is null
283  if (needleLen == 0 || haystackLen == 0 || needleLen > range)
284  {
285  return 0;
286  }
287  else
288  {
289  // get the start position for the search.
290  haystackLen = Numerics::minVal(_start, haystackLen);
291  range = Numerics::minVal(range, haystackLen);
292  // adjust the starting point by pretending this is smaller than the original string
293  const char *startPoint = stringData + haystackLen - range;
294  /* do the search */
295  const char *matchLocation = lastPos(needle->getStringData(), needleLen, startPoint, range);
296  if (matchLocation == NULL)
297  {
298  return 0;
299  }
300  else
301  {
302  return matchLocation - stringData + 1;
303  }
304  }
305 }
306 
307 
308 /**
309  * Absolutely most primitive version of a lastpos search. This
310  * version searches directly in a buffer rather than a Rexx
311  * String.
312  *
313  * @param needle Pointer to the needle string.
314  * @param needleLen Length of the needle string.
315  * @param haystack The pointer to the haystack string.
316  * @param haystackLen
317  * The length of the haystack string.
318  *
319  * @return A pointer to the match location or NULL if there is no match.
320  */
321 const char *StringUtil::lastPos(const char *needle, size_t needleLen, const char *haystack, size_t haystackLen)
322 {
323  // if the needle's longer than the haystack, no chance of a match
324  if (needleLen > haystackLen)
325  {
326  return NULL;
327  }
328  // set the search startpoing point relative to the end of the search string
329  haystack = haystack + haystackLen - needleLen;
330  // this is the possible number of compares we might need to perform
331  size_t count = haystackLen - needleLen + 1;
332  // now scan backward
333  while (count > 0)
334  {
335  // got a match at this position, return it directly
336  if (memcmp(haystack, needle, needleLen) == 0)
337  {
338  return haystack;
339  }
340  // decrement count and position
341  count--;
342  haystack--;
343  }
344  return NULL; // nothing to see here folks, move along
345 }
346 
347 
348 /**
349  * Primitive level caseless lastpos search within a string
350  * buffer.
351  *
352  * @param stringData The maystack buffer.
353  * @param haystack_length
354  * The length of the haystack.
355  * @param needle The search needle.
356  * @param _start The starting position.
357  *
358  * @return The offset of the located needle, or 0 if the needle doesn't exist.
359  */
360 size_t StringUtil::caselessLastPos(const char *stringData, size_t haystackLen, RexxString *needle, size_t _start, size_t range)
361 {
362  size_t needleLen = needle->getLength(); /* and get the length too */
363 
364  // no match possible if either string is null
365  if (needleLen == 0 || haystackLen == 0 || needleLen > range)
366  {
367  return 0;
368  }
369  else
370  {
371  // get the start position for the search.
372  haystackLen = Numerics::minVal(_start, haystackLen);
373  range = Numerics::minVal(range, haystackLen);
374  // adjust the starting point
375  const char *startPoint = stringData + haystackLen - range;
376  /* do the search */
377  const char *matchLocation = caselessLastPos(needle->getStringData(), needleLen, startPoint, range);
378  if (matchLocation == NULL)
379  {
380  return 0;
381  }
382  else
383  {
384  return matchLocation - stringData + 1;
385  }
386  }
387 }
388 
389 
390 /**
391  * Absolutely most primitive version of a caseless lastpos
392  * search. This version searches directly in a buffer rather
393  * than a Rexx String.
394  *
395  * @param needle Pointer to the needle string.
396  * @param needleLen Length of the needle string.
397  * @param haystack The pointer to the haystack string.
398  * @param haystackLen
399  * The length of the haystack string.
400  *
401  * @return A pointer to the match location or NULL if there is no match.
402  */
403 const char *StringUtil::caselessLastPos(const char *needle, size_t needleLen, const char *haystack, size_t haystackLen)
404 {
405  // if the needle's longer than the haystack, no chance of a match
406  if (needleLen > haystackLen)
407  {
408  return NULL;
409  }
410  // set the search startpoing point relative to the end of the search string
411  haystack = haystack + haystackLen - needleLen;
412  // this is the possible number of compares we might need to perform
413  size_t count = haystackLen - needleLen + 1;
414  // now scan backward
415  while (count > 0)
416  {
417  // got a match at this position, return it directly
418  if (caselessCompare(haystack, needle, needleLen) == 0)
419  {
420  return haystack;
421  }
422  // decrement count and position
423  count--;
424  haystack--;
425  }
426  return NULL; // nothing to see here folks, move along
427 }
428 
429 
430 /**
431  * Extract an individual character from a string buffer, returned
432  * as a string object.
433  *
434  * @param stringData The string buffer.
435  * @param stringLength
436  * The length of the buffer.
437  * @param positionArg
438  * The target position.
439  *
440  * @return The target character, as a string value.
441  */
442 RexxString *StringUtil::subchar(const char *stringData, size_t stringLength, RexxInteger *positionArg)
443 {
444  // the starting position isn't optional
445  size_t position = positionArgument(positionArg, ARG_ONE) - 1;
446 
447  // beyond the bounds, this is a null string
448  if (position >= stringLength)
449  {
450  return OREF_NULLSTRING;
451  }
452  // return the single character
453  return new_string(stringData + position, 1);
454 }
455 
456 /**
457  * Search for a separator within a string segment.
458  *
459  * @param start The start position for the scan.
460  * @param end The last possible position for a scan (taking the length
461  * of the separator into account).
462  * @param sepData The separator data
463  * @param sepLength the length of the separator.
464  *
465  * @return The next match position, or null for no match.
466  */
467 const char *StringUtil::locateSeparator(const char *start, const char *end, const char *sepData, size_t sepLength)
468 {
469  /* search for separator character */
470  while (start < end)
471  {
472  if (memcmp(start, sepData, sepLength) == 0)
473  {
474  return start;
475  }
476  start++;
477  }
478  // not found
479  return NULL;
480 }
481 
482 
483 /**
484  * Carve the string buffer up into an array of string values.
485  *
486  * @param start The starting position of the buffer.
487  * @param length The length of the buffer.
488  * @param separator The optional separator character.
489  *
490  * @return An array of all strings within the buffer, with the target
491  * delimiter removed.
492  */
493 RexxArray *StringUtil::makearray(const char *start, size_t length, RexxString *separator)
494 {
495  const char *sepData = "\n"; // set our default separator
496  size_t sepSize = 1;
497  bool checkCR = true; // by default, we look for either separator
498 
499  // if we have an explicit separator, use it instead
500  if (separator != OREF_NULL)
501  {
502  // make sure this is really a string value
503  separator = stringArgument(separator, OREF_positional, ARG_ONE);
504  sepData = separator->getStringData();
505  sepSize = separator->getLength();
506  checkCR = false; // if explicitly given, only use the given one
507  }
508  ProtectedObject p(separator);
509 
510 
511  // the Null string gets special handling
512  if (sepSize == 0)
513  {
514  // we need an array the size of the string
515  RexxArray *array = new_array(length);
516  ProtectedObject p1(array);
517  // create a string for each character and poke into the array
518  for (size_t i = 0; i < length; i++, start++)
519  {
520  array->put(new_string(start, 1), i + 1);
521  }
522  return array;
523  }
524 
525 
526  RexxQueue *strings = new_queue(); /* save each string in a queue */
527  ProtectedObject p2(strings); /* which we need to protect */
528  // this is the end of the string
529  const char *stringEnd = start + length;
530 
531  // this is where we stop scanning
532  const char *end = start + length - sepSize + 1;
533 
534  while (start < end)
535  {
536  // search for the next separator, if not found, we're done
537  const char *tmp = locateSeparator(start, end, sepData, sepSize);
538  if (tmp == NULL)
539  {
540  break;
541  }
542  size_t stringLen = tmp - start;
543  // if checking for either linend possibility, reduce the length if we had
544  // a leading CR character
545  if (checkCR && *(tmp - 1) == '\r')
546  {
547  stringLen--;
548  }
549  strings->queue(new_string(start, stringLen));
550  // step to the next scan position
551  start = tmp + sepSize;
552  }
553  // we might have a tail piece here
554  if (start < stringEnd)
555  {
556  size_t stringLen = stringEnd - start;
557  strings->queue(new_string(start, stringLen));
558  }
559  // now convert this to an array
560  return strings->makeArray();
561 }
562 
563 
565 {
566  return StringUtil::makearray(str->getStringData(), str->getLength(), separator);
567 }
568 
569 
570 /**
571  * Perform a caseless comparison of two strings
572  *
573  * @param string1 The first string to compare.
574  * @param string2 The second string.
575  * @param length The length to compare.
576  *
577  * @return 0 if the two strings are equal, -1 if the first is less than the
578  * second, and 1 if the first string is the greater.
579  */
580 int StringUtil::caselessCompare(const char *string1, const char *string2, size_t length)
581 {
582  /* totally equal? */
583  if (!memcmp(string1, string2, length))
584  {
585  return 0; /* return equality indicator */
586  }
587 
588  while (length-- != 0) /* need to do it the hardway */
589  {
590  /* not equal? */
591  if (toupper(*string1) != toupper(*string2))
592  {
593  /* first one less? */
594  if (toupper(*string1) < toupper(*string2))
595  {
596  return -1; /* return less than indicator */
597  }
598  else
599  {
600  return 1; /* first is larger */
601  }
602  }
603  string1++; /* step first pointer */
604  string2++; /* and second pointer also */
605  }
606  return 0; /* fall through, these are equal */
607 }
608 
609 
610 
611 /**
612  * Convert a hex digit to its integer value equivalent.
613  *
614  * @param ch The input character.
615  *
616  * @return the integer value of the digit.
617  */
619 {
620  int Retval; /* return value */
621 
622  if (isdigit(ch)) /* if real digit */
623  {
624  Retval = ch - '0'; /* convert that */
625  }
626  else
627  {
628  Retval = toupper(ch) - 'A' + 10; /* convert alphabetic */
629  }
630  return Retval; /* return conversion */
631 }
632 
633 /**
634  * The value of the buffer contents
635  * interpreted as the binary expansion
636  * of a byte, with most significant
637  * bit in s[0] and least significant
638  * bit in s[7].
639  *
640  * @param String The string to pack
641  *
642  * @return The single packed character.
643  */
644 char StringUtil::packByte(const char *String )
645 {
646  char Result = 0; /* start off at zero */
647  for (int i = 0; i < 8; i++) /* loop thru 8 chars */
648  {
649  if (String[i] == '1') /* if 'bit' set */
650  {
651  Result |= (1<<(7-i)); /* or with mask */
652  }
653  }
654  return Result; /* return packed byte */
655 }
656 
657 /**
658  * The value of the buffer contents
659  * interpreted as the binary expansion
660  * of a byte, with most significant
661  * bit in s[0] and least significant
662  * bit in s[7].
663  *
664  * @param String Pack 4 characters into a hex string value.
665  *
666  * @return The hex character representing the nibble value.
667  */
668 char StringUtil::packNibble(const char *String)
669 {
670  char Buf[8]; /* temporary buffer */
671  int i; /* table index */
672 
673  memset(Buf, '0', 4); /* set first 4 bytes to zero */
674  memcpy(Buf+4, String, 4); /* copy next 4 bytes */
675  i = packByte(Buf); /* pack to a single byte */
676  return "0123456789ABCDEF"[i]; /* convert to a printable character */
677 }
678 
679 /**
680  * Pack 2 0123456789ABCDEFabcdef chars into
681  * byte
682  *
683  * The value of the buffer contents
684  * interpreted as the hex expansion
685  * of a byte, with most significant
686  * nibble in s[0] and least significant
687  * nibble in s[2].
688  *
689  * @param Byte The pointer to the hex digit pair to pack.
690  *
691  * @return The single byte encoding of the pair of digits.
692  */
693 char StringUtil::packByte2(const char *Byte)
694 {
695  int Nibble1; /* first nibble */
696  int Nibble2; /* second nibble */
697 
698  /* convert each digit */
699  Nibble1 = hexDigitToInt(Byte[0]);
700  Nibble2 = hexDigitToInt(Byte[1]);
701  /* combine the two digits */
702 
703  return((Nibble1 << 4) | Nibble2);
704 }
705 
706 /**
707  * Validate blocks in string
708  *
709  * A string is considered valid if consists
710  * of zero or more characters belonging to
711  * the null-terminated C string set in
712  * groups of size modulus. The first group
713  * may have fewer than modulus characters.
714  * The groups are optionally separated by
715  * one or more blanks.
716  *
717  * @param String The string to validate.
718  * @param Length The string length.
719  * @param Set The valid characters in the set.
720  * @param Modulus The size of the smallest allowed grouping.
721  * @param Hex Indicates this is a hex or binary string. Used for issuing
722  * the correct error type.
723  *
724  * @return The number of valid digits found.
725  */
726 size_t StringUtil::validateSet(const char *String, size_t Length, const char *Set, int Modulus, bool Hex)
727 {
728  char c; /* current character */
729  size_t Count; /* # set members found */
730  const char *Current; /* current location */
731  const char *SpaceLocation = NULL; /* location of last space */
732  int SpaceFound; /* space found yet? */
733  size_t Residue = 0; /* if space_found, # set */
734  /* members */
735 
736  // leading whitespace not permitted
737  if (*String == ch_SPACE || *String == ch_TAB)
738  {
739  if (Hex) /* hex version? */
740  {
741  /* raise the hex message */
743  }
744  else
745  {
746  /* need the binary version */
748  }
749  }
750  SpaceFound = 0; /* set initial space flag */
751  Count = 0; /* start count with zero */
752  Current = String; /* point to start */
753 
754  for (; Length != 0; Length--)
755  { /* process entire string */
756  c = *Current++; /* get char and step pointer */
757  /* if c in set */
758  if (c != '\0' && strchr(Set, c) != NULL)
759  {
760  Count++; /* bump count */
761  }
762  else
763  {
764  if (c == ch_SPACE || c == ch_TAB)
765  { /* if c blank */
766  SpaceLocation = Current; /* save the space location */
767  if (!SpaceFound)
768  { /* if 1st blank */
769  /* save position */
770  Residue = (Count % Modulus);
771  SpaceFound = 1; /* we have the first space */
772  }
773  /* else if bad position */
774  else if (Residue != (Count % Modulus))
775  {
776  if (Hex) /* hex version? */
777  {
778  /* raise the hex message */
779  reportException(Error_Incorrect_method_hexblank, OREF_positional, SpaceLocation - String);
780  }
781  else
782  {
783  /* need the binary version */
784  reportException(Error_Incorrect_method_binblank, SpaceLocation - String);
785  }
786  }
787  }
788  else
789  {
790 
791  if (Hex) /* hex version? */
792  {
793  /* raise the hex message */
795  }
796  else
797  {
799  }
800  }
801  }
802  }
803  /* if trailing blank or grouping bad */
804  if ((c == ch_SPACE || c == ch_TAB) || (SpaceFound && ((Count % Modulus) != Residue)))
805  {
806  if (Hex) /* hex version? */
807  {
808  /* raise the hex message */
809  reportException(Error_Incorrect_method_hexblank, OREF_positional, SpaceLocation - String);
810  }
811  else
812  {
813  /* need the binary version */
814  reportException(Error_Incorrect_method_binblank, SpaceLocation - String);
815  }
816  }
817  return Count; /* return count of chars */
818 }
819 
820 /**
821  * Scan string for next members of
822  * character set
823  *
824  * @param Destination
825  * The string where the characters are packed.
826  * @param Source The source for the string data.
827  * @param Length The length of the input string.
828  * @param Count The number of valid characters in the string.
829  * @param Set The set of allowed characters.
830  * @param ScannedSize
831  * The returned scan size.
832  *
833  * @return
834  */
835 size_t StringUtil::chGetSm(char *Destination, const char *Source, size_t Length, size_t Count, const char *Set, size_t *ScannedSize)
836 {
837  char c; /* current scanned character */
838  const char *Current; /* current scan pointer */
839  size_t Found; /* number of characters found */
840  size_t Scanned; /* number of character scanned*/
841 
842  Scanned = 0; /* nothing scanned yet */
843  Found = 0; /* nothing found yet */
844  Current = Source; /* get pointer to string */
845 
846  for (; Length != 0; Length--)
847  { /* scan entire string */
848  c = *Current++; /* get char and step pointer */
849  Scanned++; /* remember scan count */
850  /* if c in set */
851  if (c != '\0' && strchr(Set, c) != NULL)
852  {
853  *Destination++ = c; /* copy c */
854  if (++Found == Count) /* if all found */
855  {
856  break; /* we are all done */
857  }
858  }
859  }
860  *ScannedSize = Scanned; /* return characters scanned */
861  return Found; /* and number found */
862 }
863 
864 /**
865  * pack a string of 'hex' digits in place
866  *
867  * take two alpha chars and make into one byte
868  *
869  * @param String The string to pack
870  * @param StringLength
871  * The length of the string.
872  *
873  * @return The resulting packed string.
874  */
875 RexxString *StringUtil::packHex(const char *String, size_t StringLength)
876 {
877  size_t Nibbles; /* count of nibbles to pack */
878  size_t n;
879  const char *Source; /* pack source */
880  char * Destination; /* packing destination */
881  size_t b; /* nibble odd count */
882  char Buf[8]; /* temp pack buffer */
883  size_t jjj; /* copies nibbles */
884  RexxString *Retval; /* result value */
885 
886  if (StringLength != 0)
887  { /* if not a null string */
888  Source = String; /* get pointer */
889  /* validate the information */
890  Nibbles = validateSet(Source, StringLength, "0123456789ABCDEFabcdef", 2, true);
891  /* get a result string */
892  Retval = raw_string((Nibbles + 1) / 2);
893  /* initialize destination */
894  Destination = Retval->getWritableData();
895 
896  while (Nibbles > 0)
897  { /* while chars to process */
898 
899  b = Nibbles%2; /* get nibbles for next byte */
900  if (b == 0) /* even number */
901  {
902  b = 2; /* use two bytes */
903  }
904  else /* odd number, */
905  {
906  memset(Buf, '0', 2); /* pad with zeroes */
907  }
908 
909  jjj = 2 - b; /* copy nibbles into buff */
910  chGetSm(Buf+jjj, Source, StringLength, b, "0123456789ABCDEFabcdef", &n);
911  *Destination++ = packByte2(Buf); /* pack into destination */
912  Source += n; /* advance source location */
913  StringLength -= n; /* reduce the length */
914  Nibbles -= b; /* decrement the count */
915  }
916  }
917  else
918  {
919  /* this is a null string */
920  Retval = OREF_NULLSTRING;
921  }
922 #if 0
923  if (Retval != OREF_NULLSTRING)
924  {
925  // Declare that this string is byte encoded
926  ProtectedObject pRetval(Retval); // Must protect Retval because the GC can be triggered by messageSend
927  ProtectedObject result;
928  RexxObject *args[1];
929  args[0] = OREF_BYTE; // positional argument
930  bool messageUnderstood = Retval->messageSend(OREF_SETENCODING, args, 1, 0, result, false);
931  // OREF_SETENCODING do that: Retval~!setEncoding(OREF_BYTE);
932  }
933 #endif
934  return Retval; /* return the packed string */
935 }
936 
937 /**
938  * convert nibble to 4 '0'/'1' chars
939  *
940  * p[0], ..., p[3]: the four '0'/'1'
941  * chars representing the nibble
942  *
943  * No terminating null character is
944  * produced
945  *
946  * @param Val The nibble to unpack.
947  * @param p The location to unpack into.
948  */
949 void StringUtil::unpackNibble(int Val, char *p)
950 {
951  p[0] = (Val & 0x08) != 0 ?'1':'0';
952  p[1] = (Val & 0x04) != 0 ?'1':'0';
953  p[2] = (Val & 0x02) != 0 ?'1':'0';
954  p[3] = (Val & 0x01) != 0 ?'1':'0';
955 }
956 
957 
958 /**
959  * Find the first occurrence of the set non-member in a string.
960  *
961  * @param String The string to search.
962  * @param Set The character set.
963  * @param Length The length to search.
964  *
965  * @return The position of a match.
966  */
967 const char *StringUtil::memcpbrk(const char *String, const char *Set, size_t Length)
968 {
969  const char *Retval; /* returned value */
970 
971  Retval = NULL; /* nothing found yet */
972  while (Length-- != 0)
973  { /* search through string */
974  /* find a match in ref set? */
975  if (*String == '\0' || !strchr(Set, *String))
976  {
977  Retval = String; /* copy position */
978  break; /* quit the loop */
979  }
980  String++; /* step the pointer */
981  }
982  return Retval; /* return matched position */
983 }
984 
985 
986 /**
987  * Validate blocks in string
988  *
989  * A string is considered valid if consists
990  * of zero or more characters belonging to
991  * the null-terminated C string set in
992  * groups of size modulus. The first group
993  * may have fewer than modulus characters.
994  * The groups are optionally separated by
995  * one or more blanks.
996  *
997  * @param String The string to validate.
998  * @param Length The string length.
999  * @param Set The validation set.
1000  * @param Modulus The set modulus
1001  * @param PackedSize The final packed size.
1002  *
1003  * @return The count of located characters.
1004  */
1005 int StringUtil::valSet(const char *String, size_t Length, const char *Set, int Modulus, size_t *PackedSize )
1006 {
1007  char c = '\0'; /* current character */
1008  size_t Count; /* # set members found */
1009  const char *Current; /* current location */
1010  int SpaceFound; /* space found yet? */
1011  size_t Residue = 0; /* if space_found, # set members */
1012  int rc; /* return code */
1013 
1014  rc = false; /* default to failure */
1015  if (*String != ' ' && *String != '\t')
1016  { /* if no leading blank */
1017  SpaceFound = 0; /* set initial space flag */
1018  Count = 0; /* start count with zero */
1019  Current = String; /* point to start */
1020 
1021  rc = true; /* default to good now */
1022  for (; Length != 0; Length--)
1023  { /* process entire string */
1024  c = *Current++; /* get char and step pointer */
1025  /* if c in set */
1026  if (c != '\0' && strchr(Set, c) != NULL)
1027  {
1028  Count++; /* bump count */
1029  }
1030  else
1031  {
1032  if (c == ' ' || c == '\t')
1033  { /* if c blank */
1034  if (!SpaceFound)
1035  { /* if 1st blank */
1036  /* save position */
1037  Residue = (Count % Modulus);
1038  SpaceFound = 1; /* we have the first space */
1039  }
1040  /* else if bad position */
1041  else if (Residue != (Count % Modulus))
1042  {
1043  rc = false; /* this is an error */
1044  break; /* report error */
1045  }
1046  }
1047  else
1048  {
1049  rc = false; /* this is an error */
1050  break; /* report error */
1051  }
1052  }
1053  }
1054  if (rc)
1055  { /* still good? */
1056  if (c == ' ' || c == '\t') /* if trailing blank */
1057  {
1058  rc = false; /* report error */
1059  }
1060  else if (SpaceFound && (Count % Modulus) != Residue)
1061  {
1062  rc = false; /* grouping problem */
1063  }
1064  else
1065  {
1066  *PackedSize = Count; /* return count of chars */
1067  }
1068  }
1069  }
1070  return rc; /* return success/failure */
1071 }
1072 
1073 
1074 /**
1075  * Perform primitive datatype validation.
1076  *
1077  * @param String The target string.
1078  * @param Option The type of data to validate.
1079  *
1080  * @return True if this is of the indicated type, false for any mismatch.
1081  */
1083 {
1084  size_t Len; /* validated string length */
1085  RexxObject *Answer; /* validation result */
1086  RexxObject *Temp; /* temporary value */
1087  const char *Scanp; /* string data pointer */
1088  size_t Count; /* hex nibble count */
1089  int Type; /* validated symbol type */
1090  RexxNumberString *TempNum;
1091 
1092  Len = String->getLength(); /* get validated string len */
1093  Option = toupper(Option); /* get the first character */
1094 
1095  /* assume failure on checking */
1096  Answer = TheFalseObject;
1097  /* get a scan pointer */
1098  Scanp = String->getStringData();
1099 
1100  switch (Option)
1101  { /* based on type to confirm */
1102 
1103  case DATATYPE_ALPHANUMERIC: /* Alphanumeric */
1104  /* all in the set? */
1105  if (Len != 0 && !memcpbrk(Scanp, ALPHANUM, Len))
1106  {
1107  /* this is a good string */
1108  Answer = TheTrueObject;
1109  }
1110  break;
1111 
1112  case DATATYPE_BINARY: /* Binary string */
1113  /* validate the string */
1114  if (Len == 0 || valSet(Scanp, Len, BINARI, 4, &Count))
1115  {
1116  /* this is a good string */
1117  Answer = TheTrueObject;
1118  }
1119  break;
1120 
1121  case DATATYPE_LOWERCASE: /* Lowercase */
1122  if (Len != 0 && !memcpbrk(Scanp, LOWER_ALPHA, Len))
1123  {
1124  /* this is a good string */
1125  Answer = TheTrueObject;
1126  }
1127  break;
1128 
1129  case DATATYPE_UPPERCASE: /* Uppercase */
1130  if (Len != 0 && !memcpbrk(Scanp, UPPER_ALPHA, Len))
1131  {
1132  /* this is a good string */
1133  Answer = TheTrueObject;
1134  }
1135  break;
1136 
1137  case DATATYPE_MIXEDCASE: /* Mixed case */
1138  if (Len != 0 && !memcpbrk(Scanp, MIXED_ALPHA, Len))
1139  {
1140  /* this is a good string */
1141  Answer = TheTrueObject;
1142  }
1143  break;
1144 
1145  case DATATYPE_WHOLE_NUMBER: /* Whole number */
1146  /* validate as a number */
1147  TempNum = String->numberString();
1148  if (TempNum != OREF_NULL)
1149  { /* valid number? */
1150  /* force rounding to current digits */
1151  TempNum = (RexxNumberString *)TempNum->plus(IntegerZero);
1152  /* check for integer then */
1153  Answer = TempNum->isInteger();
1154  }
1155  break;
1156 
1157  case DATATYPE_NUMBER: /* Number */
1158  /* validate as a number */
1159  Temp = (RexxObject *)String->numberString();
1160  if (Temp != OREF_NULL) /* valid number? */
1161  {
1162  /* got a good one */
1163  Answer = TheTrueObject;
1164  }
1165  break;
1166 
1167  case DATATYPE_9DIGITS: /* NUMERIC DIGITS 9 number */
1168  { /* good long number */
1169  wholenumber_t temp;
1170  if (String->numberValue(temp))
1171  {
1172  Answer = TheTrueObject;
1173  }
1174  break;
1175  }
1176 
1177  case DATATYPE_HEX: /* heXadecimal */
1178  /* validate the string */
1179  if (Len == 0 || valSet(Scanp, Len, HEX_CHAR_STR, 2, &Count))
1180  {
1181  /* valid hexadecimal */
1182  Answer = TheTrueObject;
1183  }
1184  break;
1185 
1186  case DATATYPE_SYMBOL: /* Symbol */
1187  /* validate the symbol */
1188  if (String->isSymbol() != STRING_BAD_VARIABLE)
1189  {
1190  /* is a valid symbol */
1191  Answer = TheTrueObject;
1192  }
1193  break;
1194 
1195  case DATATYPE_VARIABLE: /* Variable */
1196 
1197  /* validate the symbol */
1198  Type = String->isSymbol();
1199  /* a valid variable type? */
1200  if (Type == STRING_NAME ||
1201  Type == STRING_STEM ||
1202  Type == STRING_COMPOUND_NAME)
1203  {
1204  /* is a valid symbol */
1205  Answer = TheTrueObject;
1206  }
1207  break;
1208 
1209  case DATATYPE_LOGICAL: // Test for a valid logical.
1210  if (Len != 1 || (*Scanp != '1' && *Scanp != '0'))
1211  {
1212  Answer = TheFalseObject;
1213  }
1214  else
1215  {
1216  Answer = TheTrueObject;
1217  }
1218 
1219  break;
1220 
1221  default : /* unsupported option */
1222  reportException(Error_Incorrect_method_option, "ABCDLMNOSUVWX9", new_string((const char *)&Option,1));
1223  }
1224  return Answer; /* return validation answer */
1225 }
1226 
1227 
1228 /**
1229  * Skip leading blanks in a string.
1230  *
1231  * @param String The target string.
1232  * @param StringLength
1233  * The length of the string segment.
1234  */
1235 void StringUtil::skipBlanks(const char **String, size_t *StringLength )
1236 {
1237  const char *Scan; /* scan pointer */
1238  size_t Length; /* length to scan */
1239 
1240  Scan = *String; /* point to data */
1241  Length = *StringLength; /* get the length */
1242 
1243  for (;Length != 0; Length--)
1244  { /* scan entire string */
1245  if (*Scan != ' ' && *Scan != '\t') /* if not a space */
1246  {
1247  break; /* just quit the loop */
1248  }
1249  Scan++; /* step to next character */
1250  }
1251  /* fell through, all blanks */
1252  *String = Scan; /* set pointer one past */
1253  *StringLength = Length; /* update the length */
1254 }
1255 
1256 /**
1257  * Skip non-blank characters to the next whitespace char.
1258  *
1259  * @param String The source string.
1260  * @param StringLength
1261  * The string length (update on return);
1262  */
1263 void StringUtil::skipNonBlanks(const char **String, size_t *StringLength )
1264 {
1265  const char *Scan; /* scan pointer */
1266  size_t Length; /* length to scan */
1267 
1268  Scan = *String; /* point to data */
1269  Length = *StringLength; /* get the length */
1270 
1271  for (;Length != 0; Length--)
1272  { /* scan entire string */
1273  if (*Scan == ' ' || *Scan == '\t') /* if not a space */
1274  {
1275  break; /* just quit the loop */
1276  }
1277  Scan++; /* step to next character */
1278  }
1279  /* fell through, all blanks */
1280  *String = Scan; /* set pointer one past */
1281  *StringLength = Length; /* update the length */
1282 }
1283 
1284 
1285 /**
1286  * Count the number of words in a string.
1287  *
1288  * @param String The string to count.
1289  * @param StringLength
1290  * The length of the string.
1291  *
1292  * @return The count of white-space delimited words.
1293  */
1294 size_t StringUtil::wordCount(const char *String, size_t StringLength )
1295 {
1296  size_t Count = 0; /* default to nothing */
1297  if (StringLength != 0)
1298  { /* if not a null string */
1299  skipBlanks(&String, &StringLength);/* skip any leading blanks */
1300 
1301  while (StringLength != 0)
1302  { /* while still string ... */
1303  Count++; /* account for this word */
1304  /* now skip the non-blanks */
1305  skipNonBlanks(&String, &StringLength);
1306  if (StringLength == 0) /* if done with the string */
1307  {
1308  break; /* we are finished */
1309  }
1310  /* skip to the next word */
1311  skipBlanks(&String, &StringLength);
1312  } /* loop while still have chars*/
1313  }
1314  return Count; /* done looping, return the */
1315  /* count of words */
1316 }
1317 
1318 
1319 /**
1320  * Find the next word in the string.
1321  *
1322  * @param String The source string.
1323  * @param StringLength
1324  * The length of the string (update on return).
1325  * @param NextString The next word position.
1326  *
1327  * @return The length of the located word.
1328  */
1329 size_t StringUtil::nextWord(const char **String, size_t *StringLength, const char **NextString )
1330 {
1331  size_t WordStart = 0; /* nothing moved yet */
1332  if (*StringLength != 0)
1333  { /* Something there? */
1334  skipBlanks(String, StringLength); /* skip any leading blanks */
1335 
1336  if (*StringLength != 0)
1337  { /* if still string ... */
1338  WordStart = *StringLength; /* save current length */
1339  *NextString = *String; /* save start position now */
1340  /* skip the non-blanks */
1341  skipNonBlanks(NextString, StringLength);
1342  WordStart -= *StringLength; /* adjust the word length */
1343  }
1344  }
1345  return WordStart; /* return word length */
1346 }
1347 
1348 
1349 /**
1350  * Count the occurences of a string within another string.
1351  *
1352  * @param hayStack Pointer to the haystack data.
1353  * @param hayStackLength
1354  * Length of the haystack data.
1355  * @param needle The needle we're searching for
1356  *
1357  * @return The count of needle occurrences located in the string.
1358  */
1359 size_t StringUtil::countStr(const char *hayStack, size_t hayStackLength, RexxString *needle)
1360 {
1361  size_t count = 0; /* no matches yet */
1362  /* get the first match position */
1363  size_t matchPos = pos(hayStack, hayStackLength, needle, 0, hayStackLength);
1364  while (matchPos != 0)
1365  {
1366  count = count + 1; /* count this match */
1367  // step to the new position and search
1368  matchPos = pos(hayStack, hayStackLength, needle, matchPos + needle->getLength() - 1, hayStackLength);
1369  }
1370  return count; /* return the match count */
1371 }
1372 
1373 
1374 /**
1375  * Count the occurences of a string within another string.
1376  *
1377  * @param hayStack Pointer to the haystack data.
1378  * @param hayStackLength
1379  * Length of the haystack data.
1380  * @param needle The needle we're searching for
1381  *
1382  * @return The count of needle occurrences located in the string.
1383  */
1384 size_t StringUtil::caselessCountStr(const char *hayStack, size_t hayStackLength, RexxString *needle)
1385 {
1386  size_t count = 0; /* no matches yet */
1387  /* get the first match position */
1388  size_t matchPos = caselessPos(hayStack, hayStackLength, needle, 0, hayStackLength);
1389  while (matchPos != 0)
1390  {
1391  count = count + 1; /* count this match */
1392  // step to the new position and search
1393  matchPos = caselessPos(hayStack, hayStackLength, needle, matchPos + needle->getLength() - 1, hayStackLength);
1394  }
1395  return count; /* return the match count */
1396 }
1397 
1398 
1400  const char *string, /* search string */
1401  size_t length, /* string length */
1402  char target ) /* target character */
1403 /*********************************************************************/
1404 /* Function: offset of first occurrence of char in string */
1405 /*********************************************************************/
1406 {
1407  /* while in the string */
1408  for (const char *scan = string; length != 0; length--)
1409  {
1410  // if we have a match, return the offset
1411  if (*scan == target)
1412  {
1413  return scan - string;
1414  }
1415  scan++; /* step the position */
1416  }
1417  return -1; // no match position
1418 }
1419 
1420 
1421 /**
1422  * Perform a verify operation on a section of data.
1423  *
1424  * @param data The data pointer
1425  * @param stringLen The length of the string to match
1426  * @param ref The reference search string.
1427  * @param option The match/nomatch option.
1428  * @param _start The starting offset for the match.
1429  *
1430  * @return The match/nomatch position, or 0 if nothing was found.
1431  */
1432 RexxInteger *StringUtil::verify(const char *data, size_t stringLen, RexxString *ref, RexxString *option, RexxInteger *_start, RexxInteger *range)
1433 {
1434  // get the reference string information
1435  ref = stringArgument(ref, OREF_positional, ARG_ONE);
1436  size_t referenceLen = ref->getLength();
1437  const char *refSet = ref->getStringData();
1438  /* get the option, default 'Nomatch' */
1439  char opt = optionalOptionArgument(option, VERIFY_NOMATCH, ARG_TWO);
1440  // validate the possibilities
1441  if (opt != VERIFY_MATCH && opt != VERIFY_NOMATCH)
1442  {
1443  /* not that either, then its an error*/
1445  }
1446 
1447  /* get starting position */
1448  size_t startPos = optionalPositionArgument(_start, 1, ARG_THREE);
1449  size_t stringRange = optionalLengthArgument(range, stringLen - startPos + 1, ARG_FOUR);
1450  if (startPos > stringLen) /* beyond end of string? */
1451  {
1452  return IntegerZero; /* couldn't find it */
1453  }
1454  else
1455  {
1456  // adjust the range for seaching
1457  stringRange = Numerics::minVal(stringRange, stringLen - startPos + 1);
1458 
1459  /* point at start position */
1460  const char *current = data + startPos - 1;
1461  if (referenceLen == 0)
1462  { /* if verifying a nullstring */
1463  if (opt == VERIFY_MATCH) /* can't match at all */
1464  {
1465  return IntegerZero; /* so return zero */
1466  }
1467  else
1468  {
1469  return new_integer(startPos);/* non-match at start position */
1470  }
1471  }
1472  else
1473  {
1474  // we're verifying that all characters are members of the reference set, so
1475  // return the first non-matching character
1476  if (opt == VERIFY_NOMATCH)
1477  {
1478  while (stringRange-- != 0)
1479  {
1480  // if no match at this position, return this position
1481  if (!StringUtil::matchCharacter(*current++, refSet, referenceLen))
1482  {
1483  return new_integer(current - data);
1484  }
1485  }
1486  // this is always a non matching situation to get here
1487  return IntegerZero;
1488  }
1489  else
1490  {
1491  while (stringRange-- != 0)
1492  {
1493  // if we have a match at this position, trigger this
1494  if (StringUtil::matchCharacter(*current++, refSet, referenceLen))
1495  {
1496  return new_integer(current - data);
1497  }
1498  }
1499  // this is always a non matching situation to get here
1500  return IntegerZero;
1501  }
1502  }
1503  }
1504 }
1505 
1506 
1507 /**
1508  * Do a subword operation on a buffer of data
1509  *
1510  * @param data The start of the data buffer.
1511  * @param length The length of the buffer
1512  * @param position The starting word position.
1513  * @param plength the count of words to return.
1514  *
1515  * @return The string containing the indicated subwords.
1516  */
1517 RexxString *StringUtil::subWord(const char *data, size_t length, RexxInteger *position, RexxInteger *plength)
1518 {
1519  /* convert position to binary */
1520  size_t wordPos = positionArgument(position, ARG_ONE);
1521  // get num of words to extract. The default is a "very large number
1522  size_t count = optionalLengthArgument(plength, Numerics::MAX_WHOLENUMBER, ARG_TWO);
1523 
1524  // handle cases that will always result in a null string
1525  if (length == 0 || count == 0)
1526  {
1527  return OREF_NULLSTRING;
1528  }
1529  const char *nextSite = NULL;
1530  const char *word = data;
1531  /* get the first word */
1532  size_t wordLength = nextWord(&word, &length, &nextSite);
1533  while (--wordPos > 0 && wordLength != 0)
1534  { /* loop until we reach tArget */
1535  word = nextSite; /* copy the start pointer */
1536  /* get the next word */
1537  wordLength = nextWord(&word, &length, &nextSite);
1538  }
1539  // we terminated because there was no word found before we reached the
1540  // count position
1541  if (wordPos != 0)
1542  {
1543  return OREF_NULLSTRING; /* again a null string */
1544  }
1545 
1546  const char *wordStart = word; /* save start position */
1547  const char *wordEnd = word; /* default end is the same */
1548  /* loop until we reach tArget */
1549  while (count-- > 0 && wordLength != 0)
1550  {
1551  wordEnd = word + wordLength; /* point to the word end */
1552  word = nextSite; /* copy the start pointer */
1553  /* get the next word */
1554  wordLength = nextWord(&word, &length, &nextSite);
1555  }
1556  /* extract the substring */
1557  return new_string(wordStart, wordEnd - wordStart);
1558 }
1559 
1560 
1561 /**
1562  * Do a wordList operation on a buffer of data
1563  *
1564  * @param data The start of the data buffer.
1565  * @param length The length of the buffer
1566  * @param position The starting word position.
1567  * @param plength the count of words to return.
1568  *
1569  * @return The array containing the indicated subwords.
1570  */
1571 RexxArray *StringUtil::subWords(const char *data, size_t length, RexxInteger *position, RexxInteger *plength)
1572 {
1573  /* convert position to binary */
1574  size_t wordPos = optionalPositionArgument(position, 1, ARG_ONE);
1575  // get num of words to extract. The default is a "very large number
1576  size_t count = optionalLengthArgument(plength, Numerics::MAX_WHOLENUMBER, ARG_TWO);
1577 
1578  // handle cases that will always result an empty array
1579  if (length == 0 || count == 0)
1580  {
1581  return new_array((size_t)0);
1582  }
1583 
1584  const char *nextSite = NULL;
1585  const char *word = data;
1586  /* get the first word */
1587  size_t wordLength = nextWord(&word, &length, &nextSite);
1588  while (--wordPos > 0 && wordLength != 0)
1589  { /* loop until we reach target */
1590  word = nextSite; /* copy the start pointer */
1591  /* get the next word */
1592  wordLength = nextWord(&word, &length, &nextSite);
1593  }
1594  // we terminated because there was no word found before we reached the
1595  // count position
1596  if (wordPos != 0)
1597  {
1598  return new_array((size_t)0); // again, an empty array
1599  }
1600 
1601  // we make this size zero so the size and the items count will match
1602  RexxArray *result = new_array((size_t)0);
1603  ProtectedObject p(result);
1604 
1605  const char *wordStart = word; /* save start position */
1606  /* loop until we reach tArget */
1607  while (count-- > 0 && wordLength != 0)
1608  {
1609  // add to the result array
1610  result->append(new_string(word, wordLength));
1611  word = nextSite; /* copy the start pointer */
1612  /* get the next word */
1613  wordLength = nextWord(&word, &length, &nextSite);
1614  }
1615 
1616  return result; // return the populated array
1617 }
1618 
1619 
1620 /**
1621  * Extract a word from a buffer
1622  *
1623  * @param data The data pointer
1624  * @param length the length of the data buffer.
1625  * @param position the target word position.
1626  *
1627  * @return The string value of the word at the indicated position.
1628  */
1629 RexxString *StringUtil::word(const char *data, size_t length, RexxInteger *position)
1630 {
1631  /* convert position to binary */
1632  size_t wordPos = positionArgument(position, ARG_ONE);
1633 
1634  if (length == 0) /* null string? */
1635  {
1636  return OREF_NULLSTRING; /* result is null also */
1637  }
1638  const char *word = data; /* point to the string */
1639  const char *nextSite = NULL;
1640  /* get the first word */
1641  size_t wordLength = nextWord(&word, &length, &nextSite);
1642  while (--wordPos > 0 && wordLength != 0)
1643  { /* loop until we reach target */
1644  word = nextSite; /* copy the start pointer */
1645  /* get the next word */
1646  wordLength = nextWord(&word, &length, &nextSite);
1647  }
1648  if (wordLength != 0) /* have a word */
1649  {
1650  /* extract the string */
1651  return new_string(word, wordLength);
1652  }
1653  return OREF_NULLSTRING; /* no word, return a null */
1654 }
1655 
1656 
1657 /**
1658  * Extract all words from a buffer
1659  *
1660  * @param data The data pointer
1661  * @param length the length of the data buffer.
1662  * @param position the target word position.
1663  *
1664  * @return The string value of the word at the indicated position.
1665  */
1666 RexxArray *StringUtil::words(const char *data, size_t length)
1667 {
1668  const char *word = data; /* point to the string */
1669  const char *nextSite = NULL;
1670 
1671  RexxArray *result = new_array((size_t)0);
1672  ProtectedObject p(result);
1673  /* get the first word */
1674  size_t wordLength = nextWord(&word, &length, &nextSite);
1675  while (wordLength != 0)
1676  {
1677  // add to the result array
1678  result->append(new_string(word, wordLength));
1679  word = nextSite; /* copy the start pointer */
1680  /* get the next word */
1681  wordLength = nextWord(&word, &length, &nextSite);
1682  }
1683  return result; // return whatever we've accumulated
1684 }
1685 
1686 
1687 /**
1688  * Return the index position for a given word
1689  *
1690  * @param data The data containing the words
1691  * @param length The length of the data buffer
1692  * @param position The target word position
1693  *
1694  * @return The offset of the start of the indicated word.
1695  */
1696 RexxInteger *StringUtil::wordIndex(const char *data, size_t length, RexxInteger *position)
1697 {
1698  /* convert count to binary */
1699  size_t wordPos = positionArgument(position, ARG_ONE);
1700  const char *word = data; /* point to word data */
1701  const char *nextSite = NULL;
1702 
1703  /* get the first word */
1704  size_t wordLength = nextWord(&word, &length, &nextSite);
1705  while (--wordPos > 0 && wordLength != 0)
1706  { /* loop until we reach target */
1707  word = nextSite; /* copy the start pointer */
1708  /* get the next word */
1709  wordLength = nextWord(&word, &length, &nextSite);
1710  }
1711 
1712  if (wordLength == 0) /* ran out of string */
1713  {
1714  return IntegerZero; /* no index */
1715  }
1716  return new_integer(word - data + 1);
1717 }
1718 
1719 
1720 /**
1721  * Return the length of the word located at a given index.
1722  *
1723  * @param data The data containing the word list.
1724  * @param length The length of the data buffer
1725  * @param position The target word position.
1726  *
1727  * @return The length of the given word at the target index. Returns
1728  * 0 if no word is found.
1729  */
1730 RexxInteger *StringUtil::wordLength(const char *data, size_t length, RexxInteger *position)
1731 {
1732  /* convert count to binary */
1733  size_t wordPos = positionArgument(position , ARG_ONE);
1734  const char *word = data; /* point to word data */
1735  const char *nextSite = NULL;
1736 
1737  /* get the first word */
1738  size_t wordLength = nextWord(&word, &length, &nextSite);
1739  while (--wordPos > 0 && wordLength != 0)
1740  { /* loop until we reach target */
1741  word = nextSite; /* copy the start pointer */
1742  /* get the next word */
1743  wordLength = nextWord(&word, &length, &nextSite);
1744  }
1745  return new_integer(wordLength); /* return the word length */
1746 }
1747 
1748 
1749 /**
1750  * Execute a wordpos search on a buffer of data.
1751  *
1752  * @param data the source data buffer.
1753  * @param length the length of the buffer
1754  * @param phrase the search phrase.
1755  * @param pstart the starting position.
1756  *
1757  * @return the location of the start of the search phrase.
1758  */
1759 RexxInteger *StringUtil::wordPos(const char *data, size_t length, RexxString *phrase, RexxInteger *pstart)
1760 {
1761  phrase = stringArgument(phrase, OREF_positional, ARG_ONE);/* get the phrase we are looking for */
1762  stringsize_t needleLength = phrase->getLength(); /* get the length also */
1763  /* get starting position, the default*/
1764  /* is the first word */
1765  stringsize_t count = optionalPositionArgument(pstart, 1, ARG_TWO);
1766 
1767  const char *needle = phrase->getStringData(); /* get friendly pointer */
1768  const char *haystack = data; /* and the second also */
1769  stringsize_t haystackLength = length; /* get the haystack length */
1770  /* count the words in needle */
1771  stringsize_t needleWords = wordCount(needle, needleLength);
1772  /* and haystack */
1773  stringsize_t haystackWords = wordCount(haystack, haystackLength);
1774  /* if search string is longer */
1775  /* or no words in search */
1776  /* or count is longer than */
1777  /* haystack, this is a failure */
1778  if (needleWords > (haystackWords - count + 1) || needleWords == 0 || count > haystackWords)
1779  {
1780  return IntegerZero;
1781  }
1782 
1783  const char *nextHaystack;
1784  const char *nextNeedle;
1785  /* point at first word */
1786  stringsize_t haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1787  /* now skip over count-1 */
1788  for (stringsize_t i = count - 1; i && haystackWordLength != 0; i--)
1789  {
1790  haystack = nextHaystack; /* step past current word */
1791  /* find the next word */
1792  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1793  }
1794  /* get number of searches */
1795  stringsize_t searchCount = (haystackWords - needleWords - count) + 2;
1796  /* position at first needle */
1797  stringsize_t firstNeedle = nextWord(&needle, &needleLength, &nextNeedle);
1798  /* loop for the possible times */
1799  for (; searchCount; searchCount--)
1800  {
1801  stringsize_t needleWordLength = firstNeedle; /* set the length */
1802  const char *needlePosition = needle; /* get the start of phrase */
1803  const char *haystackPosition = haystack; /* and the target string loop */
1804  /* for needlewords */
1805  const char *nextHaystackPtr = nextHaystack; /* copy nextword information */
1806  const char *nextNeedlePtr = nextNeedle;
1807  /* including the lengths */
1808  stringsize_t haystackScanLength = haystackLength;
1809  stringsize_t needleScanLength = needleLength;
1810 
1811  stringsize_t i;
1812 
1813  for (i = needleWords; i; i--)
1814  {
1815  // length mismatch, can't be a match
1816 
1817  if (haystackWordLength != needleWordLength)
1818  {
1819  break;
1820  }
1821 
1822  // now compare the two words, using a caseless comparison
1823  // if the words don't match, terminate now
1824  if (memcmp(needlePosition, haystackPosition, needleWordLength) != 0)
1825  {
1826  break; /* get out fast. */
1827  }
1828 
1829  /* the last words matched, so */
1830  /* continue searching. */
1831 
1832  /* set new search information */
1833  haystackPosition = nextHaystackPtr;
1834  needlePosition = nextNeedlePtr;
1835  /* Scan off the next word */
1836  haystackWordLength = nextWord(&haystackPosition, &haystackScanLength, &nextHaystackPtr);
1837  /* repeat for the needle */
1838  needleWordLength = nextWord(&needlePosition, &needleScanLength, &nextNeedlePtr);
1839  }
1840 
1841  if (i == 0) /* all words matched, we */
1842  {
1843  return new_integer(count); // return the position
1844  }
1845  haystack = nextHaystack; /* set the search position */
1846  /* step to next haytack pos */
1847  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1848  count++; /* remember the word position */
1849  }
1850 
1851  return IntegerZero; // not found
1852 }
1853 
1854 
1855 /**
1856  * Execute a caseless wordpos search on a buffer of data.
1857  *
1858  * @param data the source data buffer.
1859  * @param length the length of the buffer
1860  * @param phrase the search phrase.
1861  * @param pstart the starting position.
1862  *
1863  * @return the location of the start of the search phrase.
1864  */
1865 RexxInteger *StringUtil::caselessWordPos(const char *data, size_t length, RexxString *phrase, RexxInteger *pstart)
1866 {
1867  phrase = stringArgument(phrase, OREF_positional, ARG_ONE);/* get the phrase we are looking for */
1868  stringsize_t needleLength = phrase->getLength(); /* get the length also */
1869  /* get starting position, the default*/
1870  /* is the first word */
1871  stringsize_t count = optionalPositionArgument(pstart, 1, ARG_TWO);
1872 
1873  const char *needle = phrase->getStringData(); /* get friendly pointer */
1874  const char *haystack = data; /* and the second also */
1875  stringsize_t haystackLength = length; /* get the haystack length */
1876  /* count the words in needle */
1877  stringsize_t needleWords = wordCount(needle, needleLength);
1878  /* and haystack */
1879  stringsize_t haystackWords = wordCount(haystack, haystackLength);
1880  /* if search string is longer */
1881  /* or no words in search */
1882  /* or count is longer than */
1883  /* haystack, this is a failure */
1884  if (needleWords > (haystackWords - count + 1) || needleWords == 0 || count > haystackWords)
1885  {
1886  return IntegerZero;
1887  }
1888 
1889  const char *nextHaystack;
1890  const char *nextNeedle;
1891  /* point at first word */
1892  stringsize_t haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1893  /* now skip over count-1 */
1894  for (stringsize_t i = count - 1; i && haystackWordLength != 0; i--)
1895  {
1896  haystack = nextHaystack; /* step past current word */
1897  /* find the next word */
1898  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1899  }
1900  /* get number of searches */
1901  stringsize_t searchCount = (haystackWords - needleWords - count) + 2;
1902  /* position at first needle */
1903  stringsize_t firstNeedle = nextWord(&needle, &needleLength, &nextNeedle);
1904  /* loop for the possible times */
1905  for (; searchCount; searchCount--)
1906  {
1907  stringsize_t needleWordLength = firstNeedle; /* set the length */
1908  const char *needlePosition = needle; /* get the start of phrase */
1909  const char *haystackPosition = haystack; /* and the target string loop */
1910  /* for needlewords */
1911  const char *nextHaystackPtr = nextHaystack; /* copy nextword information */
1912  const char *nextNeedlePtr = nextNeedle;
1913  /* including the lengths */
1914  stringsize_t haystackScanLength = haystackLength;
1915  stringsize_t needleScanLength = needleLength;
1916 
1917  stringsize_t i;
1918 
1919  for (i = needleWords; i; i--)
1920  {
1921  // length mismatch, can't be a match
1922 
1923  if (haystackWordLength != needleWordLength)
1924  {
1925  break;
1926  }
1927 
1928  // now compare the two words, using a caseless comparison
1929  // if the words don't match, terminate now
1930  if (caselessCompare(needlePosition, haystackPosition, needleWordLength))
1931  {
1932  break; /* get out fast. */
1933  }
1934 
1935  /* the last words matched, so */
1936  /* continue searching. */
1937 
1938  /* set new search information */
1939  haystackPosition = nextHaystackPtr;
1940  needlePosition = nextNeedlePtr;
1941  /* Scan off the next word */
1942  haystackWordLength = nextWord(&haystackPosition, &haystackScanLength, &nextHaystackPtr);
1943  /* repeat for the needle */
1944  needleWordLength = nextWord(&needlePosition, &needleScanLength, &nextNeedlePtr);
1945  }
1946 
1947  if (i == 0) /* all words matched, we */
1948  {
1949  return new_integer(count); // return the position
1950  }
1951  haystack = nextHaystack; /* set the search position */
1952  /* step to next haytack pos */
1953  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1954  count++; /* remember the word position */
1955  }
1956 
1957  return IntegerZero; // not found
1958 }
1959 
1960 
1961 /**
1962  * Checks if the buffer of data contains only ASCII characters.
1963  *
1964  * @param data the source data buffer.
1965  * @param length the length of the buffer
1966  *
1967  * @return true if the buffer of data contains only ASCII characters
1968  */
1969 bool StringUtil::checkIsASCII(const char *s, size_t length)
1970 {
1971  if (length != 0)
1972  {
1973  // Check from start ascending, from middle descending, from middle ascending, from end descending.
1974  // That will divide by 4 the number of iterations, while increasing the chance to find a not-ASCII character faster..
1975  const char *i1 = s;
1976  const char *i2 = s + (length - 1) / 2;
1977  const char *i3 = i2;
1978  const char *i4 = s + length - 1;
1979 
1980  do
1981  {
1982  if ( (*i1++ | *i2-- | *i3++ | *i4--) & 0x80 ) return false;
1983  }
1984  while (i1 <= i2 || i3 <= i4);
1985  }
1986 
1987  return true;
1988 }
void reportException(wholenumber_t error)
RexxArray * new_array(size_t s)
Definition: ArrayClass.hpp:259
RexxInteger * new_integer(wholenumber_t v)
#define ch_TAB
RexxQueue * new_queue()
Definition: QueueClass.hpp:89
codepoint_t optionalPadArgument(RexxObject *o, codepoint_t d, size_t p)
Definition: RexxCore.h:370
#define OREF_NULL
Definition: RexxCore.h:61
RexxString * stringArgument(RexxObject *object, RexxString *kind, size_t position)
Definition: RexxCore.h:315
const int ARG_FOUR
Definition: RexxCore.h:86
#define IntegerOne
Definition: RexxCore.h:200
const int ARG_THREE
Definition: RexxCore.h:85
#define TheTrueObject
Definition: RexxCore.h:196
const int ARG_TWO
Definition: RexxCore.h:84
size_t optionalLengthArgument(RexxObject *o, size_t d, size_t p)
Definition: RexxCore.h:355
char optionalOptionArgument(RexxObject *o, char d, size_t p)
Definition: RexxCore.h:377
size_t optionalPositionArgument(RexxObject *o, size_t d, size_t p)
Definition: RexxCore.h:363
#define TheFalseObject
Definition: RexxCore.h:195
const int ARG_ONE
Definition: RexxCore.h:83
#define IntegerZero
Definition: RexxCore.h:199
#define Error_Incorrect_method_option
#define Error_Incorrect_method_invbin
#define Error_Incorrect_method_invhex
#define Error_Incorrect_method_binblank
#define Error_Incorrect_method_hexblank
#define DATATYPE_UPPERCASE
Definition: StringClass.hpp:83
#define UPPER_ALPHA
#define DATATYPE_LOWERCASE
Definition: StringClass.hpp:78
#define DATATYPE_BINARY
Definition: StringClass.hpp:77
#define HEX_CHAR_STR
Definition: StringClass.hpp:96
RexxString * raw_string(stringsize_t l)
#define STRING_NAME
Definition: StringClass.hpp:60
RexxString * new_string(const char *s, stringsize_t l)
#define DATATYPE_MIXEDCASE
Definition: StringClass.hpp:79
#define DATATYPE_LOGICAL
Definition: StringClass.hpp:87
#define ALPHANUM
Definition: StringClass.hpp:97
#define DATATYPE_NUMBER
Definition: StringClass.hpp:80
#define LOWER_ALPHA
#define MIXED_ALPHA
#define VERIFY_NOMATCH
Definition: StringClass.hpp:90
#define DATATYPE_SYMBOL
Definition: StringClass.hpp:81
#define DATATYPE_9DIGITS
Definition: StringClass.hpp:86
#define STRING_COMPOUND_NAME
Definition: StringClass.hpp:56
#define VERIFY_MATCH
Definition: StringClass.hpp:89
#define DATATYPE_ALPHANUMERIC
Definition: StringClass.hpp:76
#define BINARI
Definition: StringClass.hpp:99
#define STRING_STEM
Definition: StringClass.hpp:55
#define DATATYPE_WHOLE_NUMBER
Definition: StringClass.hpp:84
#define DATATYPE_HEX
Definition: StringClass.hpp:85
#define ch_SPACE
Definition: StringClass.hpp:92
#define DATATYPE_VARIABLE
Definition: StringClass.hpp:82
#define STRING_BAD_VARIABLE
Definition: StringClass.hpp:54
stringsize_t positionArgument(RexxObject *argument, size_t position)
size_t RexxEntry StringLength(RexxThreadContext *c, RexxStringObject s)
static const wholenumber_t MAX_WHOLENUMBER
Definition: Numerics.hpp:62
static wholenumber_t minVal(wholenumber_t n1, wholenumber_t n2)
Definition: Numerics.hpp:116
void put(RexxObject *eref, size_t pos)
Definition: ArrayClass.cpp:208
size_t append(RexxObject *)
Definition: ArrayClass.cpp:485
RexxArray * makeArray()
Definition: ListClass.cpp:873
RexxObject * isInteger()
RexxNumberString * plus(RexxObject *)
bool messageSend(RexxString *, RexxObject **, size_t, size_t, ProtectedObject &, bool processUnknown=true, bool dynamicTarget=true)
void queue(RexxObject *obj)
Definition: QueueClass.hpp:82
bool numberValue(wholenumber_t &result, size_t precision)
size_t getLength()
const char * getStringData()
RexxNumberString * numberString()
void set(size_t s, int c, size_t l)
char * getWritableData()
void put(size_t s, const void *b, size_t l)
static void unpackNibble(int Val, char *p)
Definition: StringUtil.cpp:949
static size_t validateSet(const char *String, size_t Length, const char *Set, int Modulus, bool Hex)
Definition: StringUtil.cpp:726
static size_t pos(const char *stringData, size_t haystack_length, RexxString *needle, size_t _start, size_t _range)
Definition: StringUtil.cpp:155
static RexxString * packHex(const char *String, size_t StringLength)
Definition: StringUtil.cpp:875
static const char * memcpbrk(const char *String, const char *Set, size_t Length)
Definition: StringUtil.cpp:967
static char packByte2(const char *Byte)
Definition: StringUtil.cpp:693
static RexxInteger * caselessWordPos(const char *data, size_t length, RexxString *phrase, RexxInteger *pstart)
static int valSet(const char *String, size_t Length, const char *Set, int Modulus, size_t *PackedSize)
static RexxInteger * wordIndex(const char *data, size_t length, RexxInteger *position)
static RexxArray * subWords(const char *data, size_t length, RexxInteger *position, RexxInteger *plength)
static RexxObject * dataType(RexxString *String, char Option)
static bool checkIsASCII(const char *s, size_t length)
static size_t memPos(const char *string, size_t length, char target)
static void skipBlanks(const char **String, size_t *StringLength)
static size_t caselessLastPos(const char *stringData, size_t haystackLen, RexxString *needle, size_t _start, size_t range)
Definition: StringUtil.cpp:360
static size_t wordCount(const char *String, size_t StringLength)
static char packNibble(const char *String)
Definition: StringUtil.cpp:668
static RexxString * subWord(const char *data, size_t length, RexxInteger *position, RexxInteger *plength)
static void skipNonBlanks(const char **String, size_t *StringLength)
static int caselessCompare(const char *, const char *, size_t)
Definition: StringUtil.cpp:580
static RexxInteger * lastPosRexx(const char *stringData, size_t haystackLen, RexxString *needle, RexxInteger *_start, RexxInteger *_range)
Definition: StringUtil.cpp:255
static size_t caselessPos(const char *stringData, size_t haystack_length, RexxString *needle, size_t _start, size_t _range)
Definition: StringUtil.cpp:205
static size_t nextWord(const char **String, size_t *StringLength, const char **NextString)
static RexxInteger * wordPos(const char *data, size_t length, RexxString *phrase, RexxInteger *pstart)
static bool matchCharacter(char ch, const char *charSet, size_t len)
Definition: StringUtil.hpp:96
static RexxInteger * wordLength(const char *data, size_t length, RexxInteger *position)
static RexxArray * makearray(const char *start, size_t length, RexxString *separator)
Definition: StringUtil.cpp:493
static int hexDigitToInt(char ch)
Definition: StringUtil.cpp:618
static size_t chGetSm(char *Destination, const char *Source, size_t Length, size_t Count, const char *Set, size_t *ScannedSize)
Definition: StringUtil.cpp:835
static size_t countStr(const char *hayStack, size_t hayStackLength, RexxString *needle)
static const char * locateSeparator(const char *start, const char *end, const char *sepData, size_t sepLength)
Definition: StringUtil.cpp:467
static size_t caselessCountStr(const char *hayStack, size_t hayStackLength, RexxString *needle)
static RexxInteger * verify(const char *data, size_t stringLen, RexxString *ref, RexxString *option, RexxInteger *_start, RexxInteger *range)
static RexxString * word(const char *data, size_t length, RexxInteger *position)
static RexxString * subchar(const char *stringData, size_t stringLength, RexxInteger *positionArg)
Definition: StringUtil.cpp:442
static char packByte(const char *String)
Definition: StringUtil.cpp:644
static RexxString * substr(const char *, size_t, RexxInteger *, RexxInteger *, RexxString *)
Definition: StringUtil.cpp:66
static size_t lastPos(const char *stringData, size_t hastackLen, RexxString *needle, size_t _start, size_t _range)
Definition: StringUtil.cpp:278
static RexxInteger * posRexx(const char *stringData, size_t length, RexxString *needle, RexxInteger *pstart, RexxInteger *range)
Definition: StringUtil.cpp:130
static RexxArray * words(const char *data, size_t length)
ssize_t codepoint_t
Definition: rexx.h:232
ssize_t wholenumber_t
Definition: rexx.h:230
size_t stringsize_t
Definition: rexx.h:228