StringUtil.cpp
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* */
3 /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */
4 /* Copyright (c) 2005-2009 Rexx Language Association. All rights reserved. */
5 /* */
6 /* This program and the accompanying materials are made available under */
7 /* the terms of the Common Public License v1.0 which accompanies this */
8 /* distribution. A copy is also available at the following address: */
9 /* http://www.oorexx.org/license.html */
10 /* */
11 /* Redistribution and use in source and binary forms, with or */
12 /* without modification, are permitted provided that the following */
13 /* conditions are met: */
14 /* */
15 /* Redistributions of source code must retain the above copyright */
16 /* notice, this list of conditions and the following disclaimer. */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in */
19 /* the documentation and/or other materials provided with the distribution. */
20 /* */
21 /* Neither the name of Rexx Language Association nor the names */
22 /* of its contributors may be used to endorse or promote products */
23 /* derived from this software without specific prior written permission. */
24 /* */
25 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
26 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
27 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS */
28 /* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
29 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
30 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
31 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, */
32 /* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
33 /* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */
34 /* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */
35 /* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* */
37 /*----------------------------------------------------------------------------*/
38 /******************************************************************************/
39 /* REXX Kernel */
40 /* */
41 /* String Utilities shared between String class and MutableBuffer class */
42 /* */
43 /******************************************************************************/
44 #include <ctype.h>
45 #include <string.h>
46 
47 #include "RexxCore.h"
48 #include "StringClass.hpp"
49 #include "ProtectedObject.hpp"
50 #include "StringUtil.hpp"
51 #include "QueueClass.hpp"
52 
53 
54 /**
55  * Extract a substring from a data buffer.
56  *
57  * @param string The data buffer.
58  * @param stringLength
59  * The length of the buffer.
60  * @param _position The position argument for the starting position.
61  * @param _length The substring length argument.
62  * @param pad The padding argument.
63  *
64  * @return The extracted substring.
65  */
66 RexxString *StringUtil::substr(const char *string, sizeB_t stringLength, RexxInteger *_position,
67  RexxInteger *_length, RexxString *pad)
68 {
69  sizeC_t position = positionArgument(_position, ARG_ONE) - 1;
70  // assume nothing is pulled from this string
71  sizeC_t length = 0;
72  // is the position within the string bounds?
73  if (stringLength >= size_v(position)) // todo m17n
74  {
75  // we extract everything from the position to the end (potentially)
76  length = size_v(stringLength - size_v(position)); // todo m17n
77  }
78  // now we process any overrides on this
79  length = optionalLengthArgument(_length, length, ARG_TWO);
80  // get a padding character (blank is default)
81  codepoint_t padChar = optionalPadArgument(pad, ' ', ARG_THREE);
82 
83  // if our target length is zero, we can just return the null string singleton
84  if (length == 0)
85  {
86  return OREF_NULLSTRING;
87  }
88 
89  sizeB_t substrLength = 0;
90  sizeB_t padCount = 0;
91 
92  // starting past the end of the string?
93  // this will be all pad characters
94  if (size_v(position) > stringLength)
95  {
96  padCount = size_v(length); // todo m17n
97  }
98  else
99  {
100  // we have a combination of source string and pad characters
101  substrLength = Numerics::minVal(size_v(length), stringLength - size_v(position)); // todo m17n
102  padCount = size_v(length) - substrLength; // todo m17n
103  }
104  RexxString *retval = raw_string(size_v(length)); /* get result string */ // todo m17n
105  if (substrLength != 0) /* data to copy? */
106  {
107  // copy over the string portion
108  retval->put(0, string + size_v(position), substrLength); // todo m17n
109  }
110  // add any needed padding characters
111  if (padCount != 0)
112  {
113  retval->set(substrLength, padChar, padCount);
114  }
115  // and return the final result
116  return retval;
117 }
118 
119 
120 /**
121  * Locate a string within the designated string buffer.
122  *
123  * @param stringData The stringData to search within.
124  * @param length The length of the string data.
125  * @param needle The needle to search for.
126  * @param pstart The starting position.
127  *
128  * @return An integer object giving the located position.
129  */
130 RexxInteger *StringUtil::posRexx(const char *stringData, sizeB_t length, RexxString *needle, RexxInteger *pstart, RexxInteger *range)
131 {
132  /* force needle to a string */
133  needle = stringArgument(needle, OREF_positional, ARG_ONE);
134  /* get the starting position */
135  size_t _start = optionalPositionArgument(pstart, 1, ARG_TWO);
136  size_t _range = optionalLengthArgument(range, size_v(length - _start + 1), ARG_THREE);
137  /* pass on to the primitive function */
138  /* and return as an integer object */
139  sizeB_t result = pos(stringData, length, needle, _start - 1, _range);
140  return new_integer(result);
141 }
142 
143 
144 /**
145  * Primitive level search withint a string buffer.
146  *
147  * @param stringData The maystack buffer.
148  * @param haystack_length
149  * The length of the haystack.
150  * @param needle The search needle.
151  * @param _start The starting position.
152  *
153  * @return The offset of the located needle, or 0 if the needle doesn't exist.
154  */
155 sizeB_t StringUtil::pos(const char *stringData, sizeB_t haystack_length, RexxString *needle, sizeB_t _start, sizeB_t _range)
156 {
157  // get the two working lengths
158  sizeB_t needle_length = needle->getBLength();
159  // make sure the range is capped
160  _range = Numerics::minVal(_range, haystack_length - _start);
161 
162  // ok, there are a few quick checks we can perform. If the needle is
163  // bigger than the haystack, or the needle is a null string or
164  // our haystack length after adjusting to the starting position
165  // zero, then we can quickly return zero.
166  if (_start >= haystack_length || needle_length > _range || needle_length == 0)
167  {
168  return 0;
169  }
170 
171  // address the string value
172  const char *haypointer = stringData + _start;
173  const char *needlepointer = needle->getStringData();
174  sizeB_t location = _start + 1; // this is the match location as an index
175  // calculate the number of probes we can make in this string
176  sizeB_t count = _range - needle_length + 1;
177 
178  // now scan
179  while (count-- != 0)
180  {
181  /* get a hit? */
182  if (memcmp(haypointer, needlepointer, size_v(needle_length)) == 0) // todo m17n : needle_length is not a byte count
183  {
184  return location;
185  }
186  // step our pointers accordingly
187  location++;
188  haypointer++;
189  }
190  return 0; // we got nothing...
191 }
192 
193 
194 /**
195  * Primitive level search withint a string buffer.
196  *
197  * @param stringData The maystack buffer.
198  * @param haystack_length
199  * The length of the haystack.
200  * @param needle The search needle.
201  * @param _start The starting position.
202  *
203  * @return The offset of the located needle, or 0 if the needle doesn't exist.
204  */
205 sizeB_t StringUtil::caselessPos(const char *stringData, sizeB_t haystack_length, RexxString *needle, sizeB_t _start, sizeB_t _range)
206 {
207  // get the two working lengths
208  sizeB_t needle_length = needle->getBLength();
209  // make sure the range is capped
210  _range = Numerics::minVal(_range, haystack_length - _start + 1);
211 
212  // ok, there are a few quick checks we can perform. If the needle is
213  // bigger than the haystack, or the needle is a null string or
214  // our haystack length after adjusting to the starting position
215  // zero, then we can quickly return zero.
216  if (_start > haystack_length || needle_length > _range || needle_length == 0)
217  {
218  return 0;
219  }
220 
221  // address the string value
222  const char *haypointer = stringData + _start;
223  const char *needlepointer = needle->getStringData();
224  sizeB_t location = _start + 1; // this is the match location as an index
225  // calculate the number of probes we can make in this string
226  sizeB_t count = _range - needle_length + 1;
227 
228  // now scan
229  while (count-- != 0)
230  {
231  /* get a hit? */
232  if (caselessCompare(haypointer, needlepointer, needle_length) == 0)
233  {
234  return location;
235  }
236  // step our pointers accordingly
237  location++;
238  haypointer++;
239  }
240  return 0; // we got nothing...
241 }
242 
243 
244 /**
245  * Locate the last positon of a string within the designated
246  * string buffer.
247  *
248  * @param stringData The stringData to search within.
249  * @param length The length of the string data.
250  * @param needle The needle to search for.
251  * @param pstart The starting position.
252  *
253  * @return An integer object giving the located position.
254  */
255 RexxInteger *StringUtil::lastPosRexx(const char *stringData, sizeB_t haystackLen, RexxString *needle, RexxInteger *_start, RexxInteger *_range)
256 {
257  needle = stringArgument(needle, OREF_positional, ARG_ONE);
258  // find out where to start the search. The default is at the very end.
259  sizeB_t startPos = optionalPositionArgument(_start, haystackLen, ARG_TWO);
260  size_t range = optionalLengthArgument(_range, size_v(haystackLen), ARG_THREE);
261  // now perform the actual search.
262  sizeB_t result = lastPos(stringData, haystackLen, needle, startPos, range);
263  return new_integer(result);
264 }
265 
266 
267 /**
268  * Primitive level lastpos search within a string buffer.
269  *
270  * @param stringData The maystack buffer.
271  * @param haystack_length
272  * The length of the haystack.
273  * @param needle The search needle.
274  * @param _start The starting position.
275  *
276  * @return The offset of the located needle, or 0 if the needle doesn't exist.
277  */
278 sizeB_t StringUtil::lastPos(const char *stringData, sizeB_t haystackLen, RexxString *needle, sizeB_t _start, sizeB_t range)
279 {
280  sizeB_t needleLen = needle->getBLength(); /* and get the length too */
281 
282  // no match possible if either string is null
283  if (needleLen == 0 || haystackLen == 0 || needleLen > range)
284  {
285  return 0;
286  }
287  else
288  {
289  // get the start position for the search.
290  haystackLen = Numerics::minVal(_start, haystackLen);
291  range = Numerics::minVal(range, haystackLen);
292  // adjust the starting point by pretending this is smaller than the original string
293  const char *startPoint = stringData + haystackLen - range;
294  /* do the search */
295  const char *matchLocation = lastPos(needle->getStringData(), needleLen, startPoint, range);
296  if (matchLocation == NULL)
297  {
298  return 0;
299  }
300  else
301  {
302  return sizeB_v(matchLocation - stringData + 1);
303  }
304  }
305 }
306 
307 
308 /**
309  * Absolutely most primitive version of a lastpos search. This
310  * version searches directly in a buffer rather than a Rexx
311  * String.
312  *
313  * @param needle Pointer to the needle string.
314  * @param needleLen Length of the needle string.
315  * @param haystack The pointer to the haystack string.
316  * @param haystackLen
317  * The length of the haystack string.
318  *
319  * @return A pointer to the match location or NULL if there is no match.
320  */
321 const char *StringUtil::lastPos(const char *needle, sizeB_t needleLen, const char *haystack, sizeB_t haystackLen)
322 {
323  // if the needle's longer than the haystack, no chance of a match
324  if (needleLen > haystackLen)
325  {
326  return NULL;
327  }
328  // set the search startpoing point relative to the end of the search string
329  haystack = haystack + haystackLen - needleLen;
330  // this is the possible number of compares we might need to perform
331  sizeB_t count = haystackLen - needleLen + 1;
332  // now scan backward
333  while (count > 0)
334  {
335  // got a match at this position, return it directly
336  if (memcmp(haystack, needle, needleLen) == 0)
337  {
338  return haystack;
339  }
340  // decrement count and position
341  count--;
342  haystack--;
343  }
344  return NULL; // nothing to see here folks, move along
345 }
346 
347 
348 /**
349  * Primitive level caseless lastpos search within a string
350  * buffer.
351  *
352  * @param stringData The maystack buffer.
353  * @param haystack_length
354  * The length of the haystack.
355  * @param needle The search needle.
356  * @param _start The starting position.
357  *
358  * @return The offset of the located needle, or 0 if the needle doesn't exist.
359  */
360 sizeB_t StringUtil::caselessLastPos(const char *stringData, sizeB_t haystackLen, RexxString *needle, sizeB_t _start, sizeB_t range)
361 {
362  sizeB_t needleLen = needle->getBLength(); /* and get the length too */
363 
364  // no match possible if either string is null
365  if (needleLen == 0 || haystackLen == 0 || needleLen > range)
366  {
367  return 0;
368  }
369  else
370  {
371  // get the start position for the search.
372  haystackLen = Numerics::minVal(_start, haystackLen);
373  range = Numerics::minVal(range, haystackLen);
374  // adjust the starting point
375  const char *startPoint = stringData + haystackLen - range;
376  /* do the search */
377  const char *matchLocation = caselessLastPos(needle->getStringData(), needleLen, startPoint, range);
378  if (matchLocation == NULL)
379  {
380  return 0;
381  }
382  else
383  {
384  return sizeB_v(matchLocation - stringData + 1);
385  }
386  }
387 }
388 
389 
390 /**
391  * Absolutely most primitive version of a caseless lastpos
392  * search. This version searches directly in a buffer rather
393  * than a Rexx String.
394  *
395  * @param needle Pointer to the needle string.
396  * @param needleLen Length of the needle string.
397  * @param haystack The pointer to the haystack string.
398  * @param haystackLen
399  * The length of the haystack string.
400  *
401  * @return A pointer to the match location or NULL if there is no match.
402  */
403 const char *StringUtil::caselessLastPos(const char *needle, sizeB_t needleLen, const char *haystack, sizeB_t haystackLen)
404 {
405  // if the needle's longer than the haystack, no chance of a match
406  if (needleLen > haystackLen)
407  {
408  return NULL;
409  }
410  // set the search startpoing point relative to the end of the search string
411  haystack = haystack + haystackLen - needleLen;
412  // this is the possible number of compares we might need to perform
413  sizeB_t count = haystackLen - needleLen + 1;
414  // now scan backward
415  while (count > 0)
416  {
417  // got a match at this position, return it directly
418  if (caselessCompare(haystack, needle, needleLen) == 0)
419  {
420  return haystack;
421  }
422  // decrement count and position
423  count--;
424  haystack--;
425  }
426  return NULL; // nothing to see here folks, move along
427 }
428 
429 
430 /**
431  * Extract an individual character from a string buffer, returned
432  * as a string object.
433  *
434  * @param stringData The string buffer.
435  * @param stringLength
436  * The length of the buffer.
437  * @param positionArg
438  * The target position.
439  *
440  * @return The target character, as a string value.
441  */
442 RexxString *StringUtil::subchar(const char *stringData, sizeB_t stringLength, RexxInteger *positionArg)
443 {
444  // the starting position isn't optional
445  size_t position = positionArgument(positionArg, ARG_ONE) - 1;
446 
447  // beyond the bounds, this is a null string
448  if (position >= stringLength)
449  {
450  return OREF_NULLSTRING;
451  }
452  // return the single character
453  return new_string(stringData + position, 1);
454 }
455 
456 /**
457  * Search for a separator within a string segment.
458  *
459  * @param start The start position for the scan.
460  * @param end The last possible position for a scan (taking the length
461  * of the separator into account).
462  * @param sepData The separator data
463  * @param sepLength the length of the separator.
464  *
465  * @return The next match position, or null for no match.
466  */
467 const char *StringUtil::locateSeparator(const char *start, const char *end, const char *sepData, sizeB_t sepLength)
468 {
469  /* search for separator character */
470  while (start < end)
471  {
472  if (memcmp(start, sepData, sepLength) == 0)
473  {
474  return start;
475  }
476  start++;
477  }
478  // not found
479  return NULL;
480 }
481 
482 
483 /**
484  * Carve the string buffer up into an array of string values.
485  *
486  * @param start The starting position of the buffer.
487  * @param length The length of the buffer.
488  * @param separator The optional separator character.
489  *
490  * @return An array of all strings within the buffer, with the target
491  * delimiter removed.
492  */
493 RexxArray *StringUtil::makearray(const char *start, sizeB_t length, RexxString *separator)
494 {
495  const char *sepData = "\n"; // set our default separator
496  sizeB_t sepSize = 1;
497  bool checkCR = true; // by default, we look for either separator
498 
499  // if we have an explicit separator, use it instead
500  if (separator != OREF_NULL)
501  {
502  // make sure this is really a string value
503  separator = stringArgument(separator, OREF_positional, ARG_ONE);
504  sepData = separator->getStringData();
505  sepSize = separator->getBLength();
506  checkCR = false; // if explicitly given, only use the given one
507  }
508  ProtectedObject p(separator);
509 
510 
511  // the Null string gets special handling
512  if (sepSize == 0)
513  {
514  // we need an array the size of the string
515  RexxArray *array = new_array(size_v(length));
516  ProtectedObject p1(array);
517  // create a string for each character and poke into the array
518  for (size_t i = 0; i < length; i++, start++)
519  {
520  array->put(new_string(start, 1), i + 1);
521  }
522  return array;
523  }
524 
525 
526  RexxQueue *strings = new_queue(); /* save each string in a queue */
527  ProtectedObject p2(strings); /* which we need to protect */
528  // this is the end of the string
529  const char *stringEnd = start + length;
530 
531  // this is where we stop scanning
532  const char *end = start + length - sepSize + 1;
533 
534  while (start < end)
535  {
536  // search for the next separator, if not found, we're done
537  const char *tmp = locateSeparator(start, end, sepData, sepSize);
538  if (tmp == NULL)
539  {
540  break;
541  }
542  size_t stringLen = tmp - start;
543  // if checking for either linend possibility, reduce the length if we had
544  // a leading CR character
545  if (checkCR && *(tmp - 1) == '\r')
546  {
547  stringLen--;
548  }
549  strings->queue(new_string(start, stringLen));
550  // step to the next scan position
551  start = tmp + sepSize;
552  }
553  // we might have a tail piece here
554  if (start < stringEnd)
555  {
556  size_t stringLen = stringEnd - start;
557  strings->queue(new_string(start, stringLen));
558  }
559  // now convert this to an array
560  return strings->makeArray();
561 }
562 
563 
565 {
566  return StringUtil::makearray(str->getStringData(), str->getBLength(), separator); // todo m17n
567 }
568 
569 
570 /**
571  * Perform a caseless comparison of two strings
572  *
573  * @param string1 The first string to compare.
574  * @param string2 The second string.
575  * @param length The length to compare.
576  *
577  * @return 0 if the two strings are equal, -1 if the first is less than the
578  * second, and 1 if the first string is the greater.
579  */
580 int StringUtil::caselessCompare(const char *string1, const char *string2, sizeB_t length)
581 {
582  /* totally equal? */
583  if (!memcmp(string1, string2, length))
584  {
585  return 0; /* return equality indicator */
586  }
587 
588  while (length-- != 0) /* need to do it the hardway */
589  {
590  /* not equal? */
591  if (toupper(*string1) != toupper(*string2))
592  {
593  /* first one less? */
594  if (toupper(*string1) < toupper(*string2))
595  {
596  return -1; /* return less than indicator */
597  }
598  else
599  {
600  return 1; /* first is larger */
601  }
602  }
603  string1++; /* step first pointer */
604  string2++; /* and second pointer also */
605  }
606  return 0; /* fall through, these are equal */
607 }
608 
609 
610 
611 /**
612  * Convert a hex digit to its integer value equivalent.
613  *
614  * @param ch The input character.
615  *
616  * @return the integer value of the digit.
617  */
619 {
620  int Retval; /* return value */
621 
622  if (isdigit(ch)) /* if real digit */
623  {
624  Retval = ch - '0'; /* convert that */
625  }
626  else
627  {
628  Retval = toupper(ch) - 'A' + 10; /* convert alphabetic */
629  }
630  return Retval; /* return conversion */
631 }
632 
633 /**
634  * The value of the buffer contents
635  * interpreted as the binary expansion
636  * of a byte, with most significant
637  * bit in s[0] and least significant
638  * bit in s[7].
639  *
640  * @param String The string to pack
641  *
642  * @return The single packed character.
643  */
644 char StringUtil::packByte(const char *String )
645 {
646  char Result = 0; /* start off at zero */
647  for (int i = 0; i < 8; i++) /* loop thru 8 chars */
648  {
649  if (String[i] == '1') /* if 'bit' set */
650  {
651  Result |= (1<<(7-i)); /* or with mask */
652  }
653  }
654  return Result; /* return packed byte */
655 }
656 
657 /**
658  * The value of the buffer contents
659  * interpreted as the binary expansion
660  * of a byte, with most significant
661  * bit in s[0] and least significant
662  * bit in s[7].
663  *
664  * @param String Pack 4 characters into a hex string value.
665  *
666  * @return The hex character representing the nibble value.
667  */
668 char StringUtil::packNibble(const char *String)
669 {
670  char Buf[8]; /* temporary buffer */
671  int i; /* table index */
672 
673  memset(Buf, '0', 4); /* set first 4 bytes to zero */
674  memcpy(Buf+4, String, 4); /* copy next 4 bytes */
675  i = packByte(Buf); /* pack to a single byte */
676  return "0123456789ABCDEF"[i]; /* convert to a printable character */
677 }
678 
679 /**
680  * Pack 2 0123456789ABCDEFabcdef chars into
681  * byte
682  *
683  * The value of the buffer contents
684  * interpreted as the hex expansion
685  * of a byte, with most significant
686  * nibble in s[0] and least significant
687  * nibble in s[2].
688  *
689  * @param Byte The pointer to the hex digit pair to pack.
690  *
691  * @return The single byte encoding of the pair of digits.
692  */
693 char StringUtil::packByte2(const char *Byte)
694 {
695  int Nibble1; /* first nibble */
696  int Nibble2; /* second nibble */
697 
698  /* convert each digit */
699  Nibble1 = hexDigitToInt(Byte[0]);
700  Nibble2 = hexDigitToInt(Byte[1]);
701  /* combine the two digits */
702 
703  return((Nibble1 << 4) | Nibble2);
704 }
705 
706 /**
707  * Validate blocks in string
708  *
709  * A string is considered valid if consists
710  * of zero or more characters belonging to
711  * the null-terminated C string set in
712  * groups of size modulus. The first group
713  * may have fewer than modulus characters.
714  * The groups are optionally separated by
715  * one or more blanks.
716  *
717  * @param String The string to validate.
718  * @param Length The string length.
719  * @param Set The valid characters in the set.
720  * @param Modulus The size of the smallest allowed grouping.
721  * @param Hex Indicates this is a hex or binary string. Used for issuing
722  * the correct error type.
723  *
724  * @return The number of valid digits found.
725  */
726 size_t StringUtil::validateSet(const char *String, sizeB_t Length, const char *Set, int Modulus, bool Hex)
727 {
728  char c; /* current character */
729  size_t Count; /* # set members found */
730  const char *Current; /* current location */
731  const char *SpaceLocation = NULL; /* location of last space */
732  int SpaceFound; /* space found yet? */
733  size_t Residue = 0; /* if space_found, # set */
734  /* members */
735 
736  // leading whitespace not permitted
737  if (*String == ch_SPACE || *String == ch_TAB)
738  {
739  if (Hex) /* hex version? */
740  {
741  /* raise the hex message */
743  }
744  else
745  {
746  /* need the binary version */
748  }
749  }
750  SpaceFound = 0; /* set initial space flag */
751  Count = 0; /* start count with zero */
752  Current = String; /* point to start */
753 
754  for (; Length != 0; Length--)
755  { /* process entire string */
756  c = *Current++; /* get char and step pointer */
757  /* if c in set */
758  if (c != '\0' && strchr(Set, c) != NULL)
759  {
760  Count++; /* bump count */
761  }
762  else
763  {
764  if (c == ch_SPACE || c == ch_TAB)
765  { /* if c blank */
766  SpaceLocation = Current; /* save the space location */
767  if (!SpaceFound)
768  { /* if 1st blank */
769  /* save position */
770  Residue = (Count % Modulus);
771  SpaceFound = 1; /* we have the first space */
772  }
773  /* else if bad position */
774  else if (Residue != (Count % Modulus))
775  {
776  if (Hex) /* hex version? */
777  {
778  /* raise the hex message */
779  reportException(Error_Incorrect_method_hexblank, OREF_positional, SpaceLocation - String);
780  }
781  else
782  {
783  /* need the binary version */
784  reportException(Error_Incorrect_method_binblank, SpaceLocation - String);
785  }
786  }
787  }
788  else
789  {
790 
791  if (Hex) /* hex version? */
792  {
793  /* raise the hex message */
795  }
796  else
797  {
799  }
800  }
801  }
802  }
803  /* if trailing blank or grouping bad */
804  if ((c == ch_SPACE || c == ch_TAB) || (SpaceFound && ((Count % Modulus) != Residue)))
805  {
806  if (Hex) /* hex version? */
807  {
808  /* raise the hex message */
809  reportException(Error_Incorrect_method_hexblank, OREF_positional, SpaceLocation - String);
810  }
811  else
812  {
813  /* need the binary version */
814  reportException(Error_Incorrect_method_binblank, SpaceLocation - String);
815  }
816  }
817  return Count; /* return count of chars */
818 }
819 
820 /**
821  * Scan string for next members of
822  * character set
823  *
824  * @param Destination
825  * The string where the characters are packed.
826  * @param Source The source for the string data.
827  * @param Length The length of the input string.
828  * @param Count The number of valid characters in the string.
829  * @param Set The set of allowed characters.
830  * @param ScannedSize
831  * The returned scan size.
832  *
833  * @return
834  */
835 sizeB_t StringUtil::chGetSm(char *Destination, const char *Source, sizeB_t Length, sizeB_t Count, const char *Set, sizeB_t *ScannedSize)
836 {
837  char c; /* current scanned character */
838  const char *Current; /* current scan pointer */
839  size_t Found; /* number of characters found */
840  size_t Scanned; /* number of character scanned*/
841 
842  Scanned = 0; /* nothing scanned yet */
843  Found = 0; /* nothing found yet */
844  Current = Source; /* get pointer to string */
845 
846  for (; Length != 0; Length--)
847  { /* scan entire string */
848  c = *Current++; /* get char and step pointer */
849  Scanned++; /* remember scan count */
850  /* if c in set */
851  if (c != '\0' && strchr(Set, c) != NULL)
852  {
853  *Destination++ = c; /* copy c */
854  if (++Found == Count) /* if all found */
855  {
856  break; /* we are all done */
857  }
858  }
859  }
860  *ScannedSize = Scanned; /* return characters scanned */
861  return Found; /* and number found */
862 }
863 
864 /**
865  * pack a string of 'hex' digits in place
866  *
867  * take two alpha chars and make into one byte
868  *
869  * @param String The string to pack
870  * @param StringLength
871  * The length of the string.
872  *
873  * @return The resulting packed string.
874  */
876 {
877  size_t Nibbles; /* count of nibbles to pack */
878  sizeB_t n;
879  const char *Source; /* pack source */
880  char * Destination; /* packing destination */
881  size_t b; /* nibble odd count */
882  char Buf[8]; /* temp pack buffer */
883  size_t jjj; /* copies nibbles */
884  RexxString *Retval; /* result value */
885 
886  if (StringLength != 0)
887  { /* if not a null string */
888  Source = String; /* get pointer */
889  /* validate the information */
890  Nibbles = validateSet(Source, StringLength, "0123456789ABCDEFabcdef", 2, true);
891  /* get a result string */
892  Retval = raw_string((Nibbles + 1) / 2);
893  /* initialize destination */
894  Destination = Retval->getWritableData();
895 
896  while (Nibbles > 0)
897  { /* while chars to process */
898 
899  b = Nibbles%2; /* get nibbles for next byte */
900  if (b == 0) /* even number */
901  {
902  b = 2; /* use two bytes */
903  }
904  else /* odd number, */
905  {
906  memset(Buf, '0', 2); /* pad with zeroes */
907  }
908 
909  jjj = 2 - b; /* copy nibbles into buff */
910  chGetSm(Buf+jjj, Source, StringLength, b, "0123456789ABCDEFabcdef", &n);
911  *Destination++ = packByte2(Buf); /* pack into destination */
912  Source += n; /* advance source location */
913  StringLength -= n; /* reduce the length */
914  Nibbles -= b; /* decrement the count */
915  }
916  }
917  else
918  {
919  /* this is a null string */
920  Retval = OREF_NULLSTRING;
921  }
922  return Retval; /* return the packed string */
923 }
924 
925 /**
926  * convert nibble to 4 '0'/'1' chars
927  *
928  * p[0], ..., p[3]: the four '0'/'1'
929  * chars representing the nibble
930  *
931  * No terminating null character is
932  * produced
933  *
934  * @param Val The nibble to unpack.
935  * @param p The location to unpack into.
936  */
937 void StringUtil::unpackNibble(int Val, char *p)
938 {
939  p[0] = (Val & 0x08) != 0 ?'1':'0';
940  p[1] = (Val & 0x04) != 0 ?'1':'0';
941  p[2] = (Val & 0x02) != 0 ?'1':'0';
942  p[3] = (Val & 0x01) != 0 ?'1':'0';
943 }
944 
945 
946 /**
947  * Find the first occurrence of the set non-member in a string.
948  *
949  * @param String The string to search.
950  * @param Set The character set.
951  * @param Length The length to search.
952  *
953  * @return The position of a match.
954  */
955 const char *StringUtil::memcpbrk(const char *String, const char *Set, sizeB_t Length)
956 {
957  const char *Retval; /* returned value */
958 
959  Retval = NULL; /* nothing found yet */
960  while (Length-- != 0)
961  { /* search through string */
962  /* find a match in ref set? */
963  if (*String == '\0' || !strchr(Set, *String))
964  {
965  Retval = String; /* copy position */
966  break; /* quit the loop */
967  }
968  String++; /* step the pointer */
969  }
970  return Retval; /* return matched position */
971 }
972 
973 
974 /**
975  * Validate blocks in string
976  *
977  * A string is considered valid if consists
978  * of zero or more characters belonging to
979  * the null-terminated C string set in
980  * groups of size modulus. The first group
981  * may have fewer than modulus characters.
982  * The groups are optionally separated by
983  * one or more blanks.
984  *
985  * @param String The string to validate.
986  * @param Length The string length.
987  * @param Set The validation set.
988  * @param Modulus The set modulus
989  * @param PackedSize The final packed size.
990  *
991  * @return The count of located characters.
992  */
993 int StringUtil::valSet(const char *String, sizeB_t Length, const char *Set, int Modulus, size_t *PackedSize )
994 {
995  char c = '\0'; /* current character */
996  size_t Count; /* # set members found */
997  const char *Current; /* current location */
998  int SpaceFound; /* space found yet? */
999  size_t Residue = 0; /* if space_found, # set members */
1000  int rc; /* return code */
1001 
1002  rc = false; /* default to failure */
1003  if (*String != ' ' && *String != '\t')
1004  { /* if no leading blank */
1005  SpaceFound = 0; /* set initial space flag */
1006  Count = 0; /* start count with zero */
1007  Current = String; /* point to start */
1008 
1009  rc = true; /* default to good now */
1010  for (; Length != 0; Length--)
1011  { /* process entire string */
1012  c = *Current++; /* get char and step pointer */
1013  /* if c in set */
1014  if (c != '\0' && strchr(Set, c) != NULL)
1015  {
1016  Count++; /* bump count */
1017  }
1018  else
1019  {
1020  if (c == ' ' || c == '\t')
1021  { /* if c blank */
1022  if (!SpaceFound)
1023  { /* if 1st blank */
1024  /* save position */
1025  Residue = (Count % Modulus);
1026  SpaceFound = 1; /* we have the first space */
1027  }
1028  /* else if bad position */
1029  else if (Residue != (Count % Modulus))
1030  {
1031  rc = false; /* this is an error */
1032  break; /* report error */
1033  }
1034  }
1035  else
1036  {
1037  rc = false; /* this is an error */
1038  break; /* report error */
1039  }
1040  }
1041  }
1042  if (rc)
1043  { /* still good? */
1044  if (c == ' ' || c == '\t') /* if trailing blank */
1045  {
1046  rc = false; /* report error */
1047  }
1048  else if (SpaceFound && (Count % Modulus) != Residue)
1049  {
1050  rc = false; /* grouping problem */
1051  }
1052  else
1053  {
1054  *PackedSize = Count; /* return count of chars */
1055  }
1056  }
1057  }
1058  return rc; /* return success/failure */
1059 }
1060 
1061 
1062 /**
1063  * Perform primitive datatype validation.
1064  *
1065  * @param String The target string.
1066  * @param Option The type of data to validate.
1067  *
1068  * @return True if this is of the indicated type, false for any mismatch.
1069  */
1071 {
1072  sizeB_t Len; /* validated string length */
1073  RexxObject *Answer; /* validation result */
1074  RexxObject *Temp; /* temporary value */
1075  const char *Scanp; /* string data pointer */
1076  size_t Count; /* hex nibble count */
1077  int Type; /* validated symbol type */
1078  RexxNumberString *TempNum;
1079 
1080  Len = String->getBLength(); /* get validated string len */
1081  Option = toupper(Option); /* get the first character */
1082 
1083  /* assume failure on checking */
1084  Answer = TheFalseObject;
1085  /* get a scan pointer */
1086  Scanp = String->getStringData();
1087 
1088  switch (Option)
1089  { /* based on type to confirm */
1090 
1091  case DATATYPE_ALPHANUMERIC: /* Alphanumeric */
1092  /* all in the set? */
1093  if (Len != 0 && !memcpbrk(Scanp, ALPHANUM, Len))
1094  {
1095  /* this is a good string */
1096  Answer = TheTrueObject;
1097  }
1098  break;
1099 
1100  case DATATYPE_BINARY: /* Binary string */
1101  /* validate the string */
1102  if (Len == 0 || valSet(Scanp, Len, BINARI, 4, &Count))
1103  {
1104  /* this is a good string */
1105  Answer = TheTrueObject;
1106  }
1107  break;
1108 
1109  case DATATYPE_LOWERCASE: /* Lowercase */
1110  if (Len != 0 && !memcpbrk(Scanp, LOWER_ALPHA, Len))
1111  {
1112  /* this is a good string */
1113  Answer = TheTrueObject;
1114  }
1115  break;
1116 
1117  case DATATYPE_UPPERCASE: /* Uppercase */
1118  if (Len != 0 && !memcpbrk(Scanp, UPPER_ALPHA, Len))
1119  {
1120  /* this is a good string */
1121  Answer = TheTrueObject;
1122  }
1123  break;
1124 
1125  case DATATYPE_MIXEDCASE: /* Mixed case */
1126  if (Len != 0 && !memcpbrk(Scanp, MIXED_ALPHA, Len))
1127  {
1128  /* this is a good string */
1129  Answer = TheTrueObject;
1130  }
1131  break;
1132 
1133  case DATATYPE_WHOLE_NUMBER: /* Whole number */
1134  /* validate as a number */
1135  TempNum = String->numberString();
1136  if (TempNum != OREF_NULL)
1137  { /* valid number? */
1138  /* force rounding to current digits */
1139  TempNum = (RexxNumberString *)TempNum->plus(IntegerZero);
1140  /* check for integer then */
1141  Answer = TempNum->isInteger();
1142  }
1143  break;
1144 
1145  case DATATYPE_NUMBER: /* Number */
1146  /* validate as a number */
1147  Temp = (RexxObject *)String->numberString();
1148  if (Temp != OREF_NULL) /* valid number? */
1149  {
1150  /* got a good one */
1151  Answer = TheTrueObject;
1152  }
1153  break;
1154 
1155  case DATATYPE_9DIGITS: /* NUMERIC DIGITS 9 number */
1156  { /* good long number */
1157  wholenumber_t temp;
1158  if (String->numberValue(temp))
1159  {
1160  Answer = TheTrueObject;
1161  }
1162  break;
1163  }
1164 
1165  case DATATYPE_HEX: /* heXadecimal */
1166  /* validate the string */
1167  if (Len == 0 || valSet(Scanp, Len, HEX_CHAR_STR, 2, &Count))
1168  {
1169  /* valid hexadecimal */
1170  Answer = TheTrueObject;
1171  }
1172  break;
1173 
1174  case DATATYPE_SYMBOL: /* Symbol */
1175  /* validate the symbol */
1176  if (String->isSymbol() != STRING_BAD_VARIABLE)
1177  {
1178  /* is a valid symbol */
1179  Answer = TheTrueObject;
1180  }
1181  break;
1182 
1183  case DATATYPE_VARIABLE: /* Variable */
1184 
1185  /* validate the symbol */
1186  Type = String->isSymbol();
1187  /* a valid variable type? */
1188  if (Type == STRING_NAME ||
1189  Type == STRING_STEM ||
1190  Type == STRING_COMPOUND_NAME)
1191  {
1192  /* is a valid symbol */
1193  Answer = TheTrueObject;
1194  }
1195  break;
1196 
1197  case DATATYPE_LOGICAL: // Test for a valid logical.
1198  if (Len != 1 || (*Scanp != '1' && *Scanp != '0'))
1199  {
1200  Answer = TheFalseObject;
1201  }
1202  else
1203  {
1204  Answer = TheTrueObject;
1205  }
1206 
1207  break;
1208 
1209  default : /* unsupported option */
1210  reportException(Error_Incorrect_method_option, "ABCDLMNOSUVWX9", new_string((const char *)&Option,1));
1211  }
1212  return Answer; /* return validation answer */
1213 }
1214 
1215 
1216 /**
1217  * Skip leading blanks in a string.
1218  *
1219  * @param String The target string.
1220  * @param StringLength
1221  * The length of the string segment.
1222  */
1223 void StringUtil::skipBlanks(const char **String, sizeB_t *StringLength )
1224 {
1225  const char *Scan; /* scan pointer */
1226  sizeB_t Length; /* length to scan */
1227 
1228  Scan = *String; /* point to data */
1229  Length = *StringLength; /* get the length */
1230 
1231  for (;Length != 0; Length--)
1232  { /* scan entire string */
1233  if (*Scan != ' ' && *Scan != '\t') /* if not a space */
1234  {
1235  break; /* just quit the loop */
1236  }
1237  Scan++; /* step to next character */
1238  }
1239  /* fell through, all blanks */
1240  *String = Scan; /* set pointer one past */
1241  *StringLength = Length; /* update the length */
1242 }
1243 
1244 /**
1245  * Skip non-blank characters to the next whitespace char.
1246  *
1247  * @param String The source string.
1248  * @param StringLength
1249  * The string length (update on return);
1250  */
1251 void StringUtil::skipNonBlanks(const char **String, sizeB_t *StringLength )
1252 {
1253  const char *Scan; /* scan pointer */
1254  sizeB_t Length; /* length to scan */
1255 
1256  Scan = *String; /* point to data */
1257  Length = *StringLength; /* get the length */
1258 
1259  for (;Length != 0; Length--)
1260  { /* scan entire string */
1261  if (*Scan == ' ' || *Scan == '\t') /* if not a space */
1262  {
1263  break; /* just quit the loop */
1264  }
1265  Scan++; /* step to next character */
1266  }
1267  /* fell through, all blanks */
1268  *String = Scan; /* set pointer one past */
1269  *StringLength = Length; /* update the length */
1270 }
1271 
1272 
1273 /**
1274  * Count the number of words in a string.
1275  *
1276  * @param String The string to count.
1277  * @param StringLength
1278  * The length of the string.
1279  *
1280  * @return The count of white-space delimited words.
1281  */
1282 size_t StringUtil::wordCount(const char *String, sizeB_t StringLength )
1283 {
1284  size_t Count = 0; /* default to nothing */
1285  if (StringLength != 0)
1286  { /* if not a null string */
1287  skipBlanks(&String, &StringLength);/* skip any leading blanks */
1288 
1289  while (StringLength != 0)
1290  { /* while still string ... */
1291  Count++; /* account for this word */
1292  /* now skip the non-blanks */
1293  skipNonBlanks(&String, &StringLength);
1294  if (StringLength == 0) /* if done with the string */
1295  {
1296  break; /* we are finished */
1297  }
1298  /* skip to the next word */
1299  skipBlanks(&String, &StringLength);
1300  } /* loop while still have chars*/
1301  }
1302  return Count; /* done looping, return the */
1303  /* count of words */
1304 }
1305 
1306 
1307 /**
1308  * Find the next word in the string.
1309  *
1310  * @param String The source string.
1311  * @param StringLength
1312  * The length of the string (update on return).
1313  * @param NextString The next word position.
1314  *
1315  * @return The length of the located word.
1316  */
1317 sizeB_t StringUtil::nextWord(const char **String, sizeB_t *StringLength, const char **NextString )
1318 {
1319  sizeB_t WordStart = 0; /* nothing moved yet */
1320  if (*StringLength != 0)
1321  { /* Something there? */
1322  skipBlanks(String, StringLength); /* skip any leading blanks */
1323 
1324  if (*StringLength != 0)
1325  { /* if still string ... */
1326  WordStart = *StringLength; /* save current length */
1327  *NextString = *String; /* save start position now */
1328  /* skip the non-blanks */
1329  skipNonBlanks(NextString, StringLength);
1330  WordStart -= *StringLength; /* adjust the word length */
1331  }
1332  }
1333  return WordStart; /* return word length */
1334 }
1335 
1336 
1337 /**
1338  * Count the occurences of a string within another string.
1339  *
1340  * @param hayStack Pointer to the haystack data.
1341  * @param hayStackLength
1342  * Length of the haystack data.
1343  * @param needle The needle we're searching for
1344  *
1345  * @return The count of needle occurrences located in the string.
1346  */
1347 size_t StringUtil::countStr(const char *hayStack, sizeB_t hayStackLength, RexxString *needle)
1348 {
1349  size_t count = 0; /* no matches yet */
1350  /* get the first match position */
1351  sizeB_t matchPos = pos(hayStack, hayStackLength, needle, 0, hayStackLength);
1352  while (matchPos != 0)
1353  {
1354  count = count + 1; /* count this match */
1355  // step to the new position and search
1356  matchPos = pos(hayStack, hayStackLength, needle, matchPos + needle->getBLength() - 1, hayStackLength);
1357  }
1358  return count; /* return the match count */
1359 }
1360 
1361 
1362 /**
1363  * Count the occurences of a string within another string.
1364  *
1365  * @param hayStack Pointer to the haystack data.
1366  * @param hayStackLength
1367  * Length of the haystack data.
1368  * @param needle The needle we're searching for
1369  *
1370  * @return The count of needle occurrences located in the string.
1371  */
1372 size_t StringUtil::caselessCountStr(const char *hayStack, sizeB_t hayStackLength, RexxString *needle)
1373 {
1374  size_t count = 0; /* no matches yet */
1375  /* get the first match position */
1376  sizeB_t matchPos = caselessPos(hayStack, hayStackLength, needle, 0, hayStackLength);
1377  while (matchPos != 0)
1378  {
1379  count = count + 1; /* count this match */
1380  // step to the new position and search
1381  matchPos = caselessPos(hayStack, hayStackLength, needle, matchPos + needle->getBLength() - 1, hayStackLength);
1382  }
1383  return count; /* return the match count */
1384 }
1385 
1386 
1388  const char *string, /* search string */
1389  sizeB_t length, /* string length */
1390  char target ) /* target character */
1391 /*********************************************************************/
1392 /* Function: offset of first occurrence of char in string */
1393 /*********************************************************************/
1394 {
1395  /* while in the string */
1396  for (const char *scan = string; length != 0; length--)
1397  {
1398  // if we have a match, return the offset
1399  if (*scan == target)
1400  {
1401  return scan - string;
1402  }
1403  scan++; /* step the position */
1404  }
1405  return -1; // no match position
1406 }
1407 
1408 
1409 /**
1410  * Perform a verify operation on a section of data.
1411  *
1412  * @param data The data pointer
1413  * @param stringLen The length of the string to match
1414  * @param ref The reference search string.
1415  * @param option The match/nomatch option.
1416  * @param _start The starting offset for the match.
1417  *
1418  * @return The match/nomatch position, or 0 if nothing was found.
1419  */
1420 RexxInteger *StringUtil::verify(const char *data, sizeB_t stringLen, RexxString *ref, RexxString *option, RexxInteger *_start, RexxInteger *range)
1421 {
1422  // get the reference string information
1423  ref = stringArgument(ref, OREF_positional, ARG_ONE);
1424  sizeB_t referenceLen = ref->getBLength();
1425  const char *refSet = ref->getStringData();
1426  /* get the option, default 'Nomatch' */
1427  char opt = optionalOptionArgument(option, VERIFY_NOMATCH, ARG_TWO);
1428  // validate the possibilities
1429  if (opt != VERIFY_MATCH && opt != VERIFY_NOMATCH)
1430  {
1431  /* not that either, then its an error*/
1433  }
1434 
1435  /* get starting position */
1436  sizeB_t startPos = optionalPositionArgument(_start, 1, ARG_THREE);
1437  sizeB_t stringRange = optionalLengthArgument(range, size_v(stringLen - startPos + 1), ARG_FOUR);
1438  if (startPos > stringLen) /* beyond end of string? */
1439  {
1440  return IntegerZero; /* couldn't find it */
1441  }
1442  else
1443  {
1444  // adjust the range for seaching
1445  stringRange = Numerics::minVal(stringRange, stringLen - startPos + 1);
1446 
1447  /* point at start position */
1448  const char *current = data + startPos - 1;
1449  if (referenceLen == 0)
1450  { /* if verifying a nullstring */
1451  if (opt == VERIFY_MATCH) /* can't match at all */
1452  {
1453  return IntegerZero; /* so return zero */
1454  }
1455  else
1456  {
1457  return new_integer(startPos);/* non-match at start position */
1458  }
1459  }
1460  else
1461  {
1462  // we're verifying that all characters are members of the reference set, so
1463  // return the first non-matching character
1464  if (opt == VERIFY_NOMATCH)
1465  {
1466  while (stringRange-- != 0)
1467  {
1468  // if no match at this position, return this position
1469  if (!StringUtil::matchCharacter(*current++, refSet, referenceLen))
1470  {
1471  return new_integer(current - data);
1472  }
1473  }
1474  // this is always a non matching situation to get here
1475  return IntegerZero;
1476  }
1477  else
1478  {
1479  while (stringRange-- != 0)
1480  {
1481  // if we have a match at this position, trigger this
1482  if (StringUtil::matchCharacter(*current++, refSet, referenceLen))
1483  {
1484  return new_integer(current - data);
1485  }
1486  }
1487  // this is always a non matching situation to get here
1488  return IntegerZero;
1489  }
1490  }
1491  }
1492 }
1493 
1494 
1495 /**
1496  * Do a subword operation on a buffer of data
1497  *
1498  * @param data The start of the data buffer.
1499  * @param length The length of the buffer
1500  * @param position The starting word position.
1501  * @param plength the count of words to return.
1502  *
1503  * @return The string containing the indicated subwords.
1504  */
1505 RexxString *StringUtil::subWord(const char *data, sizeB_t length, RexxInteger *position, RexxInteger *plength)
1506 {
1507  /* convert position to binary */
1508  size_t wordPos = positionArgument(position, ARG_ONE);
1509  // get num of words to extract. The default is a "very large number
1510  size_t count = optionalLengthArgument(plength, Numerics::MAX_WHOLENUMBER, ARG_TWO);
1511 
1512  // handle cases that will always result in a null string
1513  if (length == 0 || count == 0)
1514  {
1515  return OREF_NULLSTRING;
1516  }
1517  const char *nextSite = NULL;
1518  const char *word = data;
1519  /* get the first word */
1520  sizeB_t wordLength = nextWord(&word, &length, &nextSite);
1521  while (--wordPos > 0 && wordLength != 0)
1522  { /* loop until we reach tArget */
1523  word = nextSite; /* copy the start pointer */
1524  /* get the next word */
1525  wordLength = nextWord(&word, &length, &nextSite);
1526  }
1527  // we terminated because there was no word found before we reached the
1528  // count position
1529  if (wordPos != 0)
1530  {
1531  return OREF_NULLSTRING; /* again a null string */
1532  }
1533 
1534  const char *wordStart = word; /* save start position */
1535  const char *wordEnd = word; /* default end is the same */
1536  /* loop until we reach tArget */
1537  while (count-- > 0 && wordLength != 0)
1538  {
1539  wordEnd = word + wordLength; /* point to the word end */
1540  word = nextSite; /* copy the start pointer */
1541  /* get the next word */
1542  wordLength = nextWord(&word, &length, &nextSite);
1543  }
1544  /* extract the substring */
1545  return new_string(wordStart, sizeB_v(wordEnd - wordStart));
1546 }
1547 
1548 
1549 /**
1550  * Do a wordList operation on a buffer of data
1551  *
1552  * @param data The start of the data buffer.
1553  * @param length The length of the buffer
1554  * @param position The starting word position.
1555  * @param plength the count of words to return.
1556  *
1557  * @return The array containing the indicated subwords.
1558  */
1559 RexxArray *StringUtil::subWords(const char *data, sizeB_t length, RexxInteger *position, RexxInteger *plength)
1560 {
1561  /* convert position to binary */
1562  size_t wordPos = optionalPositionArgument(position, 1, ARG_ONE);
1563  // get num of words to extract. The default is a "very large number
1564  size_t count = optionalLengthArgument(plength, Numerics::MAX_WHOLENUMBER, ARG_TWO);
1565 
1566  // handle cases that will always result an empty array
1567  if (length == 0 || count == 0)
1568  {
1569  return new_array((size_t)0);
1570  }
1571 
1572  const char *nextSite = NULL;
1573  const char *word = data;
1574  /* get the first word */
1575  sizeB_t wordLength = nextWord(&word, &length, &nextSite);
1576  while (--wordPos > 0 && wordLength != 0)
1577  { /* loop until we reach target */
1578  word = nextSite; /* copy the start pointer */
1579  /* get the next word */
1580  wordLength = nextWord(&word, &length, &nextSite);
1581  }
1582  // we terminated because there was no word found before we reached the
1583  // count position
1584  if (wordPos != 0)
1585  {
1586  return new_array((size_t)0); // again, an empty array
1587  }
1588 
1589  // we make this size zero so the size and the items count will match
1590  RexxArray *result = new_array((size_t)0);
1591  ProtectedObject p(result);
1592 
1593  const char *wordStart = word; /* save start position */
1594  /* loop until we reach tArget */
1595  while (count-- > 0 && wordLength != 0)
1596  {
1597  // add to the result array
1598  result->append(new_string(word, wordLength));
1599  word = nextSite; /* copy the start pointer */
1600  /* get the next word */
1601  wordLength = nextWord(&word, &length, &nextSite);
1602  }
1603 
1604  return result; // return the populated array
1605 }
1606 
1607 
1608 /**
1609  * Extract a word from a buffer
1610  *
1611  * @param data The data pointer
1612  * @param length the length of the data buffer.
1613  * @param position the target word position.
1614  *
1615  * @return The string value of the word at the indicated position.
1616  */
1617 RexxString *StringUtil::word(const char *data, sizeB_t length, RexxInteger *position)
1618 {
1619  /* convert position to binary */
1620  size_t wordPos = positionArgument(position, ARG_ONE);
1621 
1622  if (length == 0) /* null string? */
1623  {
1624  return OREF_NULLSTRING; /* result is null also */
1625  }
1626  const char *word = data; /* point to the string */
1627  const char *nextSite = NULL;
1628  /* get the first word */
1629  sizeB_t wordLength = nextWord(&word, &length, &nextSite);
1630  while (--wordPos > 0 && wordLength != 0)
1631  { /* loop until we reach target */
1632  word = nextSite; /* copy the start pointer */
1633  /* get the next word */
1634  wordLength = nextWord(&word, &length, &nextSite);
1635  }
1636  if (wordLength != 0) /* have a word */
1637  {
1638  /* extract the string */
1639  return new_string(word, wordLength);
1640  }
1641  return OREF_NULLSTRING; /* no word, return a null */
1642 }
1643 
1644 
1645 /**
1646  * Extract all words from a buffer
1647  *
1648  * @param data The data pointer
1649  * @param length the length of the data buffer.
1650  * @param position the target word position.
1651  *
1652  * @return The string value of the word at the indicated position.
1653  */
1654 RexxArray *StringUtil::words(const char *data, sizeB_t length)
1655 {
1656  const char *word = data; /* point to the string */
1657  const char *nextSite = NULL;
1658 
1659  RexxArray *result = new_array((size_t)0);
1660  ProtectedObject p(result);
1661  /* get the first word */
1662  sizeB_t wordLength = nextWord(&word, &length, &nextSite);
1663  while (wordLength != 0)
1664  {
1665  // add to the result array
1666  result->append(new_string(word, wordLength));
1667  word = nextSite; /* copy the start pointer */
1668  /* get the next word */
1669  wordLength = nextWord(&word, &length, &nextSite);
1670  }
1671  return result; // return whatever we've accumulated
1672 }
1673 
1674 
1675 /**
1676  * Return the index position for a given word
1677  *
1678  * @param data The data containing the words
1679  * @param length The length of the data buffer
1680  * @param position The target word position
1681  *
1682  * @return The offset of the start of the indicated word.
1683  */
1684 RexxInteger *StringUtil::wordIndex(const char *data, sizeB_t length, RexxInteger *position)
1685 {
1686  /* convert count to binary */
1687  size_t wordPos = positionArgument(position, ARG_ONE);
1688  const char *word = data; /* point to word data */
1689  const char *nextSite = NULL;
1690 
1691  /* get the first word */
1692  sizeB_t wordLength = nextWord(&word, &length, &nextSite);
1693  while (--wordPos > 0 && wordLength != 0)
1694  { /* loop until we reach target */
1695  word = nextSite; /* copy the start pointer */
1696  /* get the next word */
1697  wordLength = nextWord(&word, &length, &nextSite);
1698  }
1699 
1700  if (wordLength == 0) /* ran out of string */
1701  {
1702  return IntegerZero; /* no index */
1703  }
1704  return new_integer(word - data + 1);
1705 }
1706 
1707 
1708 /**
1709  * Return the length of the word located at a given index.
1710  *
1711  * @param data The data containing the word list.
1712  * @param length The length of the data buffer
1713  * @param position The target word position.
1714  *
1715  * @return The length of the given word at the target index. Returns
1716  * 0 if no word is found.
1717  */
1718 RexxInteger *StringUtil::wordLength(const char *data, sizeB_t length, RexxInteger *position)
1719 {
1720  /* convert count to binary */
1721  size_t wordPos = positionArgument(position , ARG_ONE);
1722  const char *word = data; /* point to word data */
1723  const char *nextSite = NULL;
1724 
1725  /* get the first word */
1726  sizeB_t wordLength = nextWord(&word, &length, &nextSite);
1727  while (--wordPos > 0 && wordLength != 0)
1728  { /* loop until we reach target */
1729  word = nextSite; /* copy the start pointer */
1730  /* get the next word */
1731  wordLength = nextWord(&word, &length, &nextSite);
1732  }
1733  return new_integer(wordLength); /* return the word length */
1734 }
1735 
1736 
1737 /**
1738  * Execute a wordpos search on a buffer of data.
1739  *
1740  * @param data the source data buffer.
1741  * @param length the length of the buffer
1742  * @param phrase the search phrase.
1743  * @param pstart the starting position.
1744  *
1745  * @return the location of the start of the search phrase.
1746  */
1747 RexxInteger *StringUtil::wordPos(const char *data, sizeB_t length, RexxString *phrase, RexxInteger *pstart)
1748 {
1749  phrase = stringArgument(phrase, OREF_positional, ARG_ONE);/* get the phrase we are looking for */
1750  stringsizeB_t needleLength = phrase->getBLength(); /* get the length also */
1751  /* get starting position, the default*/
1752  /* is the first word */
1753  stringsize_t count = optionalPositionArgument(pstart, 1, ARG_TWO);
1754 
1755  const char *needle = phrase->getStringData(); /* get friendly pointer */
1756  const char *haystack = data; /* and the second also */
1757  stringsizeB_t haystackLength = length; /* get the haystack length */
1758  /* count the words in needle */
1759  stringsize_t needleWords = wordCount(needle, needleLength);
1760  /* and haystack */
1761  stringsize_t haystackWords = wordCount(haystack, haystackLength);
1762  /* if search string is longer */
1763  /* or no words in search */
1764  /* or count is longer than */
1765  /* haystack, this is a failure */
1766  if (needleWords > (haystackWords - count + 1) || needleWords == 0 || count > haystackWords)
1767  {
1768  return IntegerZero;
1769  }
1770 
1771  const char *nextHaystack;
1772  const char *nextNeedle;
1773  /* point at first word */
1774  stringsizeB_t haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1775  /* now skip over count-1 */
1776  for (stringsize_t i = count - 1; i && haystackWordLength != 0; i--)
1777  {
1778  haystack = nextHaystack; /* step past current word */
1779  /* find the next word */
1780  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1781  }
1782  /* get number of searches */
1783  stringsize_t searchCount = (haystackWords - needleWords - count) + 2;
1784  /* position at first needle */
1785  stringsizeB_t firstNeedle = nextWord(&needle, &needleLength, &nextNeedle);
1786  /* loop for the possible times */
1787  for (; searchCount; searchCount--)
1788  {
1789  stringsizeB_t needleWordLength = firstNeedle; /* set the length */
1790  const char *needlePosition = needle; /* get the start of phrase */
1791  const char *haystackPosition = haystack; /* and the target string loop */
1792  /* for needlewords */
1793  const char *nextHaystackPtr = nextHaystack; /* copy nextword information */
1794  const char *nextNeedlePtr = nextNeedle;
1795  /* including the lengths */
1796  stringsizeB_t haystackScanLength = haystackLength;
1797  stringsizeB_t needleScanLength = needleLength;
1798 
1799  stringsize_t i;
1800 
1801  for (i = needleWords; i; i--)
1802  {
1803  // length mismatch, can't be a match
1804 
1805  if (haystackWordLength != needleWordLength)
1806  {
1807  break;
1808  }
1809 
1810  // now compare the two words, using a caseless comparison
1811  // if the words don't match, terminate now
1812  if (memcmp(needlePosition, haystackPosition, needleWordLength) != 0)
1813  {
1814  break; /* get out fast. */
1815  }
1816 
1817  /* the last words matched, so */
1818  /* continue searching. */
1819 
1820  /* set new search information */
1821  haystackPosition = nextHaystackPtr;
1822  needlePosition = nextNeedlePtr;
1823  /* Scan off the next word */
1824  haystackWordLength = nextWord(&haystackPosition, &haystackScanLength, &nextHaystackPtr);
1825  /* repeat for the needle */
1826  needleWordLength = nextWord(&needlePosition, &needleScanLength, &nextNeedlePtr);
1827  }
1828 
1829  if (i == 0) /* all words matched, we */
1830  {
1831  return new_integer(count); // return the position
1832  }
1833  haystack = nextHaystack; /* set the search position */
1834  /* step to next haytack pos */
1835  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1836  count++; /* remember the word position */
1837  }
1838 
1839  return IntegerZero; // not found
1840 }
1841 
1842 
1843 /**
1844  * Execute a caseless wordpos search on a buffer of data.
1845  *
1846  * @param data the source data buffer.
1847  * @param length the length of the buffer
1848  * @param phrase the search phrase.
1849  * @param pstart the starting position.
1850  *
1851  * @return the location of the start of the search phrase.
1852  */
1853 RexxInteger *StringUtil::caselessWordPos(const char *data, sizeB_t length, RexxString *phrase, RexxInteger *pstart)
1854 {
1855  phrase = stringArgument(phrase, OREF_positional, ARG_ONE);/* get the phrase we are looking for */
1856  stringsizeB_t needleLength = phrase->getBLength(); /* get the length also */
1857  /* get starting position, the default*/
1858  /* is the first word */
1859  stringsize_t count = optionalPositionArgument(pstart, 1, ARG_TWO);
1860 
1861  const char *needle = phrase->getStringData(); /* get friendly pointer */
1862  const char *haystack = data; /* and the second also */
1863  stringsizeB_t haystackLength = length; /* get the haystack length */
1864  /* count the words in needle */
1865  stringsize_t needleWords = wordCount(needle, needleLength);
1866  /* and haystack */
1867  stringsize_t haystackWords = wordCount(haystack, haystackLength);
1868  /* if search string is longer */
1869  /* or no words in search */
1870  /* or count is longer than */
1871  /* haystack, this is a failure */
1872  if (needleWords > (haystackWords - count + 1) || needleWords == 0 || count > haystackWords)
1873  {
1874  return IntegerZero;
1875  }
1876 
1877  const char *nextHaystack;
1878  const char *nextNeedle;
1879  /* point at first word */
1880  stringsizeB_t haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1881  /* now skip over count-1 */
1882  for (stringsize_t i = count - 1; i && haystackWordLength != 0; i--)
1883  {
1884  haystack = nextHaystack; /* step past current word */
1885  /* find the next word */
1886  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1887  }
1888  /* get number of searches */
1889  stringsize_t searchCount = (haystackWords - needleWords - count) + 2;
1890  /* position at first needle */
1891  stringsizeB_t firstNeedle = nextWord(&needle, &needleLength, &nextNeedle);
1892  /* loop for the possible times */
1893  for (; searchCount; searchCount--)
1894  {
1895  stringsizeB_t needleWordLength = firstNeedle; /* set the length */
1896  const char *needlePosition = needle; /* get the start of phrase */
1897  const char *haystackPosition = haystack; /* and the target string loop */
1898  /* for needlewords */
1899  const char *nextHaystackPtr = nextHaystack; /* copy nextword information */
1900  const char *nextNeedlePtr = nextNeedle;
1901  /* including the lengths */
1902  stringsizeB_t haystackScanLength = haystackLength;
1903  stringsizeB_t needleScanLength = needleLength;
1904 
1905  stringsize_t i;
1906 
1907  for (i = needleWords; i; i--)
1908  {
1909  // length mismatch, can't be a match
1910 
1911  if (haystackWordLength != needleWordLength)
1912  {
1913  break;
1914  }
1915 
1916  // now compare the two words, using a caseless comparison
1917  // if the words don't match, terminate now
1918  if (caselessCompare(needlePosition, haystackPosition, needleWordLength))
1919  {
1920  break; /* get out fast. */
1921  }
1922 
1923  /* the last words matched, so */
1924  /* continue searching. */
1925 
1926  /* set new search information */
1927  haystackPosition = nextHaystackPtr;
1928  needlePosition = nextNeedlePtr;
1929  /* Scan off the next word */
1930  haystackWordLength = nextWord(&haystackPosition, &haystackScanLength, &nextHaystackPtr);
1931  /* repeat for the needle */
1932  needleWordLength = nextWord(&needlePosition, &needleScanLength, &nextNeedlePtr);
1933  }
1934 
1935  if (i == 0) /* all words matched, we */
1936  {
1937  return new_integer(count); // return the position
1938  }
1939  haystack = nextHaystack; /* set the search position */
1940  /* step to next haytack pos */
1941  haystackWordLength = nextWord(&haystack, &haystackLength, &nextHaystack);
1942  count++; /* remember the word position */
1943  }
1944 
1945  return IntegerZero; // not found
1946 }
void reportException(wholenumber_t error)
RexxArray * new_array(size_t s)
Definition: ArrayClass.hpp:259
RexxInteger * new_integer(wholenumber_t v)
#define ch_TAB
RexxQueue * new_queue()
Definition: QueueClass.hpp:89
codepoint_t optionalPadArgument(RexxObject *o, codepoint_t d, size_t p)
Definition: RexxCore.h:382
#define OREF_NULL
Definition: RexxCore.h:60
RexxString * stringArgument(RexxObject *object, RexxString *kind, size_t position)
Definition: RexxCore.h:303
const int ARG_FOUR
Definition: RexxCore.h:83
#define IntegerOne
Definition: RexxCore.h:190
const int ARG_THREE
Definition: RexxCore.h:82
#define TheTrueObject
Definition: RexxCore.h:186
const int ARG_TWO
Definition: RexxCore.h:81
size_t optionalLengthArgument(RexxObject *o, size_t d, size_t p)
Definition: RexxCore.h:343
char optionalOptionArgument(RexxObject *o, char d, size_t p)
Definition: RexxCore.h:389
size_t optionalPositionArgument(RexxObject *o, size_t d, size_t p)
Definition: RexxCore.h:363
#define TheFalseObject
Definition: RexxCore.h:185
const int ARG_ONE
Definition: RexxCore.h:80
#define IntegerZero
Definition: RexxCore.h:189
#define Error_Incorrect_method_option
#define Error_Incorrect_method_invbin
#define Error_Incorrect_method_invhex
#define Error_Incorrect_method_binblank
#define Error_Incorrect_method_hexblank
#define DATATYPE_UPPERCASE
Definition: StringClass.hpp:82
#define UPPER_ALPHA
#define DATATYPE_LOWERCASE
Definition: StringClass.hpp:77
#define DATATYPE_BINARY
Definition: StringClass.hpp:76
#define HEX_CHAR_STR
Definition: StringClass.hpp:95
#define STRING_NAME
Definition: StringClass.hpp:60
#define DATATYPE_MIXEDCASE
Definition: StringClass.hpp:78
#define DATATYPE_LOGICAL
Definition: StringClass.hpp:86
RexxString * new_string(const char *s, stringsizeB_t bl, sizeC_t cl=-1)
#define ALPHANUM
Definition: StringClass.hpp:96
#define DATATYPE_NUMBER
Definition: StringClass.hpp:79
#define LOWER_ALPHA
Definition: StringClass.hpp:99
#define MIXED_ALPHA
#define VERIFY_NOMATCH
Definition: StringClass.hpp:89
#define DATATYPE_SYMBOL
Definition: StringClass.hpp:80
#define DATATYPE_9DIGITS
Definition: StringClass.hpp:85
#define STRING_COMPOUND_NAME
Definition: StringClass.hpp:56
#define VERIFY_MATCH
Definition: StringClass.hpp:88
RexxString * raw_string(stringsizeB_t bl, stringsizeC_t cl=-1)
#define DATATYPE_ALPHANUMERIC
Definition: StringClass.hpp:75
#define BINARI
Definition: StringClass.hpp:98
#define STRING_STEM
Definition: StringClass.hpp:55
#define DATATYPE_WHOLE_NUMBER
Definition: StringClass.hpp:83
#define DATATYPE_HEX
Definition: StringClass.hpp:84
#define ch_SPACE
Definition: StringClass.hpp:91
#define DATATYPE_VARIABLE
Definition: StringClass.hpp:81
#define STRING_BAD_VARIABLE
Definition: StringClass.hpp:54
stringsize_t positionArgument(RexxObject *argument, size_t position)
size_t RexxEntry StringLength(RexxThreadContext *c, RexxStringObject s)
static const wholenumber_t MAX_WHOLENUMBER
Definition: Numerics.hpp:62
static wholenumber_t minVal(wholenumber_t n1, wholenumber_t n2)
Definition: Numerics.hpp:116
void put(RexxObject *eref, size_t pos)
Definition: ArrayClass.cpp:208
size_t append(RexxObject *)
Definition: ArrayClass.cpp:485
RexxArray * makeArray()
Definition: ListClass.cpp:873
RexxObject * isInteger()
RexxNumberString * plus(RexxObject *)
void queue(RexxObject *obj)
Definition: QueueClass.hpp:82
bool numberValue(wholenumber_t &result, size_t precision)
const char * getStringData()
RexxNumberString * numberString()
char * getWritableData()
void set(sizeB_t s, int c, sizeB_t l)
void put(sizeB_t s, const void *b, sizeB_t l)
sizeB_t getBLength()
static void unpackNibble(int Val, char *p)
Definition: StringUtil.cpp:937
static RexxInteger * posRexx(const char *stringData, sizeB_t length, RexxString *needle, RexxInteger *pstart, RexxInteger *range)
Definition: StringUtil.cpp:130
static RexxString * packHex(const char *String, sizeB_t StringLength)
Definition: StringUtil.cpp:875
static RexxInteger * wordIndex(const char *data, sizeB_t length, RexxInteger *position)
static size_t memPos(const char *string, sizeB_t length, char target)
static const char * memcpbrk(const char *String, const char *Set, sizeB_t Length)
Definition: StringUtil.cpp:955
static char packByte2(const char *Byte)
Definition: StringUtil.cpp:693
static sizeB_t caselessLastPos(const char *stringData, sizeB_t haystackLen, RexxString *needle, sizeB_t _start, sizeB_t range)
Definition: StringUtil.cpp:360
static void skipNonBlanks(const char **String, sizeB_t *StringLength)
static RexxString * substr(const char *, sizeB_t, RexxInteger *, RexxInteger *, RexxString *)
Definition: StringUtil.cpp:66
static RexxObject * dataType(RexxString *String, char Option)
static const char * locateSeparator(const char *start, const char *end, const char *sepData, sizeB_t sepLength)
Definition: StringUtil.cpp:467
static size_t caselessCountStr(const char *hayStack, sizeB_t hayStackLength, RexxString *needle)
static char packNibble(const char *String)
Definition: StringUtil.cpp:668
static RexxString * word(const char *data, sizeB_t length, RexxInteger *position)
static void skipBlanks(const char **String, sizeB_t *StringLength)
static RexxInteger * wordPos(const char *data, sizeB_t length, RexxString *phrase, RexxInteger *pstart)
static sizeB_t lastPos(const char *stringData, sizeB_t hastackLen, RexxString *needle, sizeB_t _start, sizeB_t _range)
Definition: StringUtil.cpp:278
static bool matchCharacter(char ch, const char *charSet, sizeB_t len)
Definition: StringUtil.hpp:95
static RexxInteger * verify(const char *data, sizeB_t stringLen, RexxString *ref, RexxString *option, RexxInteger *_start, RexxInteger *range)
static RexxInteger * wordLength(const char *data, sizeB_t length, RexxInteger *position)
static sizeB_t chGetSm(char *Destination, const char *Source, sizeB_t Length, sizeB_t Count, const char *Set, sizeB_t *ScannedSize)
Definition: StringUtil.cpp:835
static RexxInteger * caselessWordPos(const char *data, sizeB_t length, RexxString *phrase, RexxInteger *pstart)
static RexxArray * subWords(const char *data, sizeB_t length, RexxInteger *position, RexxInteger *plength)
static size_t countStr(const char *hayStack, sizeB_t hayStackLength, RexxString *needle)
static int valSet(const char *String, sizeB_t Length, const char *Set, int Modulus, size_t *PackedSize)
Definition: StringUtil.cpp:993
static int hexDigitToInt(char ch)
Definition: StringUtil.cpp:618
static size_t validateSet(const char *String, sizeB_t Length, const char *Set, int Modulus, bool Hex)
Definition: StringUtil.cpp:726
static RexxArray * words(const char *data, sizeB_t length)
static char packByte(const char *String)
Definition: StringUtil.cpp:644
static sizeB_t pos(const char *stringData, sizeB_t haystack_length, RexxString *needle, sizeB_t _start, sizeB_t _range)
Definition: StringUtil.cpp:155
static RexxString * subWord(const char *data, sizeB_t length, RexxInteger *position, RexxInteger *plength)
static size_t wordCount(const char *String, sizeB_t StringLength)
static sizeB_t nextWord(const char **String, sizeB_t *StringLength, const char **NextString)
static RexxInteger * lastPosRexx(const char *stringData, sizeB_t haystackLen, RexxString *needle, RexxInteger *_start, RexxInteger *_range)
Definition: StringUtil.cpp:255
static sizeB_t caselessPos(const char *stringData, sizeB_t haystack_length, RexxString *needle, sizeB_t _start, sizeB_t _range)
Definition: StringUtil.cpp:205
static int caselessCompare(const char *, const char *, sizeB_t)
Definition: StringUtil.cpp:580
static RexxArray * makearray(const char *start, sizeB_t length, RexxString *separator)
Definition: StringUtil.cpp:493
static RexxString * subchar(const char *stringData, sizeB_t stringLength, RexxInteger *positionArg)
Definition: StringUtil.cpp:442
stringsize_t stringsizeB_t
Definition: rexx.h:247
stringsizeC_t sizeC_t
Definition: rexx.h:242
ssize_t codepoint_t
Definition: rexx.h:232
#define sizeB_v(X)
Definition: rexx.h:250
ssize_t wholenumber_t
Definition: rexx.h:230
#define size_v(X)
Definition: rexx.h:237
stringsizeB_t sizeB_t
Definition: rexx.h:248
size_t stringsize_t
Definition: rexx.h:228