MutableBufferClass.cpp
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* */
3 /* Copyright (c) 1995, 2004 IBM Corporation. All rights reserved. */
4 /* Copyright (c) 2005-2009 Rexx Language Association. All rights reserved. */
5 /* */
6 /* This program and the accompanying materials are made available under */
7 /* the terms of the Common Public License v1.0 which accompanies this */
8 /* distribution. A copy is also available at the following address: */
9 /* http://www.oorexx.org/license.html */
10 /* */
11 /* Redistribution and use in source and binary forms, with or */
12 /* without modification, are permitted provided that the following */
13 /* conditions are met: */
14 /* */
15 /* Redistributions of source code must retain the above copyright */
16 /* notice, this list of conditions and the following disclaimer. */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in */
19 /* the documentation and/or other materials provided with the distribution. */
20 /* */
21 /* Neither the name of Rexx Language Association nor the names */
22 /* of its contributors may be used to endorse or promote products */
23 /* derived from this software without specific prior written permission. */
24 /* */
25 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
26 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
27 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS */
28 /* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
29 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
30 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
31 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, */
32 /* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
33 /* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */
34 /* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */
35 /* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* */
37 /*----------------------------------------------------------------------------*/
38 /******************************************************************************/
39 /* REXX Kernel */
40 /* */
41 /* Primitive MutableBuffer Class */
42 /* */
43 /******************************************************************************/
44 #include <ctype.h>
45 #include <stdlib.h>
46 #include <string.h>
47 
48 #include "RexxCore.h"
49 #include "StringClass.hpp"
50 #include "MutableBufferClass.hpp"
51 #include "ProtectedObject.hpp"
52 #include "StringUtil.hpp"
53 
54 
55 // singleton class instance
57 
58 
59 
60 /**
61  * Create initial class object at bootstrap time.
62  */
64 {
65  CLASS_CREATE(MutableBuffer, "MutableBuffer", RexxClass);
66 }
67 
68 
69 #define DEFAULT_BUFFER_LENGTH 256
70 
71 // in behaviour
72 RexxMutableBuffer *RexxMutableBufferClass::newRexx(RexxObject **args, size_t argc, size_t named_argc)
73 /******************************************************************************/
74 /* Function: Allocate (and initialize) a mutable buffer object */
/*                                                                            */
/* args[0]    optional initial content; forced to a string, empty if omitted  */
/* args[1]    optional initial buffer size; defaults to DEFAULT_BUFFER_LENGTH */
/*            and is also passed to the constructor as the default size       */
/* named_argc forwarded unchanged to the INIT and INIT_EXTENDED messages      */
/*                                                                            */
/* Returns the new buffer after both initialization messages were sent.       */
75 /******************************************************************************/
76 {
77  // this method is defined on the object class, but this is actually attached
78  // to a class object instance. Therefore, any use of the this pointer
79  // will be touching the wrong data. Use the classThis pointer for calling
80  // any methods on this object from this method.
81  RexxClass *classThis = (RexxClass *)this;
82  classThis->checkAbstract(); // ooRexx5
83 
84  RexxString *string;
85  RexxMutableBuffer *newBuffer; /* new mutable buffer object */
86  size_t bufferLength = DEFAULT_BUFFER_LENGTH;
87  size_t defaultSize;
88  if (argc >= 1)
89  {
90  if (args[0] != NULL)
91  {
92  /* force argument to string value */
93  string = stringArgument(args[0], OREF_positional, ARG_ONE);
94  }
95  else
96  {
97  string = OREF_NULLSTRING; /* first argument omitted: default to empty content */
98  }
99  }
100  else /* no positional argument at all: start with empty content */
101  {
102  string = OREF_NULLSTRING;
103  }
 // presumably keeps the (possibly freshly created) string alive while the
 // buffer is allocated below -- confirm against ProtectedObject
104  ProtectedObject p_string(string);
105 
 // optional second argument: the requested initial buffer size
106  if (argc >= 2)
107  {
108  bufferLength = optionalLengthArgument(args[1], DEFAULT_BUFFER_LENGTH, ARG_TWO);
109  }
110 
111  defaultSize = bufferLength; /* remember initial default size */
112 
113  /* input string longer than demanded */
114  /* minimum size? expand accordingly */
115  if (string->getLength() > bufferLength)
116  {
117  bufferLength = string->getLength();
118  }
119  /* allocate the new object */
120  newBuffer = new (classThis) RexxMutableBuffer(bufferLength, defaultSize);
121  newBuffer->setLength(string->getLength());
122  /* copy the content */
123  newBuffer->copyData(0, string->getStringData(), string->getLength());
124 
 // NOTE(review): the new buffer is only protected from here on; this relies
 // on setLength()/copyData() above not allocating -- confirm.
125  ProtectedObject p_newBuffer(newBuffer);
126 
127  // The next line looks wrong: argc is reduced by two, but args still points
 // at the first argument, so INIT does not receive the arguments after the
 // first two.
128  newBuffer->sendMessage(OREF_INIT, args, argc > 2 ? argc - 2 : 0, named_argc);
129 
130  // Anyway I need to pass args[0] to init, so it's out of question to drop some args!
131  // If I fix it by passing argc unchanged, I get another problem:
132  // Object::initRexx takes ZERO arguments and will raise an error if argc > 0.
133  // So no choice: I must send a message that is currently not supported.
134  // I keep the previous sendMessage "INIT" to remain aligned with official ooRexx (yes, same bug).
135  // For sending the new message, I use the technique used for the alternative operators:
136  // Try to send the message and don't complain if not understood.
137  ProtectedObject result;
 // all arguments are passed unchanged; the reply placed in result is not used
138  newBuffer->messageSend(OREF_INIT_EXTENDED, args, argc, named_argc, result, /*processUnknown*/ false);
139 
140  return newBuffer;
141 }
142 
143 
144 /**
145  * Default constructor.
146  */
148 {
149  bufferLength = DEFAULT_BUFFER_LENGTH; /* save the length of the buffer */
150  defaultSize = bufferLength; /* store the default buffer size */
151  // NB: we clear this before we allocate the new buffer because allocating the
152  // new buffer might trigger a garbage collection, causing us to mark bogus
153  // reference.
154  data = OREF_NULL;
156  data->setDataLength(0); // strange to have dataLength equal to bufferSize by default... I assign 0 instead.
157  dataLength = 0;
158 }
159 
160 
161 /**
162  * Constructor with explicitly set size and default.
163  *
164  * @param l Initial length.
165  * @param d The explicit default size.
166  */
168 {
169  bufferLength = l; /* save the length of the buffer */
170  defaultSize = d; /* store the default buffer size */
171  // NB: As in the default constructor, we clear this before we allocate the
172  // new buffer in case garbage collection is triggered.
173  data = OREF_NULL;
175  this->setLength(0);
176 }
177 
178 
179 /**
180  * Create a new mutable buffer object from a potential subclass.
181  *
182  * @param size The size of the buffer object.
183  *
184  * @return A new instance of a mutable buffer, with the default class
185  * behaviour.
186  */
187 void *RexxMutableBuffer::operator new(size_t size)
188 {
189  return new_object(size, T_MutableBuffer);
190 }
191 
192 /**
193  * Create a new mutable buffer object from a potential subclass.
194  *
195  * @param size The size of the buffer object.
196  * @param bufferClass
197  * The class of the buffer object.
198  *
199  * @return A new instance of a mutable buffer, with the target class
200  * behaviour.
201  */
202 void *RexxMutableBuffer::operator new(size_t size, RexxClass *bufferClass)
203 {
204  RexxObject * newObj = new_object(size, T_MutableBuffer);
205  newObj->setBehaviour(bufferClass->getInstanceBehaviour());
206  return newObj;
207 }
208 
209 
210 void RexxMutableBuffer::live(size_t liveMark)
211 /******************************************************************************/
212 /* Function: Normal garbage collection live marking */
213 /******************************************************************************/
214 {
215  memory_mark(this->objectVariables);
216  memory_mark(this->data);
217 }
218 
220 /******************************************************************************/
221 /* Function: Generalized object marking */
222 /******************************************************************************/
223 {
224  memory_mark_general(this->objectVariables);
225  memory_mark_general(this->data);
226 }
227 
228 
230 /******************************************************************************/
231 /* Function: Flatten a mutable buffer */
232 /******************************************************************************/
233 {
235 
236  flatten_reference(newThis->data, envelope);
237  flatten_reference(newThis->objectVariables, envelope);
238 
240 }
241 
243 /******************************************************************************/
244 /* Function: copy an object */
/*                                                                            */
/* Clones this object, gives the clone a newly allocated data buffer of the   */
/* same capacity and copies this buffer's data into it.                       */
245 /******************************************************************************/
246 {
247 
 // presumably clone() leaves newObj->data referring to this object's
 // buffer, which is why a new one is allocated below -- confirm
248  RexxMutableBuffer *newObj = (RexxMutableBuffer *)this->clone();
249 
250  /* see the comments in ::newRexx()!! */
 // NOTE(review): data is assigned directly here, while ensureCapacity()
 // replaces it through OrefSet() -- confirm this is intended.
251  newObj->data = new_buffer(bufferLength);
252  newObj->setLength(this->dataLength);
 // copies the whole capacity (bufferLength), not only dataLength characters
253  newObj->copyData(0, data->getData(), bufferLength);
254 
 // carry over the sizing fields (presumably already equal after clone())
255  newObj->defaultSize = this->defaultSize;
256  newObj->bufferLength = this->bufferLength;
257 
258  return newObj;
259 }
260 
261 void RexxMutableBuffer::ensureCapacity(size_t addedLength)
262 /******************************************************************************/
263 /* Function: append to the mutable buffer */
264 /******************************************************************************/
265 {
266  size_t resultLength = this->dataLength + addedLength;
267 
268  if (resultLength > bufferLength)
269  { /* need to enlarge? */
270  bufferLength *= 2; /* double the buffer */
271  if (bufferLength < resultLength)
272  { /* still too small? use new length */
273  bufferLength = resultLength;
274  }
275 
276  RexxBuffer *newBuffer = new_buffer(bufferLength);
277  // copy the data into the new buffer
278  newBuffer->copyData(0, data->getData(), dataLength);
279  newBuffer->setDataLength(data->getDataLength());
280  // replace the old data buffer
281  OrefSet(this, this->data, newBuffer);
282  }
283 }
284 
285 
286 /**
287  * Set the length of the data in the buffer. The limit is
288  * the current capacity of the buffer. If the length is
289  * extended beyond the current length, the extra characters
290  * of the buffer will be filled with nulls.
291  *
292  * @param newLength The new datalength. This is capped to the capacity of
293  * the buffer.
294  *
295  * @return The actual length the data has been set to. If the
296  * target length is greater than the capacity, the capacity
297  * value is returned.
298  */
299 size_t RexxMutableBuffer::setDataLength(size_t newLength)
300 {
301  // cap the data length at the capacity
302  size_t capacity = this->getCapacity();
303  if (newLength > capacity)
304  {
305  newLength = capacity;
306  }
307 
308  size_t oldLength = this->getLength();
309  // set the new buffer length
310  dataLength = newLength;
311  // do we need to pad?
312  if (newLength > oldLength)
313  {
314  this->setData(oldLength, '\0', newLength - oldLength);
315  }
316  else
317  {
318  // The buffer has been truncated
319  // If the buffer before truncation was not ASCII, maybe the shorter buffer is ASCII
320  if (!this->isASCII()) this->setIsASCIIChecked(false); // check again
321  }
322 
323  return newLength;
324 }
325 
326 /**
327  * Set the capacity of the buffer.
328  *
329  * @param newLength The new buffer length
330  *
331  * @return The pointer to the data area in the buffer.
332  */
333 char *RexxMutableBuffer::setCapacity(size_t newLength)
334 {
335  // if the new length is longer than our current,
336  // extend by the delta
337  if (newLength > bufferLength)
338  {
339  ensureCapacity(newLength - bufferLength);
340  }
341  // return a pointer to the current buffer data
342  return getData();
343 }
344 
345 
346 /**
347  * Return the length of the data in the buffer currently.
348  *
349  * @return The current length, as an Integer object.
350  */
351 // in behaviour
353 {
354  return new_integer(getLength());
355 }
356 
357 
359 {
360  if (this->isASCIIChecked()) return this->isASCII();
361  bool isASCII = StringUtil::checkIsASCII(this->getStringData(), this->getLength());
362  this->setIsASCII(isASCII);
363  this->setIsASCIIChecked(true);
364  return isASCII;
365 }
366 
367 // In behaviour
369 {
370  return this->checkIsASCII() ? TheTrueObject : TheFalseObject;
371 }
372 
373 
374 // in behaviour
376 /******************************************************************************/
377 /* Function: append to the mutable buffer */
378 /******************************************************************************/
379 {
380  RexxString *string = stringArgument(obj, OREF_positional, ARG_ONE);
381  ProtectedObject p(string);
382  // make sure we have enough room
383  ensureCapacity(string->getLength());
384 
385  copyData(dataLength, string->getStringData(), string->getLength());
386  this->setLength(this->dataLength + string->getLength());
387 
388  if (this->isASCII())
389  {
390  if (!string->checkIsASCII()) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
391  }
392  return this;
393 }
394 
395 
396 RexxMutableBuffer *RexxMutableBuffer::appendCstring(const char *_data, size_t blength)
397 /******************************************************************************/
398 /* Function: append to the mutable buffer */
399 /******************************************************************************/
400 {
401  // make sure we have enough room
402  ensureCapacity(blength);
403 
404  this->data->copyData(dataLength, _data, blength);
405  this->setLength(this->dataLength + blength);
406 
407  if (this->isASCII())
408  {
409  bool _dataIsASCII = StringUtil::checkIsASCII(_data, blength);
410  if (!_dataIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
411  }
412  return this;
413 }
414 
415 
416 // in behaviour
418 /******************************************************************************/
419 /* Function: insert string at given position */
420 /******************************************************************************/
421 {
422  // force this into string form
423  RexxString * string = stringArgument(str, OREF_positional, ARG_ONE);
424  ProtectedObject p(string);
425 
426  // we're using optional length because 0 is valid for insert.
427  size_t begin = optionalNonNegative(pos, 0, OREF_positional, ARG_TWO);
428  size_t insertLength = optionalLengthArgument(len, string->getLength(), ARG_THREE);
429 
430  codepoint_t padChar = optionalPadArgument(pad, ' ', ARG_FOUR);
431  bool padInserted = false;
432  bool padIsASCII = ((padChar & 0x80) == 0);
433 
434  size_t copyLength = Numerics::minVal(insertLength, string->getLength());
435  size_t padLength = insertLength - copyLength;
436 
437 
438  // if inserting within the current bounds, we only need to add the length
439  // if inserting beyond the end, we need to make sure we add space for the gap too
440  if (begin < dataLength)
441  {
442  // if inserting a zero length string, this is simple!
443  if (insertLength == 0)
444  {
445  return this; /* do nothing */
446  }
447  ensureCapacity(insertLength);
448  }
449  else
450  {
451  ensureCapacity(insertLength + (begin - dataLength));
452  }
453 
454  /* create space in the buffer */
455  if (begin < dataLength)
456  {
457  openGap(begin, insertLength, dataLength - begin);
458  }
459  else if (begin > this->dataLength)
460  {
461  /* pad before insertion */
462  setData(dataLength, padChar, begin - dataLength);
463  padInserted = true;
464  }
465  /* insert string contents */
466  copyData(begin, string->getStringData(), copyLength);
467  // do we need data padding?
468  if (padLength > 0)
469  {
470  setData(begin + string->getLength(), padChar, padLength);
471  padInserted = true;
472  }
473  // inserting after the end? the resulting length is measured from the insertion point
474  if (begin > this->dataLength)
475  {
476  this->setLength(begin + insertLength);
477  }
478  else
479  {
480  // just add in the inserted length
481  this->setLength(this->dataLength + insertLength);
482  }
483 
484  if (this->isASCII())
485  {
486  if (padInserted)
487  {
488  if (!padIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
489  }
490  }
491 
492  if (this->isASCII())
493  {
494  bool stringIsASCII = string->checkIsASCII();
495  if (!stringIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
496  }
497 
498  return this;
499 }
500 
501 
502 // in behaviour
504 /******************************************************************************/
505 /* Function: replace characters in buffer contents */
506 /******************************************************************************/
507 {
508  RexxString *string = stringArgument(str, OREF_positional, ARG_ONE);
509  ProtectedObject p(string);
510  size_t begin = optionalPositionArgument(pos, 1, ARG_TWO) - 1;
511  size_t replaceLength = optionalLengthArgument(len, string->getLength(), ARG_THREE);
512 
513  codepoint_t padChar = optionalPadArgument(pad, ' ', ARG_FOUR);
514  bool padInserted = false;
515  bool padIsASCII = ((padChar & 0x80) == 0);
516 
517  // make sure we have room for this
518  ensureCapacity(begin + replaceLength);
519 
520  // is our start position beyond the current data end?
521  if (begin > dataLength)
522  {
523  // add padding to the gap
524  setData(dataLength, padChar, begin - dataLength);
525  padInserted = true;
526  }
527 
528  // now overlay the string data
529  copyData(begin, string->getStringData(), Numerics::minVal(replaceLength, string->getLength()));
530  // do we need additional padding?
531  if (replaceLength > string->getLength())
532  {
533  // pad the section after the overlay
534  setData(begin + string->getLength(), padChar, replaceLength - string->getLength());
535  padInserted = true;
536  }
537 
538  // did this add to the size?
539  if (begin + replaceLength > dataLength)
540  {
541  //adjust upward
542  this->setLength(begin + replaceLength);
543  }
544 
545  if (this->isASCII())
546  {
547  if (padInserted)
548  {
549  if (!padIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
550  }
551  }
552 
553  if (this->isASCII())
554  {
555  bool stringIsASCII = string->checkIsASCII();
556  if (!stringIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
557  }
558 
559  return this;
560 }
561 
562 
563 /**
564  * Replace a target substring within a string with
565  * a new string value. This is similar to overlay, but
566  * replacing might cause the characters following the
567  * replacement position to be shifted to the left or
568  * right.
569  *
570  * @param str The replacement string.
571  * @param pos The target position (required).
572  * @param len The target length (optional). If not specified, the
573  * length of the replacement string is used, and this
574  * is essentially an overlay operation.
575  * @param pad A padding character if padding is required. The default
576  * pad is a ' '. Padding only occurs if the replacement
577  * position is beyond the current data length.
578  *
579  * @return The target mutablebuffer object.
580  */
581 // in behaviour
583 {
584  RexxString *string = stringArgument(str, OREF_positional, ARG_ONE);
585  ProtectedObject p(string);
586  size_t begin = positionArgument(pos, ARG_TWO) - 1;
587  size_t newLength = string->getLength();
588  size_t replaceLength = optionalLengthArgument(len, newLength, ARG_THREE);
589 
590  codepoint_t padChar = optionalPadArgument(pad, ' ', ARG_FOUR);
591  bool padInserted = false;
592  bool padIsASCII = ((padChar & 0x80) == 0);
593  size_t finalLength;
594 
595  // if replaceLength extends beyond the end of the string
596  // then we cut it.
597  if (begin > dataLength)
598  {
599  replaceLength = 0;
600  }
601  else if (begin + replaceLength > dataLength)
602  {
603  replaceLength = dataLength - begin;
604  }
605 
606  // We need to add the delta between the excised string and the inserted
607  // replacement string.
608  //
609  // If this extends beyond the end of the string, then we require space for
610  // the position + the replacement string length. Else we find the required
611  // size (may be smaller than before)
612  if (begin > dataLength)
613  {
614  finalLength = begin - replaceLength + newLength;
615  }
616  else
617  {
618  finalLength = dataLength - replaceLength + newLength;
619  }
620 
621  // make sure we have room for this
622  ensureCapacity(finalLength);
623 
624  // is our start position beyond the current data end?
625  // NB: Even though we've adjusted the buffer size, the dataLength is still
626  // the original entry length.
627  if (begin > dataLength)
628  {
629  // add padding to the gap
630  setData(dataLength, padChar, begin - dataLength);
631  padInserted = true;
632  // now overlay the string data
633  copyData(begin, string->getStringData(), newLength);
634  }
635  else
636  {
637  // if the strings are of different lengths, we need to adjust the size
638  // of the gap we're copying into. Only adjust if there is a real gap
639  if (replaceLength != newLength && begin + replaceLength < dataLength)
640  {
641  // snip out the original string
642  adjustGap(begin, replaceLength, newLength);
643  }
644  // now overlay the string data
645  copyData(begin, string->getStringData(), newLength);
646  }
647 
648  // and finally adjust the length
649  this->setLength(finalLength);
650  // our return value is always the target mutable buffer
651 
652  if (this->isASCII())
653  {
654  if (padInserted)
655  {
656  if (!padIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
657  }
658  }
659 
660  if (this->isASCII())
661  {
662  bool stringIsASCII = string->checkIsASCII();
663  if (!stringIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
664  }
665 
666  return this;
667 }
668 
669 
670 // in behaviour
672 /******************************************************************************/
673 /* Function: delete character range in buffer */
/*                                                                            */
/* _start  required position argument; one is subtracted to get the offset    */
/* len     optional length; defaults to everything from the offset to the end */
/*                                                                            */
/* Returns this buffer.                                                       */
674 /******************************************************************************/
675 {
676  size_t begin = positionArgument(_start, ARG_ONE) - 1;
 // NOTE(review): when begin is beyond dataLength the default value below
 // looks like it wraps around (assuming dataLength is unsigned); it is then
 // not used because the "begin < dataLength" test fails -- confirm.
677  size_t range = optionalLengthArgument(len, /*this->data->getDataLength()*/ this->dataLength - begin, ARG_TWO);
678 
679  // is the begin point actually within the string?
680  if (begin < dataLength)
681  { /* got some work to do? */
682  // deleting from the middle?
683  if (begin + range < dataLength)
684  {
685  // shift everything over
686  closeGap(begin, range, dataLength - (begin + range));
687  this->setLength(dataLength - range);
688  }
689  else
690  {
691  // we're just truncating
692  this->setLength(begin);
693  }
694  }
695 
696  // The buffer has been truncated
697  // If the buffer before truncation was not ASCII, maybe the shorter buffer is ASCII
 // NOTE(review): this also runs when begin was outside the data and nothing
 // was deleted; that only requests a re-check.
698  if (!this->isASCII()) this->setIsASCIIChecked(false); // check again
699 
700  return this;
701 }
702 
703 
705 /******************************************************************************/
706 /* Function: set the size of the buffer */
707 /******************************************************************************/
708 {
709  bool truncated = (newsize < dataLength);
710  // has a reset to zero been requested?
711  if (newsize == 0)
712  {
713  // have we increased the buffer size?
715  {
716  // reallocate the buffer
717  OrefSet(this, this->data, new_buffer(defaultSize));
718  // reset the size to the default
720  }
721  this->setLength(0);
722  }
723  // an actual resize?
724  else if (newsize != bufferLength)
725  {
726  // reallocate the buffer
727  RexxBuffer *newBuffer = new_buffer(newsize);
728  // if we're shrinking this, it truncates.
729  this->setLength(Numerics::minVal(dataLength, newsize));
730  newBuffer->copyData(0, data->getData(), dataLength);
731  // replace the old buffer
732  OrefSet(this, this->data, newBuffer);
733  // and update the size....
734  bufferLength = newsize;
735  }
736  if (truncated)
737  {
738  // If the buffer before truncation was not ASCII, maybe the shorter buffer is ASCII
739  if (!this->isASCII()) this->setIsASCIIChecked(false); // check again
740  }
741  return this;
742 }
743 
744 
745 // in behaviour
747 /******************************************************************************/
748 /* Function: set the size of the buffer */
749 /******************************************************************************/
750 {
751  size_t newsize = lengthArgument(size, ARG_ONE);
752  return this->setBufferLength(newsize);
753 }
754 
755 
757 /******************************************************************************/
758 /* Function: Handle a REQUEST('STRING') request for a mutablebuffer object */
759 /******************************************************************************/
760 {
761  return new_string(this->data->getData(), this->dataLength);
762 }
763 
764 /**
765  * Baseclass optimization for handling request array calls.
766  *
767  * @return The string object converted to an array using default arguments.
768  */
770 {
771  // forward to the Rexx version with default arguments
772  return this->makeArrayRexx(OREF_NULL);
773 }
774 
775 /**
776  * Handle the primitive class makeString optimization. This
777  * is required because MutableBuffer implements a
778  * STRING method.
779  *
780  * @return The string value of the buffer
781  */
783 {
784  // go straight to the string handler
785  return this->makeString();
786 }
787 
788 
789 /******************************************************************************/
790 /* Arguments: String position for substr */
791 /* requested length of new string */
792 /* pad character to use, if necessary */
793 /* */
794 /* Returned: string, sub string of original. */
795 /******************************************************************************/
796 // in behaviour
798  RexxInteger *arglength,
799  RexxString *pad)
800 {
801  return StringUtil::substr(getStringData(), getLength(), argposition, arglength, pad);
802 }
803 
804 
805 /**
806  * Perform a search for a string within the buffer.
807  *
808  * @param needle The search needle.
809  * @param pstart the starting position.
810  *
811  * @return The index of the located string. Returns 0 if no matches
812  * are found.
813  */
814 // in behaviour
816 {
817  return StringUtil::posRexx(getStringData(), getLength(), needle, pstart, range);
818 }
819 
820 
821 /**
822  * Perform a search for the last position of a string within the
823  * buffer.
824  *
825  * @param needle The search needle.
826  * @param pstart the starting position.
827  *
828  * @return The index of the located string. Returns 0 if no matches
829  * are found.
830  */
831 // in behaviour
833 {
834  return StringUtil::lastPosRexx(getStringData(), getLength(), needle, _start, _range);
835 }
836 
837 
838 /**
839  * Perform a caseless search for a string within the buffer.
840  *
841  * @param needle The search needle.
842  * @param pstart the starting position. Optional; defaults to 1. One is
 *               subtracted before the value is handed to the primitive
 *               search function.
 * @param range  Optional length argument; when omitted it defaults to
 *               getLength() - start + 1.
843  *
844  * @return The index of the located string. Returns 0 if no matches
845  * are found.
846  */
847 // in behaviour
849 {
850  /* force needle to a string */
851  needle = stringArgument(needle, OREF_positional, ARG_ONE);
852  ProtectedObject p(needle);
853  /* get the starting position */
854  size_t _start = optionalPositionArgument(pstart, 1, ARG_TWO);
 // NOTE(review): the default range below wraps around when _start is more
 // than one past the data length; presumably caselessPos() copes with an
 // oversized range -- confirm.
855  size_t _range = optionalLengthArgument(range, getLength() - _start + 1, ARG_THREE);
856  /* pass on to the primitive function */
857  /* and return as an integer object */
858  size_t result = StringUtil::caselessPos(getStringData(), getLength(), needle , _start - 1, _range);
859  return new_integer(result);
860 }
861 
862 
863 /**
864  * Perform a caseless search for the last position of a string
865  * within the buffer.
866  *
867  * @param needle The search needle.
868  * @param pstart the starting position.
869  *
870  * @return The index of the located string. Returns 0 if no matches
871  * are found.
872  */
873 // in behaviour
875 {
876  /* force needle to a string */
877  needle = stringArgument(needle, OREF_positional, ARG_ONE);
878  ProtectedObject p(needle);
879  /* get the starting position */
880  size_t _start = optionalPositionArgument(pstart, getLength(), ARG_TWO);
881  size_t _range = optionalLengthArgument(range, getLength(), ARG_THREE);
882  /* pass on to the primitive function */
883  /* and return as an integer object */
884  size_t result = StringUtil::caselessLastPos(getStringData(), getLength(), needle , _start, _range);
885  return new_integer(result);
886 }
887 
888 
889 /**
890  * Extract a single character from a string object.
891  * Returns a null string if the specified position is
892  * beyond the bounds of the string.
893  *
894  * @param positionArg
895  * The position of the target character. Must be a positive
896  * whole number.
897  *
898  * @return Returns the single character at the target position.
899  * Returns a null string if the position is beyond the end
900  * of the string.
901  */
902 // in behaviour
904 {
905  return StringUtil::subchar(getStringData(), getLength(), positionArg);
906 }
907 
908 
909 // in behaviour
911 /******************************************************************************/
912 /* Function: Split string into an array */
913 /******************************************************************************/
914 {
916 }
917 
918 
919 // in behaviour
921 /******************************************************************************/
922 /* Function: Count occurrences of one string in another. */
923 /******************************************************************************/
924 {
925  /* force needle to a string */
926  needle = stringArgument(needle, OREF_positional, ARG_ONE);
927  ProtectedObject p(needle);
928  // delegate the counting to the string util
930 }
931 
932 // in behaviour
934 /******************************************************************************/
935 /* Function: Count occurrences of one string in another. */
936 /******************************************************************************/
937 {
938  /* force needle to a string */
939  needle = stringArgument(needle, OREF_positional, ARG_ONE);
940  ProtectedObject p(needle);
941  // delegate the counting to the string util
943 }
944 
945 /**
946  * Do an in-place changeStr operation on a mutable buffer: occurrences of the
 * needle are replaced by newNeedle directly in this buffer's data.
947  *
948  * @param needle The search needle (coerced to a string).
949  * @param newNeedle The replacement string (coerced to a string).
950  * @param countArg The maximum number of occurrences to replace. Optional; when
 *                 omitted, Numerics::MAX_WHOLENUMBER is used so every match is replaced.
951  *
952  * @return The target MutableBuffer (this); returned untouched when nothing matches.
953  */
954 // in behaviour
// NOTE(review): listing line 955 is absent from this extract; presumably it held the method signature.
956 {
957  /* force needle to a string */
958  needle = stringArgument(needle, OREF_positional, ARG_ONE);
959  ProtectedObject p1(needle);
960  /* newNeedle must be a string too */
961  newNeedle = stringArgument(newNeedle, OREF_positional, ARG_TWO);
962  ProtectedObject p2(newNeedle);
963 
964  // we'll only change up to a specified count. If not there, we do everything.
965  size_t count = optionalPositive(countArg, Numerics::MAX_WHOLENUMBER, OREF_positional, ARG_THREE);
966  // find the number of matches in the string
967  size_t matches = StringUtil::countStr(getStringData(), getLength(), needle);
968  if (matches > count) // the matches are bounded by the count
969  {
970  matches = count;
971  }
972  // no matches is easy!
973  if (matches == 0)
974  {
975  return this;
976  }
977  size_t needleLength = needle->getLength(); /* get the length of the needle */
978  size_t newLength = newNeedle->getLength(); /* and the replacement length */
979  // calculate the final length and make sure we have enough space
980  size_t resultLength = this->getLength() - (matches * needleLength) + (matches * newLength);
 // NOTE(review): the full result length is passed here, whereas the space() routine in this
 // file passes only the growth; confirm which of the two ensureCapacity() expects.
981  ensureCapacity(resultLength);
982 
983  // an inplace update has complications, depending on whether the new string is shorter,
984  // the same length, or longer
985 
986  // simplest case...same length strings. We can just overlay the existing occurrences
987  if (needleLength == newLength)
988  {
989  const char *source = getStringData();
990  size_t sourceLength = getLength();
991  size_t _start = 0; /* set a zero starting point */
992  for (size_t i = 0; i < matches; i++)
993  {
994  // search for the next occurrence...which should be there because we
995  // already know the count
996  size_t matchPos = StringUtil::pos(source, sourceLength, needle, _start, sourceLength);
 // matchPos is treated as a 1-based position, so matchPos - 1 is the offset to overlay
997  copyData(matchPos - 1, newNeedle->getStringData(), newLength);
998  // step to the next search position (the offset just past the overlaid text)
999  _start = matchPos + newLength - 1;
1000  }
1001  }
1002  // this will be a shorter thing, so we can do things in place as if we were using two buffers
1003  else if (needleLength > newLength)
1004  {
1005  // we start building from the beginning
1006  size_t copyOffset = 0;
1007  size_t _start = 0;
1008  // get our string bounds
1009  const char *source = getStringData();
1010  size_t sourceLength = getLength();
1011  const char *newPtr = newNeedle->getStringData();
1012  // _start is our scan offset, copyOffset is where the next output piece is written
1013  for (size_t i = 0; i < matches; i++)
1014  {
1015  // look for each instance and replace
1016  size_t matchPos = StringUtil::pos(source, sourceLength, needle, _start, sourceLength);
1017  size_t copyLength = (matchPos - 1) - _start; /* get the next length to copy */
1018  // if this skipped over characters, we need to copy those
1019  if (copyLength != 0)
1020  {
1021  copyData(copyOffset, source + _start, copyLength);
1022  copyOffset += copyLength;
1023  }
1024  // replacing with a non-null string, copy the replacement string in
1025  if (newLength != 0)
1026  {
1027  copyData(copyOffset, newPtr, newLength);
1028  copyOffset += newLength;
1029  }
1030  _start = matchPos + needleLength - 1; /* step to the next position */
1031  }
1032  // we likely have some remainder that needs copying
1033  if (_start < sourceLength)
1034  {
1035  copyData(copyOffset, source + _start, sourceLength - _start);
1036  }
1037  }
1038  // hardest case...the string gets longer. We need to shift all of the data
1039  // to the end and then pull the pieces back in as we go
1040  else
1041  {
1042  size_t growth = (newLength - needleLength) * matches; // total number of characters added
1043 
1044  // we start building from the beginning
1045  size_t copyOffset = 0;
1046  size_t _start = 0;
1047  // get our string bounds; source is offset by growth because that is where
 // the original data is expected to sit once the gap below has been opened
1048  const char *source = getStringData() + growth;
1049  size_t sourceLength = getLength();
1050  // this shifts everything to the end of the buffer. From there,
1051  // we pull pieces back into place.
1052  openGap(0, growth, sourceLength);
1053  const char *newPtr = newNeedle->getStringData();
1054  // _start is our scan offset (relative to source), copyOffset is the write offset
1055  for (size_t i = 0; i < matches; i++)
1056  {
1057  // look for each instance and replace
1058  size_t matchPos = StringUtil::pos(source, sourceLength, needle, _start, sourceLength);
1059  size_t copyLength = (matchPos - 1) - _start; /* get the next length to copy */
1060  // if this skipped over characters, we need to copy those
1061  if (copyLength != 0)
1062  {
1063  copyData(copyOffset, source + _start, copyLength);
1064  copyOffset += copyLength;
1065  }
1066  // replacing with a non-null string, copy the replacement string in
1067  if (newLength != 0)
1068  {
1069  copyData(copyOffset, newPtr, newLength);
1070  copyOffset += newLength;
1071  }
1072  _start = matchPos + needleLength - 1; /* step to the next position */
1073  }
1074  // we likely have some remainder that needs copying
1075  if (_start < sourceLength)
1076  {
1077  copyData(copyOffset, source + _start, sourceLength - _start);
1078  }
1079  }
1080  // update the result length
1081  this->setLength(resultLength);
1082 
 // finally, bring the buffer's ASCII status flags in line with the inserted replacement text
1083  bool newNeedleIsASCII = newNeedle->checkIsASCII();
1084  if (this->isASCII())
1085  {
1086  if (!newNeedleIsASCII) this->setIsASCII(false); // no need to check, we are sure it's not ASCII
1087  }
1088  else
1089  {
1090  if (newNeedleIsASCII) this->setIsASCIIChecked(false); // check again, maybe the ASCII newNeedle has replaced all the non-ASCII characters
1091  }
1092 
1093  return this;
1094 }
1095 
1096 /**
1097  * Do an in-place caseless changeStr operation on a
1098  * mutable buffer: occurrences of the needle, located with caseless
 * searches, are replaced by newNeedle directly in this buffer's data.
1099  *
1100  * @param needle The search needle (coerced to a string).
1101  * @param newNeedle The replacement string (coerced to a string).
1102  * @param countArg The maximum number of occurrences to replace. Optional; when
 *                 omitted, Numerics::MAX_WHOLENUMBER is used so every match is replaced.
1103  *
1104  * @return The target MutableBuffer (this); returned untouched when nothing matches.
1105  */
1106 // in behaviour
// NOTE(review): listing line 1107 is absent from this extract; presumably it held the method signature.
1108 {
1109  /* force needle to a string */
1110  needle = stringArgument(needle, OREF_positional, ARG_ONE);
1111  ProtectedObject p1(needle);
1112  /* newNeedle must be a string too */
1113  newNeedle = stringArgument(newNeedle, OREF_positional, ARG_TWO);
1114  ProtectedObject p2(newNeedle);
1115 
1116  // we'll only change up to a specified count. If not there, we do everything.
1117  size_t count = optionalPositive(countArg, Numerics::MAX_WHOLENUMBER, OREF_positional, ARG_THREE);
1118  // find the number of (caseless) matches in the string
1119  size_t matches = StringUtil::caselessCountStr(getStringData(), getLength(), needle);
1120  if (matches > count) // the matches are bounded by the count
1121  {
1122  matches = count;
1123  }
1124  // no matches is easy!
1125  if (matches == 0)
1126  {
1127  return this;
1128  }
1129  size_t needleLength = needle->getLength(); /* get the length of the needle */
1130  size_t newLength = newNeedle->getLength(); /* and the replacement length */
1131  // calculate the final length and make sure we have enough space
1132  size_t resultLength = this->getLength() - (matches * needleLength) + (matches * newLength);
 // NOTE(review): the full result length is passed here, whereas the space() routine in this
 // file passes only the growth; confirm which of the two ensureCapacity() expects.
1133  ensureCapacity(resultLength);
1134 
1135  // an inplace update has complications, depending on whether the new string is shorter,
1136  // the same length, or longer
1137 
1138  // simplest case...same length strings. We can just overlay the existing occurrences
1139  if (needleLength == newLength)
1140  {
1141  const char *source = getStringData();
1142  size_t sourceLength = getLength();
1143  size_t _start = 0; /* set a zero starting point */
1144  for (size_t i = 0; i < matches; i++)
1145  {
1146  // search for the next occurrence...which should be there because we
1147  // already know the count
1148  size_t matchPos = StringUtil::caselessPos(source, sourceLength, needle, _start, sourceLength);
 // matchPos is treated as a 1-based position, so matchPos - 1 is the offset to overlay
1149  copyData(matchPos - 1, newNeedle->getStringData(), newLength);
1150  // step to the next search position (the offset just past the overlaid text)
1151  _start = matchPos + newLength - 1;
1152  }
1153  }
1154  // this will be a shorter thing, so we can do things in place as if we were using two buffers
1155  else if (needleLength > newLength)
1156  {
1157  // we start building from the beginning
1158  size_t copyOffset = 0;
1159  size_t _start = 0;
1160  // get our string bounds
1161  const char *source = getStringData();
1162  size_t sourceLength = getLength();
1163  const char *newPtr = newNeedle->getStringData();
1164  // _start is our scan offset, copyOffset is where the next output piece is written
1165  for (size_t i = 0; i < matches; i++)
1166  {
1167  // look for each instance and replace
1168  size_t matchPos = StringUtil::caselessPos(source, sourceLength, needle, _start, sourceLength);
1169  size_t copyLength = (matchPos - 1) - _start; /* get the next length to copy */
1170  // if this skipped over characters, we need to copy those
1171  if (copyLength != 0)
1172  {
1173  copyData(copyOffset, source + _start, copyLength);
1174  copyOffset += copyLength;
1175  }
1176  // replacing with a non-null string, copy the replacement string in
1177  if (newLength != 0)
1178  {
1179  copyData(copyOffset, newPtr, newLength);
1180  copyOffset += newLength;
1181  }
1182  _start = matchPos + needleLength - 1; /* step to the next position */
1183  }
1184  // we likely have some remainder that needs copying
1185  if (_start < sourceLength)
1186  {
1187  copyData(copyOffset, source + _start, sourceLength - _start);
1188  }
1189  }
1190  // hardest case...the string gets longer. We need to shift all of the data
1191  // to the end and then pull the pieces back in as we go
1192  else
1193  {
1194  size_t growth = (newLength - needleLength) * matches; // total number of characters added
1195 
1196  // we start building from the beginning
1197  size_t copyOffset = 0;
1198  size_t _start = 0;
1199  // get our string bounds; source is offset by growth because that is where
 // the original data is expected to sit once the gap below has been opened
1200  const char *source = getStringData() + growth;
1201  size_t sourceLength = getLength();
1202  // this shifts everything to the end of the buffer. From there,
1203  // we pull pieces back into place.
1204  openGap(0, growth, sourceLength);
1205  const char *newPtr = newNeedle->getStringData();
1206  // _start is our scan offset (relative to source), copyOffset is the write offset
1207  for (size_t i = 0; i < matches; i++)
1208  {
1209  // look for each instance and replace
1210  size_t matchPos = StringUtil::caselessPos(source, sourceLength, needle, _start, sourceLength);
1211  size_t copyLength = (matchPos - 1) - _start; /* get the next length to copy */
1212  // if this skipped over characters, we need to copy those
1213  if (copyLength != 0)
1214  {
1215  copyData(copyOffset, source + _start, copyLength);
1216  copyOffset += copyLength;
1217  }
1218  // replacing with a non-null string, copy the replacement string in
1219  if (newLength != 0)
1220  {
1221  copyData(copyOffset, newPtr, newLength);
1222  copyOffset += newLength;
1223  }
1224  _start = matchPos + needleLength - 1; /* step to the next position */
1225  }
1226  // we likely have some remainder that needs copying
1227  if (_start < sourceLength)
1228  {
1229  copyData(copyOffset, source + _start, sourceLength - _start);
1230  }
1231  }
1232  // update the result length
1233  this->setLength(resultLength);
1234 
 // finally, bring the buffer's ASCII status flags in line with the inserted replacement text
1235  bool newNeedleIsASCII = newNeedle->checkIsASCII();
1236  if (this->isASCII())
1237  {
1238  if (!newNeedleIsASCII) this->setIsASCII(false); // no need to check, we are sure it's not ASCII
1239  }
1240  else
1241  {
1242  if (newNeedleIsASCII) this->setIsASCIIChecked(false); // check again, maybe the ASCII newNeedle has replaced all the non-ASCII characters
1243  }
1244 
1245  return this;
1246 }
1247 
1248 
1249 /**
1250  * Rexx exported method stub for the lower() method. The selected range of
 * this buffer is converted to lowercase in place.
1251  *
1252  * @param _start The optional starting location. Defaults to the first character
1253  * if not specified.
1254  * @param _length The length to convert. Defaults to the segment from the start
1255  * position to the end of the buffer; longer values are capped to that segment.
1256  *
1257  * @return The target mutable buffer (this); no new object is created.
1258  */
1259 // in behaviour
// NOTE(review): listing line 1260 is absent from this extract; presumably it held the method signature.
1261 {
1262  size_t startPos = optionalPositionArgument(_start, 1, ARG_ONE) - 1;
1263  size_t rangeLength = optionalLengthArgument(_length, getLength(), ARG_TWO);
1264 
1265  // if we're starting beyond the end bounds, return unchanged
1266  if (startPos >= getLength())
1267  {
1268  return this;
1269  }
1270 
1271  rangeLength = Numerics::minVal(rangeLength, getLength() - startPos);
1272 
1273  // a zero length value is also a non-change.
1274  if (rangeLength == 0)
1275  {
1276  return this;
1277  }
1278 
1279  char *bufferData = getData() + startPos;
1280  // now lowercase in place
1281  for (size_t i = 0; i < rangeLength; i++)
1282  {
 // the <cctype> functions require a value representable as unsigned char (or EOF);
 // handing them a negative plain char (any byte >= 0x80 where char is signed) is undefined
1283  *bufferData = (char)tolower((unsigned char)*bufferData);
1284  bufferData++;
1285  }
1286  return this;
1287 }
1288 
1289 
1290 /**
1291  * Rexx exported method stub for the upper() method. The selected range of
 * this buffer is converted to uppercase in place.
1292  *
1293  * @param _start The optional starting location. Defaults to the first character
1294  * if not specified.
1295  * @param _length The length to convert. Defaults to the segment from the start
1296  * position to the end of the buffer; longer values are capped to that segment.
1297  *
1298  * @return The target mutable buffer (this); no new object is created.
1299  */
1300 // in behaviour
// NOTE(review): listing line 1301 is absent from this extract; presumably it held the method signature.
1302 {
1303  size_t startPos = optionalPositionArgument(_start, 1, ARG_ONE) - 1;
1304  size_t rangeLength = optionalLengthArgument(_length, getLength(), ARG_TWO);
1305 
1306  // if we're starting beyond the end bounds, return unchanged
1307  if (startPos >= getLength())
1308  {
1309  return this;
1310  }
1311 
1312  rangeLength = Numerics::minVal(rangeLength, getLength() - startPos);
1313 
1314  // a zero length value is also a non-change.
1315  if (rangeLength == 0)
1316  {
1317  return this;
1318  }
1319 
1320  char *bufferData = getData() + startPos;
1321  // now uppercase in place
1322  for (size_t i = 0; i < rangeLength; i++)
1323  {
 // the <cctype> functions require a value representable as unsigned char (or EOF);
 // handing them a negative plain char (any byte >= 0x80 where char is signed) is undefined
1324  *bufferData = (char)toupper((unsigned char)*bufferData);
1325  bufferData++;
1326  }
1327  return this;
1328 }
1329 
1330 
1331 /**
1332  * translate characters in the buffer using a translation table.
1333  *
1334  * @param tableo The output table specification
1335  * @param tablei The input table specification
1336  * @param pad An optional padding character (default is a space).
1337  * @param _start The starting position to translate.
1338  * @param _range The length to translate
1339  *
1340  * @return The target mutable buffer.
1341  */
1342 // in behaviour
1344 {
1345  // just a simple uppercase?
1346  if (tableo == OREF_NULL && tablei == OREF_NULL && pad == OREF_NULL)
1347  {
1348  return this->upper(_start, _range);
1349  }
1350  /* validate the tables */
1351  tableo = optionalStringArgument(tableo, OREF_NULLSTRING, OREF_positional, ARG_ONE);
1352  ProtectedObject p1(tableo);
1353  size_t outTableLength = tableo->getLength(); /* get the table length */
1354  /* input table too */
1355  tablei = optionalStringArgument(tablei, OREF_NULLSTRING, OREF_positional, ARG_TWO);
1356  ProtectedObject p2(tablei);
1357  size_t inTableLength = tablei->getLength(); /* get the table length */
1358  const char *inTable = tablei->getStringData(); /* point at the input table */
1359  const char *outTable = tableo->getStringData(); /* and the output table */
1360  /* get the pad character */
1361  codepoint_t padChar = optionalPadArgument(pad, ' ', ARG_THREE);
1362  bool padInserted = false;
1363  bool padIsASCII = ((padChar & 0x80) == 0);
1364  size_t startPos = optionalPositionArgument(_start, 1, ARG_FOUR);
1365  size_t range = optionalLengthArgument(_range, getLength() - startPos + 1, ARG_FOUR);
1366 
1367  // if nothing to translate, we can return now
1368  if (startPos > getLength() || range == 0)
1369  {
1370  return this;
1371  }
1372  // cape the real range
1373  range = Numerics::minVal(range, getLength() - startPos + 1);
1374  char *scanPtr = getData() + startPos - 1; /* point to data */
1375  size_t scanLength = range; /* get the length too */
1376 
1377  bool translateIsASCII = true;
1378  while (scanLength-- != 0)
1379  { /* spin thru input */
1380  char ch = *scanPtr; /* get a character */
1381  size_t position;
1382 
1383  if (tablei != OREF_NULLSTRING) /* input table specified? */
1384  {
1385  /* search for the character */
1386  position = StringUtil::memPos(inTable, inTableLength, ch);
1387  }
1388  else
1389  {
1390  position = ((size_t)ch) & 0xff; /* position is the character value */
1391  }
1392  if (position != (size_t)(-1))
1393  { /* found in the table? */
1394  if (position < outTableLength) /* in the output table? */
1395  {
1396  /* convert the character */
1397  *scanPtr = *(outTable + position);
1398  if (*scanPtr & 0x80) translateIsASCII = false;
1399  }
1400  else
1401  {
1402  *scanPtr = (char)padChar; /* else use the pad character */
1403  padInserted = true;
1404  }
1405  }
1406  scanPtr++; /* step the pointer */
1407  }
1408 
1409  if (!translateIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
1410  else if (padInserted && !padIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
1411  // here we know that the new characters are all ASCII
1412  // if the buffer was not ASCII before the translation, maybe it is now, to check again
1413  else if (!this->isASCII()) this->setIsASCIIChecked(false); // check again
1414 
1415  return this;
1416 }
1417 
1418 
1419 /**
1420  * Test if regions within two strings match.
1421  *
1422  * @param start_ The starting compare position within the target string. This
1423  * must be within the bounds of the string.
1424  * @param other The other compare string.
1425  * @param offset_ The starting offset of the compare string. This must be
1426  * within the string bounds. The default start position is 1.
1427  * @param len_ The length of the compare substring. The length and the
1428  * offset must specify a valid substring of other. If not
1429  * specified, this defaults to the substring from the
1430  * offset to the end of the string.
1431  *
1432  * @return TheTrueObject if the two regions match, TheFalseObject for any mismatch.
1433  */
1434 // in behaviour
// NOTE(review): listing line 1435 is absent from this extract; presumably it held the method signature.
1436 {
1437  stringsize_t _start = positionArgument(start_, ARG_ONE);
1438  // the start position must be within the string bounds
1439  if (_start > getLength())
1440  {
 // NOTE(review): listing line 1441 is absent from this extract; presumably an error is raised here.
1442  }
1443  other = stringArgument(other, OREF_positional, ARG_TWO);
1444 
1445  stringsize_t offset = optionalPositionArgument(offset_, 1, ARG_THREE);
1446 
 // the offset must fall inside the other string
1447  if (offset > other->getLength())
1448  {
 // NOTE(review): listing line 1449 is absent from this extract; presumably an error is raised here.
1450  }
1451 
1452  stringsize_t len = optionalLengthArgument(len_, other->getLength() - offset + 1, ARG_FOUR);
1453 
 // offset and length together must not run past the end of the other string
1454  if ((offset + len - 1) > other->getLength())
1455  {
 // NOTE(review): listing line 1456 is absent from this extract; presumably an error is raised here.
1457  }
1458 
1459  return primitiveMatch(_start, other, offset, len) ? TheTrueObject : TheFalseObject;
1460 }
1461 
1462 
1463 /**
1464  * Test if regions within two strings match, using a caseless comparison.
1465  *
1466  * @param start_ The starting compare position within the target string. This
1467  * must be within the bounds of the string.
1468  * @param other The other compare string.
1469  * @param offset_ The starting offset of the compare string. This must be
1470  * within the string bounds. The default start position is 1.
1471  * @param len_ The length of the compare substring. The length and the
1472  * offset must specify a valid substring of other. If not
1473  * specified, this defaults to the substring from the
1474  * offset to the end of the string.
1475  *
1476  * @return TheTrueObject if the two regions match, TheFalseObject for any mismatch.
1477  */
1478 // in behaviour
// NOTE(review): listing line 1479 is absent from this extract; presumably it held the method signature.
1480 {
1481  stringsize_t _start = positionArgument(start_, ARG_ONE);
1482  // the start position must be within the string bounds
1483  if (_start > getLength())
1484  {
 // NOTE(review): listing line 1485 is absent from this extract; presumably an error is raised here.
1486  }
1487  other = stringArgument(other, OREF_positional, ARG_TWO);
1488 
1489  stringsize_t offset = optionalPositionArgument(offset_, 1, ARG_THREE);
1490 
 // the offset must fall inside the other string
1491  if (offset > other->getLength())
1492  {
 // NOTE(review): listing line 1493 is absent from this extract; presumably an error is raised here.
1494  }
1495 
1496  stringsize_t len = optionalLengthArgument(len_, other->getLength() - offset + 1, ARG_FOUR);
1497 
 // offset and length together must not run past the end of the other string
1498  if ((offset + len - 1) > other->getLength())
1499  {
 // NOTE(review): listing line 1500 is absent from this extract; presumably an error is raised here.
1501  }
1502 
1503  return primitiveCaselessMatch(_start, other, offset, len) ? TheTrueObject : TheFalseObject;
1504 }
1505 
1506 
1507 /**
1508  * Perform a compare of regions of two string objects. Returns
1509  * true if the two regions match, returns false for mismatches.
 * The comparison is an exact byte comparison (memcmp).
1510  *
1511  * @param _start The starting position within the target buffer (origin one).
1512  * @param other The source string for the compare.
1513  * @param offset The position of the substring of the other string to use (origin one).
1514  * @param len The length of the substring to compare.
1515  *
1516  * @return True if the regions match, false otherwise (including when the
 *         region would run past the end of this buffer).
1517  */
// NOTE(review): listing line 1518 is absent from this extract; presumably it held the method signature.
1519 {
1520  _start--; // make the starting point origin zero
1521  offset--; // same for the offset into the other string
1522 
1523  // if the match is not possible in the target string, just return false now.
1524  if ((_start + len) > getLength())
1525  {
1526  return false;
1527  }
1528 
1529  return memcmp(getStringData() + _start, other->getStringData() + offset, len) == 0;
1530 }
1531 
1532 
1533 /**
1534  * Perform a caseless compare of regions of two string objects.
1535  * Returns true if the two regions match, returns false for
1536  * mismatches. The comparison is delegated to StringUtil::caselessCompare.
1537  *
1538  * @param _start The starting position within the target buffer (origin one).
1539  * @param other The source string for the compare.
1540  * @param offset The position of the substring of the other string to use (origin one).
1541  * @param len The length of the substring to compare.
1542  *
1543  * @return True if the regions match, false otherwise (including when the
 *         region would run past the end of this buffer).
1544  */
// NOTE(review): listing line 1545 is absent from this extract; presumably it held the method signature.
1546 {
1547  _start--; // make the starting point origin zero
1548  offset--; // same for the offset into the other string
1549 
1550  // if the match is not possible in the target string, just return false now.
1551  if ((_start + len) > getLength())
1552  {
1553  return false;
1554  }
1555 
1556  return StringUtil::caselessCompare(getStringData() + _start, other->getStringData() + offset, len) == 0;
1557 }
1558 
1559 
1560 /**
1561  * Compare a single character at a given position against
1562  * a set of characters to see if any of the characters is
1563  * a match.
1564  *
1565  * @param position_ The character position (origin one)
1566  * @param matchSet The set to compare against.
1567  *
1568  * @return TheTrueObject if the character at the given position is any of the characters,
1569  * TheFalseObject if none of them match.
1570  */
1571 // in behaviour
// NOTE(review): listing line 1572 is absent from this extract; presumably it held the method signature.
1573 {
1574  stringsize_t position = positionArgument(position_, ARG_ONE);
1575  // the start position must be within the string bounds
1576  if (position > getLength())
1577  {
 // NOTE(review): listing line 1578 is absent from this extract; presumably an error is raised here.
1579  }
1580  matchSet = stringArgument(matchSet, OREF_positional, ARG_TWO);
1581 
1582  stringsize_t _setLength = matchSet->getLength();
 // fetch the target character using a zero-based index
1583  codepoint_t _matchChar = getCharB(position - 1);
1584 
1585  // iterate through the match set looking for a match
1586  for (stringsize_t i = 0; i < _setLength; i++)
1587  {
1588  if (_matchChar == matchSet->getChar(i))
1589  {
1590  return TheTrueObject;
1591  }
1592  }
1593  return TheFalseObject;
1594 }
1595 
1596 
1597 /**
1598  * Compare a single character at a given position against
1599  * a set of characters to see if any of the characters is
1600  * a match, ignoring case (both sides are uppercased before comparing).
1601  *
1602  * @param position_ The character position (origin one)
1603  * @param matchSet The set to compare against.
1604  *
1605  * @return TheTrueObject if the character at the given position is any of the characters,
1606  * TheFalseObject if none of them match.
1607  */
1608 // in behaviour
// NOTE(review): listing line 1609 is absent from this extract; presumably it held the method signature.
1610 {
1611  stringsize_t position = positionArgument(position_, ARG_ONE);
1612  // the start position must be within the string bounds
1613  if (position > getLength())
1614  {
 // NOTE(review): listing line 1615 is absent from this extract; presumably an error is raised here.
1616  }
1617  matchSet = stringArgument(matchSet, OREF_positional, ARG_TWO);
1618 
1619  stringsize_t _setLength = matchSet->getLength();
 // fetch the target character using a zero-based index, then fold it to uppercase once
1620  codepoint_t _matchChar = getCharB(position - 1);
1621  _matchChar = toupper((int)_matchChar);
1622 
1623  // iterate through the match set looking for a match, using a
1624  // caseless compare
1625  for (stringsize_t i = 0; i < _setLength; i++)
1626  {
1627  if (_matchChar == toupper(matchSet->getChar(i)))
1628  {
1629  return TheTrueObject;
1630  }
1631  }
1632  return TheFalseObject;
1633 }
1634 
1635 
1636 /**
1637  * Perform a character verify operation on a mutable buffer.
 * The work is delegated to StringUtil::verify using this buffer's data and length.
1638  *
1639  * @param ref The reference string.
1640  * @param option The match/nomatch option.
1641  * @param _start The start position for the verify.
1642  * @param range The range to search
1643  *
1644  * @return The offset of the first match/mismatch within the buffer.
1645  */
1646 // in behaviour
// NOTE(review): listing line 1647 is absent from this extract; presumably it held the method signature.
1648 {
1649  return StringUtil::verify(getStringData(), getLength(), ref, option, _start, range);
1650 }
1651 
1652 
1653 /**
1654  * Perform a subword extraction from a mutable buffer.
 * The work is delegated to StringUtil::subWord using this buffer's data and length.
1655  *
1656  * @param position The first word to be extracted.
1657  * @param plength The number of words to extract.
1658  *
1659  * @return The substring containing the extracted words.
1660  */
1661 // in behaviour
// NOTE(review): listing line 1662 is absent from this extract; presumably it held the method signature.
1663 {
1664  return StringUtil::subWord(getStringData(), getLength(), position, plength);
1665 }
1666 
1667 
1668 /**
1669  * Returns an array of all words contained in the given range
1670  * of the string, using the same extraction rules used
1671  * for subWord() and word().
 * The work is delegated to StringUtil::subWords using this buffer's data and length.
1672  *
1673  * @param position The optional starting position. If not provided, extraction
1674  * starts with the first word.
1675  * @param plength The number of words to extract. If omitted, will extract
1676  * from the starting position to the end of the string.
1677  *
1678  * @return An array containing the extracted words. If no words are
1679  * available within the given range, this returns an empty
1680  * array.
1681  */
// NOTE(review): listing line 1682 is absent from this extract; presumably it held the method signature.
1683 {
1684  return StringUtil::subWords(getStringData(), getLength(), position, plength);
1685 }
1686 
1687 
1688 /**
1689  * Extract a given word from a mutable buffer.
 * The work is delegated to StringUtil::word using this buffer's data and length.
1690  *
1691  * @param position The target word position.
1692  *
1693  * @return The extracted word, as a string.
1694  */
1695 // in behaviour
// NOTE(review): listing line 1696 is absent from this extract; presumably it held the method signature.
1697 {
1698  return StringUtil::word(getStringData(), getLength(), position);
1699 }
1700 
1701 
1702 /**
1703  * Return the index of a given word position in a mutable buffer.
1704  * The work is delegated to StringUtil::wordIndex using this buffer's data and length.
1705  *
1706  * @param position The target word position.
1707  *
1708  * @return The position of the target word.
1709  */
1710 // in behaviour
// NOTE(review): listing line 1711 is absent from this extract; presumably it held the method signature.
1712 {
1713  return StringUtil::wordIndex(getStringData(), getLength(), position);
1714 }
1715 
1716 
1717 /**
1718  * Return the length of a given word position in a mutable
1719  * buffer.
1720  * The work is delegated to StringUtil::wordLength using this buffer's data and length.
1721  *
1722  * @param position The target word position.
1723  *
1724  * @return The length of the target word.
1725  */
1726 // in behaviour
// NOTE(review): listing line 1727 is absent from this extract; presumably it held the method signature.
1728 {
1729  return StringUtil::wordLength(getStringData(), getLength(), position);
1730 }
1731 
1732 /**
1733  * Return the count of words in the buffer.
 * The count comes from StringUtil::wordCount and is wrapped with new_integer.
1734  *
1735  * @return The buffer word count, as an integer object.
1736  */
1737 // in behaviour
// NOTE(review): listing line 1738 is absent from this extract; presumably it held the method signature.
1739 {
1740  size_t tempCount = StringUtil::wordCount(this->getStringData(), this->getLength());
1741  return new_integer(tempCount);
1742 }
1743 
1744 
1745 /**
1746  * Perform a wordpos search on a mutablebuffer object.
 * The work is delegated to StringUtil::wordPos using this buffer's data and length.
1747  *
1748  * @param phrase The search phrase
1749  * @param pstart The starting search position.
1750  *
1751  * @return The index of the match location.
1752  */
1753 // in behaviour
// NOTE(review): listing line 1754 is absent from this extract; presumably it held the method signature.
1755 {
1756  return StringUtil::wordPos(getStringData(), getLength(), phrase, pstart);
1757 }
1758 
1759 
1760 /**
1761  * Perform a caseless wordpos search on a mutable buffer object.
 * The work is delegated to StringUtil::caselessWordPos using this buffer's data and length.
1762  *
1763  * @param phrase The search phrase
1764  * @param pstart The starting search position.
1765  *
1766  * @return The index of the match location.
1767  */
1768 // in behaviour
// NOTE(review): listing line 1769 is absent from this extract; presumably it held the method signature.
1770 {
1771  return StringUtil::caselessWordPos(getStringData(), getLength(), phrase, pstart);
1772 }
1773 
1774 
1775 /**
1776  * Perform a delword operation on a mutable buffer. The words are removed
 * in place by closing up the buffer data over the deleted section.
1777  *
1778  * @param position The position (word number) of the first word to delete.
1779  * @param plength The number of words to delete. Optional; defaults to
 *                Numerics::MAX_WHOLENUMBER, i.e. everything from position onwards.
1780  *
1781  * @return Always returns the target mutable buffer.
1782  */
1783 // in behaviour
// NOTE(review): listing line 1784 is absent from this extract; presumably it held the method signature.
1785 {
1786  /* convert position to binary */
1787  size_t _wordPos = positionArgument(position, ARG_ONE);
1788  /* get num of words to delete, the */
1789  /* default is "a very large number" */
1790  size_t count = optionalLengthArgument(plength, Numerics::MAX_WHOLENUMBER, ARG_TWO);
1791 
1792  size_t length = getLength(); /* get string length */
1793  if (length == 0) /* null string? */
1794  {
1795  return this; /* nothing to delete */
1796  }
1797  if (count == 0) /* deleting zero words? */
1798  {
1799  return this; /* also very easy */
1800  }
1801  const char *_word = getStringData(); /* point to the string */
1802  const char *nextSite = NULL;
1803  /* get the first word; length is handed over by address, so the scan */
 /* can update it as it goes */
1804  size_t _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1805  while (--_wordPos > 0 && _wordLength != 0)
1806  { /* loop until we reach target */
1807  _word = nextSite; /* copy the start pointer */
1808  /* get the next word */
1809  _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1810  }
1811  if (_wordPos != 0) /* run out of words first */
1812  {
1813  return this; /* return the buffer unaltered */
1814  }
1815  // get the deletion point as an offset
1816  size_t deletePosition = _word - this->getStringData();
1817  while (--count > 0 && _wordLength != 0)
1818  { /* step over the words to delete */
1819  _word = nextSite; /* copy the start pointer */
1820  /* get the next word */
1821  _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1822  }
1823  if (length != 0) /* didn't use up the string */
1824  {
1825  StringUtil::skipBlanks(&nextSite, &length);/* skip over trailing blanks */
1826  }
1827 
 // length is what is still left after the scan above, so everything between
 // the delete position and that remaining tail is the section to remove
1828  size_t gapSize = dataLength - (deletePosition + length);
1829  // close up the delete part
1830  closeGap(deletePosition, gapSize, length);
1831  // adjust for the deleted data
1832  this->setLength(dataLength - gapSize);
1833 
1834  // The buffer has been truncated
1835  // If the buffer before truncation was not ASCII, maybe the shorter buffer is ASCII
1836  if (!this->isASCII()) this->setIsASCIIChecked(false); // check again
1837 
1838  return this;
1839 }
1840 
1841 
1842 /**
1843 * Do an inplace space() operation on a mutable buffer: the words of the
* buffer are re-joined with exactly space_count pad characters between them,
* and leading/trailing blanks are dropped.
1844 *
1845 * @param space_count The number of pad characters between
1846 * each word (optional, defaults to 1)
1847 * @param pad The pad character (optional, defaults to a space)
1848 *
1849 * @return The target MutableBuffer
1850 */
// NOTE(review): listing line 1851 is absent from this extract; presumably it held the method signature.
1852 {
1853  size_t count = 0; /* count word interstices in buffer*/
1854 
1855  /* get the spacing count */
1856  const size_t padLength = optionalLengthArgument(space_count, 1, ARG_ONE);
1857  /* get the pad character */
1858  const char padChar = (char)optionalPadArgument(pad, ' ', ARG_TWO);
1859  bool padInserted = false;
1860  bool padIsASCII = ((padChar & 0x80) == 0);
1861 
1862  // an inplace update has complications, depending on whether the new string
1863  // is shorter or longer than the original.
1864  // first execute padC with padLength == 0,1; later expand padC to padLength
1865  const char padC = ' '; /* intermediate pad: single space */
1866  const size_t padL = 1; /* intermediate pad length: 1 */
1867 
1868  // With padC the new string is not longer, so we can just overlay in place.
1869  // Set write position to start of buffer
1870  // Find first word: start position and length
1871  // While a word is found:
1872  // Copy word to write position
1873  // update write position
1874  // Find next word: start position and length
1875  // if no next word exists then leave
1876  // select spacing count:
1877  // when = 1 then append padChar and update write position
1878  // when = 0 then don't pad
1879  // otherwise append padC and update write position
1880  // increment word interstice count
1881  // iterate
1882  // adjust string dataLength to write position
1883  size_t writePos = 0; /* offset current write position */
1884  const char *_word = getStringData(); /* point to the start of string */
1885  const char *nextSite = NULL; /* start of the next word */
1886  size_t length = getLength(); /* get string data length */
1887 
1888  /* get the first word */
1889  size_t _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1890 
1891  while (_wordLength != 0)
1892  {
1893  /* copy first word to writePos */
1894  copyData(writePos, _word, _wordLength);
1895  writePos += _wordLength; /* update writePos for next word */
1896  _word = nextSite; /* set start pointer to next word */
1897  /* get the next word */
1898  _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1899  if (_wordLength == 0) /* is there no next word coming ? */
1900  {
1901  break; /* don't pad or count last word */
1902  }
1903  switch (padLength) /* handle different padLength */
1904  {
1905  case 1: /* more frequent case goes first */
1906  setData(writePos, padChar, padLength); /* write pad character */
1907  writePos += padLength; /* move write position one byte */
1908  padInserted = true;
1909  break;
1910  case 0:
1911  break; /* don't write pad character */
1912  default: /* padLength > 1 */
1913  setData(writePos, padC, padL); /* write padC pad character */
1914  writePos += padL; /* move write position one byte */
1915  }
1916  count++; /* increment the interstice count */
1917  }
1918  this->dataLength = writePos; /* set data length in buffer */
1919 
1920  if ( padLength > 1 ) /* do we need to expand padC ? */
1921  {
1922  size_t growth = count * (padLength-1); /* data grows by so many bytes */
 // NOTE(review): only the additional length is passed here, whereas the changeStr routines
 // in this file pass the full result length; confirm which of the two ensureCapacity() expects.
1923  ensureCapacity(growth); /* make sure we have room for this */
1924 
1925  // As the string gets longer, we need to shift all data to the end and
1926  // then pull the pieces back in as we go.
1927  length = getLength(); /* get current string data length */
1928  openGap(0, growth, length); /* shift towards end of the buffer */
1929  writePos = 0;
1930  while (growth>0)
1931  {
1932  setData(writePos, padC, padL); /* fill gap with whitespace */
1933  writePos++;
1934  growth--;
1935  }
 // growth has been counted down to zero by the loop above, so the amount is recomputed here
1936  dataLength = getLength() + count * (padLength-1);/*adjust data to size*/
1937 
1938  // Now we do the last loop over, using padChar and padLength
1939  writePos = 0; /* offset current write position */
1940  const char *_word = getStringData(); /*point to the start of string*/
1941  const char *nextSite = NULL; /* start of the next word */
1942  length = this->dataLength; /* get current string data length */
1943  /* get the first word */
1944  _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1945 
1946  while (_wordLength != 0) /* while there is a word ... */
1947  {
1948  /* copy first word to writePos */
1949  copyData(writePos, _word, _wordLength);
1950  writePos += _wordLength; /* update writePos for next word */
1951  _word = nextSite; /* set start pointer to next word */
1952  /* get the next word */
1953  _wordLength = StringUtil::nextWord(&_word, &length, &nextSite);
1954  if (_wordLength != 0) /* except for the last word */
1955  {
1956  setData(writePos, padChar, padLength); /* write padChar chars */
1957  writePos += padLength; /* update writePos for next word */
1958  padInserted = true;
1959  }
1960  }
1961  }
1962 
 // only a non-ASCII pad that was actually written can change the buffer's ASCII status
1963  if (padInserted)
1964  {
1965  if (!padIsASCII) this->setIsASCII(false); // no need to check again, we are sure it's not ASCII
1966  }
1967 
1968  return this; /* return the mutable buffer */
1969 }
void reportException(wholenumber_t error)
RexxBuffer * new_buffer(size_t s)
@ T_MutableBuffer
RexxInteger * new_integer(wholenumber_t v)
#define DEFAULT_BUFFER_LENGTH
codepoint_t optionalPadArgument(RexxObject *o, codepoint_t d, size_t p)
Definition: RexxCore.h:370
#define OREF_NULL
Definition: RexxCore.h:61
RexxString * stringArgument(RexxObject *object, RexxString *kind, size_t position)
Definition: RexxCore.h:315
const int ARG_FOUR
Definition: RexxCore.h:86
const int ARG_THREE
Definition: RexxCore.h:85
size_t optionalPositive(RexxObject *o, size_t d, RexxString *kind, size_t p)
Definition: RexxCore.h:387
#define OrefSet(o, r, v)
Definition: RexxCore.h:101
#define TheTrueObject
Definition: RexxCore.h:196
const int ARG_TWO
Definition: RexxCore.h:84
size_t optionalNonNegative(RexxObject *o, size_t d, RexxString *kind, size_t p)
Definition: RexxCore.h:382
size_t optionalLengthArgument(RexxObject *o, size_t d, size_t p)
Definition: RexxCore.h:355
size_t optionalPositionArgument(RexxObject *o, size_t d, size_t p)
Definition: RexxCore.h:363
#define TheFalseObject
Definition: RexxCore.h:195
const int ARG_ONE
Definition: RexxCore.h:83
RexxString * optionalStringArgument(RexxObject *o, RexxString *d, RexxString *kind, size_t p)
Definition: RexxCore.h:340
#define Error_Incorrect_method_position
#define Error_Incorrect_method_length
#define memory_mark(oref)
Definition: RexxMemory.hpp:450
RexxObject * new_object(size_t s)
Definition: RexxMemory.hpp:436
#define flatten_reference(oref, envel)
Definition: RexxMemory.hpp:498
#define CLASS_CREATE(name, id, className)
Definition: RexxMemory.hpp:503
#define memory_mark_general(oref)
Definition: RexxMemory.hpp:451
#define cleanUpFlatten
Definition: RexxMemory.hpp:484
#define setUpFlatten(type)
Definition: RexxMemory.hpp:478
RexxString * new_string(const char *s, stringsize_t l)
stringsize_t positionArgument(RexxObject *argument, size_t position)
stringsize_t lengthArgument(RexxObject *argument, size_t position)
static const wholenumber_t MAX_WHOLENUMBER
Definition: Numerics.hpp:62
static wholenumber_t minVal(wholenumber_t n1, wholenumber_t n2)
Definition: Numerics.hpp:116
void setDataLength(size_t l)
Definition: BufferClass.hpp:55
void copyData(size_t offset, const char *string, size_t l)
Definition: BufferClass.hpp:57
size_t getDataLength()
Definition: BufferClass.hpp:53
virtual char * getData()
void checkAbstract()
void setBehaviour(RexxBehaviour *b)
RexxObject * clone()
RexxMutableBuffer * newRexx(RexxObject **, size_t, size_t)
RexxInteger * isASCIIRexx()
RexxInteger * caselessLastPos(RexxString *needle, RexxInteger *_start, RexxInteger *_range)
RexxMutableBuffer * caselessChangeStr(RexxString *needle, RexxString *newNeedle, RexxInteger *countArg)
RexxMutableBuffer * translate(RexxString *tableo, RexxString *tablei, RexxString *pad, RexxInteger *, RexxInteger *)
RexxMutableBuffer * appendCstring(const char *, size_t blength)
void closeGap(size_t offset, size_t _size, size_t tailSize)
bool primitiveCaselessMatch(stringsize_t start, RexxString *other, stringsize_t offset, stringsize_t len)
RexxString * subchar(RexxInteger *startPosition)
void setData(size_t offset, codepoint_t character, size_t l)
void openGap(size_t offset, size_t _size, size_t tailSize)
void setIsASCII(bool value=true)
RexxMutableBuffer * mydelete(RexxObject *, RexxObject *)
RexxArray * subWords(RexxInteger *, RexxInteger *)
RexxMutableBuffer * changeStr(RexxString *needle, RexxString *newNeedle, RexxInteger *countArg)
void liveGeneral(int reason)
RexxInteger * verify(RexxString *, RexxString *, RexxInteger *, RexxInteger *)
RexxMutableBuffer * lower(RexxInteger *_start, RexxInteger *_length)
static void createInstance()
void flatten(RexxEnvelope *envelope)
const char * getStringData()
RexxMutableBuffer * append(RexxObject *)
RexxInteger * posRexx(RexxString *needle, RexxInteger *_start, RexxInteger *_range)
RexxMutableBuffer * space(RexxInteger *space_count, RexxString *pad)
RexxMutableBuffer * delWord(RexxInteger *position, RexxInteger *plength)
RexxInteger * wordPos(RexxString *, RexxInteger *)
bool primitiveMatch(stringsize_t start, RexxString *other, stringsize_t offset, stringsize_t len)
void setIsASCIIChecked(bool value=true)
size_t setDataLength(size_t l)
RexxInteger * caselessWordPos(RexxString *, RexxInteger *)
RexxObject * setBufferLength(size_t)
RexxInteger * caselessMatch(RexxInteger *start_, RexxString *other, RexxInteger *offset_, RexxInteger *len_)
RexxMutableBuffer * upper(RexxInteger *_start, RexxInteger *_length)
RexxInteger * countStrRexx(RexxString *needle)
RexxInteger * matchChar(RexxInteger *position_, RexxString *matchSet)
void copyData(size_t offset, const char *string, size_t l)
RexxString * makeString()
void adjustGap(size_t offset, size_t _size, size_t _newSize)
RexxObject * lengthRexx()
RexxInteger * caselessCountStrRexx(RexxString *needle)
RexxInteger * wordLength(RexxInteger *)
RexxInteger * wordIndex(RexxInteger *)
RexxObject * setBufferSize(RexxInteger *)
static RexxClass * classInstance
void ensureCapacity(size_t addedLength)
RexxInteger * lastPos(RexxString *needle, RexxInteger *_start, RexxInteger *_range)
RexxMutableBuffer * overlay(RexxObject *, RexxObject *, RexxObject *, RexxObject *)
RexxInteger * caselessMatchChar(RexxInteger *position_, RexxString *matchSet)
char * setCapacity(size_t newLength)
RexxMutableBuffer * replaceAt(RexxObject *str, RexxObject *pos, RexxObject *len, RexxObject *pad)
char getCharB(size_t offset)
RexxString * subWord(RexxInteger *, RexxInteger *)
RexxString * primitiveMakeString()
RexxInteger * match(RexxInteger *start_, RexxString *other, RexxInteger *offset_, RexxInteger *len_)
RexxInteger * caselessPos(RexxString *needle, RexxInteger *_start, RexxInteger *_range)
RexxString * word(RexxInteger *)
RexxString * substr(RexxInteger *startPosition, RexxInteger *len, RexxString *pad)
void setLength(size_t l)
RexxMutableBuffer * insert(RexxObject *, RexxObject *, RexxObject *, RexxObject *)
void sendMessage(RexxString *, RexxArray *, RexxDirectory *, ProtectedObject &)
bool messageSend(RexxString *, RexxObject **, size_t, size_t, ProtectedObject &, bool processUnknown=true, bool dynamicTarget=true)
RexxObject * makeArrayRexx()
size_t getLength()
bool checkIsASCII()
const char * getStringData()
char getChar(size_t p)
static size_t pos(const char *stringData, size_t haystack_length, RexxString *needle, size_t _start, size_t _range)
Definition: StringUtil.cpp:155
static RexxInteger * caselessWordPos(const char *data, size_t length, RexxString *phrase, RexxInteger *pstart)
static RexxInteger * wordIndex(const char *data, size_t length, RexxInteger *position)
static RexxArray * subWords(const char *data, size_t length, RexxInteger *position, RexxInteger *plength)
static bool checkIsASCII(const char *s, size_t length)
static size_t memPos(const char *string, size_t length, char target)
static void skipBlanks(const char **String, size_t *StringLength)
static size_t caselessLastPos(const char *stringData, size_t haystackLen, RexxString *needle, size_t _start, size_t range)
Definition: StringUtil.cpp:360
static size_t wordCount(const char *String, size_t StringLength)
static RexxString * subWord(const char *data, size_t length, RexxInteger *position, RexxInteger *plength)
static int caselessCompare(const char *, const char *, size_t)
Definition: StringUtil.cpp:580
static RexxInteger * lastPosRexx(const char *stringData, size_t haystackLen, RexxString *needle, RexxInteger *_start, RexxInteger *_range)
Definition: StringUtil.cpp:255
static size_t caselessPos(const char *stringData, size_t haystack_length, RexxString *needle, size_t _start, size_t _range)
Definition: StringUtil.cpp:205
static size_t nextWord(const char **String, size_t *StringLength, const char **NextString)
static RexxInteger * wordPos(const char *data, size_t length, RexxString *phrase, RexxInteger *pstart)
static RexxInteger * wordLength(const char *data, size_t length, RexxInteger *position)
static RexxArray * makearray(const char *start, size_t length, RexxString *separator)
Definition: StringUtil.cpp:493
static size_t countStr(const char *hayStack, size_t hayStackLength, RexxString *needle)
static size_t caselessCountStr(const char *hayStack, size_t hayStackLength, RexxString *needle)
static RexxInteger * verify(const char *data, size_t stringLen, RexxString *ref, RexxString *option, RexxInteger *_start, RexxInteger *range)
static RexxString * word(const char *data, size_t length, RexxInteger *position)
static RexxString * subchar(const char *stringData, size_t stringLength, RexxInteger *positionArg)
Definition: StringUtil.cpp:442
static RexxString * substr(const char *, size_t, RexxInteger *, RexxInteger *, RexxString *)
Definition: StringUtil.cpp:66
static RexxInteger * posRexx(const char *stringData, size_t length, RexxString *needle, RexxInteger *pstart, RexxInteger *range)
Definition: StringUtil.cpp:130
ssize_t codepoint_t
Definition: rexx.h:232
size_t stringsize_t
Definition: rexx.h:228