Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
strscan.c
Go to the documentation of this file.
1/*
2 $Id$
3
4 Copyright (c) 1999-2006 Minero Aoki
5
6 This program is free software.
7 You can redistribute this program under the terms of the Ruby's or 2-clause
8 BSD License. For details, see the COPYING and LICENSE.txt files.
9*/
10
11#include "ruby/ruby.h"
12#include "ruby/re.h"
13#include "ruby/encoding.h"
14
15#ifdef RUBY_EXTCONF_H
16# include RUBY_EXTCONF_H
17#endif
18
19#ifdef HAVE_ONIG_REGION_MEMSIZE
20extern size_t onig_region_memsize(const struct re_registers *regs);
21#endif
22
23#include <stdbool.h>
24
25#define STRSCAN_VERSION "3.0.1"
26
27/* =======================================================================
28 Data Type Definitions
29 ======================================================================= */
30
31static VALUE StringScanner;
32static VALUE ScanError;
33static ID id_byteslice;
34
36{
37 /* multi-purpose flags */
38 unsigned long flags;
39#define FLAG_MATCHED (1 << 0)
40
41 /* the string to scan */
43
44 /* scan pointers */
45 long prev; /* legal only when MATCHED_P(s) */
46 long curr; /* always legal */
47
48 /* the regexp register; legal only when MATCHED_P(s) */
50
51 /* regexp used for last scan */
53
54 /* anchor mode */
56};
57
/* Match-status flag helpers; `s` is a struct strscanner pointer. */
#define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
#define MATCHED(s)            ((s)->flags |= FLAG_MATCHED)
#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED)

/* Accessors for the scanned string and the scan pointer. */
#define S_PBEG(s)  (RSTRING_PTR((s)->str))
#define S_LEN(s)  (RSTRING_LEN((s)->str))
#define S_PEND(s)  (S_PBEG(s) + S_LEN(s))
#define CURPTR(s) (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)

/* True when the scan pointer is at or past the end of the string.
 * Uses the macro argument throughout, so it no longer depends on the
 * caller having a variable named `p` in scope. */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))

/* Fetches the scanner struct of obj into var; raises ArgumentError when the
 * object has no string yet. */
#define GET_SCANNER(obj,var) do {\
    (var) = check_strscan(obj);\
    if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\
} while (0)
74
75/* =======================================================================
76 Function Prototypes
77 ======================================================================= */
78
79static inline long minl _((const long n, const long x));
80static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
81static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
82
83static struct strscanner *check_strscan _((VALUE obj));
84static void strscan_mark _((void *p));
85static void strscan_free _((void *p));
86static size_t strscan_memsize _((const void *p));
87static VALUE strscan_s_allocate _((VALUE klass));
88static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
89static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
90
91static VALUE strscan_s_mustc _((VALUE self));
92static VALUE strscan_terminate _((VALUE self));
93static VALUE strscan_clear _((VALUE self));
94static VALUE strscan_get_string _((VALUE self));
95static VALUE strscan_set_string _((VALUE self, VALUE str));
96static VALUE strscan_concat _((VALUE self, VALUE str));
97static VALUE strscan_get_pos _((VALUE self));
98static VALUE strscan_set_pos _((VALUE self, VALUE pos));
99static VALUE strscan_do_scan _((VALUE self, VALUE regex,
100 int succptr, int getstr, int headonly));
101static VALUE strscan_scan _((VALUE self, VALUE re));
102static VALUE strscan_match_p _((VALUE self, VALUE re));
103static VALUE strscan_skip _((VALUE self, VALUE re));
104static VALUE strscan_check _((VALUE self, VALUE re));
105static VALUE strscan_scan_full _((VALUE self, VALUE re,
106 VALUE succp, VALUE getp));
107static VALUE strscan_scan_until _((VALUE self, VALUE re));
108static VALUE strscan_skip_until _((VALUE self, VALUE re));
109static VALUE strscan_check_until _((VALUE self, VALUE re));
110static VALUE strscan_search_full _((VALUE self, VALUE re,
111 VALUE succp, VALUE getp));
112static void adjust_registers_to_matched _((struct strscanner *p));
113static VALUE strscan_getch _((VALUE self));
114static VALUE strscan_get_byte _((VALUE self));
115static VALUE strscan_getbyte _((VALUE self));
116static VALUE strscan_peek _((VALUE self, VALUE len));
117static VALUE strscan_peep _((VALUE self, VALUE len));
118static VALUE strscan_unscan _((VALUE self));
119static VALUE strscan_bol_p _((VALUE self));
120static VALUE strscan_eos_p _((VALUE self));
121static VALUE strscan_empty_p _((VALUE self));
122static VALUE strscan_rest_p _((VALUE self));
123static VALUE strscan_matched_p _((VALUE self));
124static VALUE strscan_matched _((VALUE self));
125static VALUE strscan_matched_size _((VALUE self));
126static VALUE strscan_aref _((VALUE self, VALUE idx));
127static VALUE strscan_pre_match _((VALUE self));
128static VALUE strscan_post_match _((VALUE self));
129static VALUE strscan_rest _((VALUE self));
130static VALUE strscan_rest_size _((VALUE self));
131
132static VALUE strscan_inspect _((VALUE self));
133static VALUE inspect1 _((struct strscanner *p));
134static VALUE inspect2 _((struct strscanner *p));
135
136/* =======================================================================
137 Utils
138 ======================================================================= */
139
140static VALUE
141str_new(struct strscanner *p, const char *ptr, long len)
142{
144 rb_enc_copy(str, p->str);
145 return str;
146}
147
/* Returns the smaller of two longs. */
static inline long
minl(const long x, const long y)
{
    if (y < x) {
        return y;
    }
    return x;
}
153
154static VALUE
155extract_range(struct strscanner *p, long beg_i, long end_i)
156{
157 if (beg_i > S_LEN(p)) return Qnil;
158 end_i = minl(end_i, S_LEN(p));
159 return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i);
160}
161
162static VALUE
163extract_beg_len(struct strscanner *p, long beg_i, long len)
164{
165 if (beg_i > S_LEN(p)) return Qnil;
166 len = minl(len, S_LEN(p) - beg_i);
167 return str_new(p, S_PBEG(p) + beg_i, len);
168}
169
170/* =======================================================================
171 Constructor
172 ======================================================================= */
173
174static void
175strscan_mark(void *ptr)
176{
177 struct strscanner *p = ptr;
178 rb_gc_mark(p->str);
179 rb_gc_mark(p->regex);
180}
181
182static void
183strscan_free(void *ptr)
184{
185 struct strscanner *p = ptr;
186 onig_region_free(&(p->regs), 0);
187 ruby_xfree(p);
188}
189
190static size_t
191strscan_memsize(const void *ptr)
192{
193 const struct strscanner *p = ptr;
194 size_t size = sizeof(*p) - sizeof(p->regs);
195#ifdef HAVE_ONIG_REGION_MEMSIZE
197#endif
198 return size;
199}
200
201static const rb_data_type_t strscanner_type = {
202 "StringScanner",
203 {strscan_mark, strscan_free, strscan_memsize},
205};
206
207static VALUE
208strscan_s_allocate(VALUE klass)
209{
210 struct strscanner *p;
211 VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p);
212
214 onig_region_init(&(p->regs));
215 p->str = Qnil;
216 p->regex = Qnil;
217 return obj;
218}
219
220/*
221 * call-seq:
222 * StringScanner.new(string, fixed_anchor: false)
223 * StringScanner.new(string, dup = false)
224 *
225 * Creates a new StringScanner object to scan over the given +string+.
226 *
227 * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
228 * the string. Otherwise, +\A+ always matches the current position.
229 *
230 * +dup+ argument is obsolete and not used now.
231 */
232static VALUE
233strscan_initialize(int argc, VALUE *argv, VALUE self)
234{
235 struct strscanner *p;
236 VALUE str, options;
237
238 p = check_strscan(self);
239 rb_scan_args(argc, argv, "11", &str, &options);
240 options = rb_check_hash_type(options);
241 if (!NIL_P(options)) {
242 VALUE fixed_anchor;
243 ID keyword_ids[1];
244 keyword_ids[0] = rb_intern("fixed_anchor");
245 rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
246 if (fixed_anchor == Qundef) {
247 p->fixed_anchor_p = false;
248 }
249 else {
250 p->fixed_anchor_p = RTEST(fixed_anchor);
251 }
252 }
253 else {
254 p->fixed_anchor_p = false;
255 }
257 p->str = str;
258
259 return self;
260}
261
262static struct strscanner *
263check_strscan(VALUE obj)
264{
265 return rb_check_typeddata(obj, &strscanner_type);
266}
267
268/*
269 * call-seq:
270 * dup
271 * clone
272 *
273 * Duplicates a StringScanner object.
274 */
275static VALUE
276strscan_init_copy(VALUE vself, VALUE vorig)
277{
278 struct strscanner *self, *orig;
279
280 self = check_strscan(vself);
281 orig = check_strscan(vorig);
282 if (self != orig) {
283 self->flags = orig->flags;
284 self->str = orig->str;
285 self->prev = orig->prev;
286 self->curr = orig->curr;
287 if (rb_reg_region_copy(&self->regs, &orig->regs))
288 rb_memerror();
289 RB_GC_GUARD(vorig);
290 }
291
292 return vself;
293}
294
295/* =======================================================================
296 Instance Methods
297 ======================================================================= */
298
299/*
300 * call-seq: StringScanner.must_C_version
301 *
302 * This method is defined for backward compatibility.
303 */
304static VALUE
305strscan_s_mustc(VALUE self)
306{
307 return self;
308}
309
310/*
311 * Reset the scan pointer (index 0) and clear matching data.
312 */
313static VALUE
314strscan_reset(VALUE self)
315{
316 struct strscanner *p;
317
318 GET_SCANNER(self, p);
319 p->curr = 0;
321 return self;
322}
323
324/*
325 * call-seq:
326 * terminate
327 * clear
328 *
329 * Sets the scan pointer to the end of the string and clears matching data.
330 */
331static VALUE
332strscan_terminate(VALUE self)
333{
334 struct strscanner *p;
335
336 GET_SCANNER(self, p);
337 p->curr = S_LEN(p);
339 return self;
340}
341
342/*
343 * Equivalent to #terminate.
344 * This method is obsolete; use #terminate instead.
345 */
346static VALUE
347strscan_clear(VALUE self)
348{
349 rb_warning("StringScanner#clear is obsolete; use #terminate instead");
350 return strscan_terminate(self);
351}
352
353/*
354 * Returns the string being scanned.
355 */
356static VALUE
357strscan_get_string(VALUE self)
358{
359 struct strscanner *p;
360
361 GET_SCANNER(self, p);
362 return p->str;
363}
364
365/*
366 * call-seq: string=(str)
367 *
368 * Changes the string being scanned to +str+ and resets the scanner.
369 * Returns +str+.
370 */
371static VALUE
372strscan_set_string(VALUE self, VALUE str)
373{
374 struct strscanner *p = check_strscan(self);
375
377 p->str = str;
378 p->curr = 0;
380 return str;
381}
382
383/*
384 * call-seq:
385 * concat(str)
386 * <<(str)
387 *
388 * Appends +str+ to the string being scanned.
389 * This method does not affect scan pointer.
390 *
391 * s = StringScanner.new("Fri Dec 12 1975 14:39")
392 * s.scan(/Fri /)
393 * s << " +1000 GMT"
394 * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
395 * s.scan(/Dec/) # -> "Dec"
396 */
397static VALUE
398strscan_concat(VALUE self, VALUE str)
399{
400 struct strscanner *p;
401
402 GET_SCANNER(self, p);
404 rb_str_append(p->str, str);
405 return self;
406}
407
408/*
409 * Returns the byte position of the scan pointer. In the 'reset' position, this
410 * value is zero. In the 'terminated' position (i.e. the string is exhausted),
411 * this value is the bytesize of the string.
412 *
413 * In short, it's a 0-based index into bytes of the string.
414 *
415 * s = StringScanner.new('test string')
416 * s.pos # -> 0
417 * s.scan_until /str/ # -> "test str"
418 * s.pos # -> 8
419 * s.terminate # -> #<StringScanner fin>
420 * s.pos # -> 11
421 */
422static VALUE
423strscan_get_pos(VALUE self)
424{
425 struct strscanner *p;
426
427 GET_SCANNER(self, p);
428 return INT2FIX(p->curr);
429}
430
431/*
432 * Returns the character position of the scan pointer. In the 'reset' position, this
433 * value is zero. In the 'terminated' position (i.e. the string is exhausted),
434 * this value is the size of the string.
435 *
436 * In short, it's a 0-based index into the string.
437 *
438 * s = StringScanner.new("abcädeföghi")
439 * s.charpos # -> 0
440 * s.scan_until(/ä/) # -> "abcä"
441 * s.pos # -> 5
442 * s.charpos # -> 4
443 */
444static VALUE
445strscan_get_charpos(VALUE self)
446{
447 struct strscanner *p;
448
449 GET_SCANNER(self, p);
450
451 return LONG2NUM(rb_enc_strlen(S_PBEG(p), CURPTR(p), rb_enc_get(p->str)));
452}
453
454/*
455 * call-seq: pos=(n)
456 *
457 * Sets the byte position of the scan pointer.
458 *
459 * s = StringScanner.new('test string')
460 * s.pos = 7 # -> 7
461 * s.rest # -> "ring"
462 */
463static VALUE
464strscan_set_pos(VALUE self, VALUE v)
465{
466 struct strscanner *p;
467 long i;
468
469 GET_SCANNER(self, p);
470 i = NUM2INT(v);
471 if (i < 0) i += S_LEN(p);
472 if (i < 0) rb_raise(rb_eRangeError, "index out of range");
473 if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
474 p->curr = i;
475 return LONG2NUM(i);
476}
477
478static inline UChar *
479match_target(struct strscanner *p)
480{
481 if (p->fixed_anchor_p) {
482 return (UChar *)S_PBEG(p);
483 }
484 else
485 {
486 return (UChar *)CURPTR(p);
487 }
488}
489
490static inline void
491set_registers(struct strscanner *p, size_t length)
492{
493 const int at = 0;
494 OnigRegion *regs = &(p->regs);
496 if (onig_region_set(regs, at, 0, 0)) return;
497 if (p->fixed_anchor_p) {
498 regs->beg[at] = p->curr;
499 regs->end[at] = p->curr + length;
500 }
501 else
502 {
503 regs->end[at] = length;
504 }
505}
506
507static inline void
508succ(struct strscanner *p)
509{
510 if (p->fixed_anchor_p) {
511 p->curr = p->regs.end[0];
512 }
513 else
514 {
515 p->curr += p->regs.end[0];
516 }
517}
518
519static inline long
520last_match_length(struct strscanner *p)
521{
522 if (p->fixed_anchor_p) {
523 return p->regs.end[0] - p->prev;
524 }
525 else
526 {
527 return p->regs.end[0];
528 }
529}
530
531static inline long
532adjust_register_position(struct strscanner *p, long position)
533{
534 if (p->fixed_anchor_p) {
535 return position;
536 }
537 else {
538 return p->prev + position;
539 }
540}
541
542static VALUE
543strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
544{
545 struct strscanner *p;
546
547 if (headonly) {
548 if (!RB_TYPE_P(pattern, T_REGEXP)) {
549 StringValue(pattern);
550 }
551 }
552 else {
553 Check_Type(pattern, T_REGEXP);
554 }
555 GET_SCANNER(self, p);
556
558 if (S_RESTLEN(p) < 0) {
559 return Qnil;
560 }
561
562 if (RB_TYPE_P(pattern, T_REGEXP)) {
564 regex_t *re;
565 long ret;
566 int tmpreg;
567
568 p->regex = pattern;
569 re = rb_reg_prepare_re(pattern, p->str);
570 tmpreg = re != RREGEXP_PTR(pattern);
571 if (!tmpreg) RREGEXP(pattern)->usecnt++;
572
573 if (headonly) {
574 ret = onig_match(re,
575 match_target(p),
576 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
577 (UChar* )CURPTR(p),
578 &(p->regs),
580 }
581 else {
582 ret = onig_search(re,
583 match_target(p),
584 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
585 (UChar* )CURPTR(p),
586 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
587 &(p->regs),
589 }
590 if (!tmpreg) RREGEXP(pattern)->usecnt--;
591 if (tmpreg) {
592 if (RREGEXP(pattern)->usecnt) {
593 onig_free(re);
594 }
595 else {
596 onig_free(RREGEXP_PTR(pattern));
597 RREGEXP_PTR(pattern) = re;
598 }
599 }
600
601 if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
602 if (ret < 0) {
603 /* not matched */
604 return Qnil;
605 }
606 }
607 else {
608 rb_enc_check(p->str, pattern);
609 if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
610 return Qnil;
611 }
612 if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
613 return Qnil;
614 }
615 set_registers(p, RSTRING_LEN(pattern));
616 }
617
618 MATCHED(p);
619 p->prev = p->curr;
620
621 if (succptr) {
622 succ(p);
623 }
624 {
625 const long length = last_match_length(p);
626 if (getstr) {
627 return extract_beg_len(p, p->prev, length);
628 }
629 else {
630 return INT2FIX(length);
631 }
632 }
633}
634
635/*
636 * call-seq: scan(pattern) => String
637 *
638 * Tries to match with +pattern+ at the current position. If there's a match,
639 * the scanner advances the "scan pointer" and returns the matched string.
640 * Otherwise, the scanner returns +nil+.
641 *
642 * s = StringScanner.new('test string')
643 * p s.scan(/\w+/) # -> "test"
644 * p s.scan(/\w+/) # -> nil
645 * p s.scan(/\s+/) # -> " "
646 * p s.scan("str") # -> "str"
647 * p s.scan(/\w+/) # -> "ing"
648 * p s.scan(/./) # -> nil
649 *
650 */
651static VALUE
652strscan_scan(VALUE self, VALUE re)
653{
654 return strscan_do_scan(self, re, 1, 1, 1);
655}
656
657/*
658 * call-seq: match?(pattern)
659 *
660 * Tests whether the given +pattern+ is matched from the current scan pointer.
661 * Returns the length of the match, or +nil+. The scan pointer is not advanced.
662 *
663 * s = StringScanner.new('test string')
664 * p s.match?(/\w+/) # -> 4
665 * p s.match?(/\w+/) # -> 4
666 * p s.match?("test") # -> 4
667 * p s.match?(/\s+/) # -> nil
668 */
669static VALUE
670strscan_match_p(VALUE self, VALUE re)
671{
672 return strscan_do_scan(self, re, 0, 0, 1);
673}
674
675/*
676 * call-seq: skip(pattern)
677 *
678 * Attempts to skip over the given +pattern+ beginning with the scan pointer.
679 * If it matches, the scan pointer is advanced to the end of the match, and the
680 * length of the match is returned. Otherwise, +nil+ is returned.
681 *
682 * It's similar to #scan, but without returning the matched string.
683 *
684 * s = StringScanner.new('test string')
685 * p s.skip(/\w+/) # -> 4
686 * p s.skip(/\w+/) # -> nil
687 * p s.skip(/\s+/) # -> 1
688 * p s.skip("st") # -> 2
689 * p s.skip(/\w+/) # -> 4
690 * p s.skip(/./) # -> nil
691 *
692 */
693static VALUE
694strscan_skip(VALUE self, VALUE re)
695{
696 return strscan_do_scan(self, re, 1, 0, 1);
697}
698
699/*
700 * call-seq: check(pattern)
701 *
702 * This returns the value that #scan would return, without advancing the scan
703 * pointer. The match register is affected, though.
704 *
705 * s = StringScanner.new("Fri Dec 12 1975 14:39")
706 * s.check /Fri/ # -> "Fri"
707 * s.pos # -> 0
708 * s.matched # -> "Fri"
709 * s.check /12/ # -> nil
710 * s.matched # -> nil
711 *
712 * Mnemonic: it "checks" to see whether a #scan will return a value.
713 */
714static VALUE
715strscan_check(VALUE self, VALUE re)
716{
717 return strscan_do_scan(self, re, 0, 1, 1);
718}
719
720/*
721 * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
722 *
723 * Tests whether the given +pattern+ is matched from the current scan pointer.
724 * Advances the scan pointer if +advance_pointer_p+ is true.
725 * Returns the matched string if +return_string_p+ is true.
726 * The match register is affected.
727 *
728 * "full" means "#scan with full parameters".
729 */
730static VALUE
731strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
732{
733 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
734}
735
736/*
737 * call-seq: scan_until(pattern)
738 *
739 * Scans the string _until_ the +pattern+ is matched. Returns the substring up
740 * to and including the end of the match, advancing the scan pointer to that
741 * location. If there is no match, +nil+ is returned.
742 *
743 * s = StringScanner.new("Fri Dec 12 1975 14:39")
744 * s.scan_until(/1/) # -> "Fri Dec 1"
745 * s.pre_match # -> "Fri Dec "
746 * s.scan_until(/XYZ/) # -> nil
747 */
748static VALUE
749strscan_scan_until(VALUE self, VALUE re)
750{
751 return strscan_do_scan(self, re, 1, 1, 0);
752}
753
754/*
755 * call-seq: exist?(pattern)
756 *
757 * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
758 * without advancing the scan pointer. This predicates whether a #scan_until
759 * will return a value.
760 *
761 * s = StringScanner.new('test string')
762 * s.exist? /s/ # -> 3
763 * s.scan /test/ # -> "test"
764 * s.exist? /s/ # -> 2
765 * s.exist? /e/ # -> nil
766 */
767static VALUE
768strscan_exist_p(VALUE self, VALUE re)
769{
770 return strscan_do_scan(self, re, 0, 0, 0);
771}
772
773/*
774 * call-seq: skip_until(pattern)
775 *
776 * Advances the scan pointer until +pattern+ is matched and consumed. Returns
777 * the number of bytes advanced, or +nil+ if no match was found.
778 *
779 * Look ahead to match +pattern+, and advance the scan pointer to the _end_
780 * of the match. Return the number of bytes advanced, or +nil+ if the
781 * match was unsuccessful.
782 *
783 * It's similar to #scan_until, but without returning the intervening string.
784 *
785 * s = StringScanner.new("Fri Dec 12 1975 14:39")
786 * s.skip_until /12/ # -> 10
787 * s #
788 */
789static VALUE
790strscan_skip_until(VALUE self, VALUE re)
791{
792 return strscan_do_scan(self, re, 1, 0, 0);
793}
794
795/*
796 * call-seq: check_until(pattern)
797 *
798 * This returns the value that #scan_until would return, without advancing the
799 * scan pointer. The match register is affected, though.
800 *
801 * s = StringScanner.new("Fri Dec 12 1975 14:39")
802 * s.check_until /12/ # -> "Fri Dec 12"
803 * s.pos # -> 0
804 * s.matched # -> "12"
805 *
806 * Mnemonic: it "checks" to see whether a #scan_until will return a value.
807 */
808static VALUE
809strscan_check_until(VALUE self, VALUE re)
810{
811 return strscan_do_scan(self, re, 0, 1, 0);
812}
813
814/*
815 * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
816 *
817 * Scans the string _until_ the +pattern+ is matched.
818 * Advances the scan pointer if +advance_pointer_p+, otherwise not.
819 * Returns the matched string if +return_string_p+ is true, otherwise
820 * returns the number of bytes advanced.
821 * This method does affect the match register.
822 */
823static VALUE
824strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
825{
826 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
827}
828
829static void
830adjust_registers_to_matched(struct strscanner *p)
831{
832 onig_region_clear(&(p->regs));
833 if (p->fixed_anchor_p) {
834 onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
835 }
836 else {
837 onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
838 }
839}
840
841/*
842 * Scans one character and returns it.
843 * This method is multibyte character sensitive.
844 *
845 * s = StringScanner.new("ab")
846 * s.getch # => "a"
847 * s.getch # => "b"
848 * s.getch # => nil
849 *
850 * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
851 * s.getch # => "\x{A4A2}" # Japanese hira-kana "A" in EUC-JP
852 * s.getch # => nil
853 */
854static VALUE
855strscan_getch(VALUE self)
856{
857 struct strscanner *p;
858 long len;
859
860 GET_SCANNER(self, p);
862 if (EOS_P(p))
863 return Qnil;
864
866 len = minl(len, S_RESTLEN(p));
867 p->prev = p->curr;
868 p->curr += len;
869 MATCHED(p);
870 adjust_registers_to_matched(p);
871 return extract_range(p,
872 adjust_register_position(p, p->regs.beg[0]),
873 adjust_register_position(p, p->regs.end[0]));
874}
875
876/*
877 * Scans one byte and returns it.
878 * This method is not multibyte character sensitive.
879 * See also: #getch.
880 *
881 * s = StringScanner.new('ab')
882 * s.get_byte # => "a"
883 * s.get_byte # => "b"
884 * s.get_byte # => nil
885 *
886 * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
887 * s.get_byte # => "\xA4"
888 * s.get_byte # => "\xA2"
889 * s.get_byte # => nil
890 */
891static VALUE
892strscan_get_byte(VALUE self)
893{
894 struct strscanner *p;
895
896 GET_SCANNER(self, p);
898 if (EOS_P(p))
899 return Qnil;
900
901 p->prev = p->curr;
902 p->curr++;
903 MATCHED(p);
904 adjust_registers_to_matched(p);
905 return extract_range(p,
906 adjust_register_position(p, p->regs.beg[0]),
907 adjust_register_position(p, p->regs.end[0]));
908}
909
910/*
911 * Equivalent to #get_byte.
912 * This method is obsolete; use #get_byte instead.
913 */
914static VALUE
915strscan_getbyte(VALUE self)
916{
917 rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
918 return strscan_get_byte(self);
919}
920
921/*
922 * call-seq: peek(len)
923 *
924 * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
925 * advancing the scan pointer.
926 *
927 * s = StringScanner.new('test string')
928 * s.peek(7) # => "test st"
929 * s.peek(7) # => "test st"
930 *
931 */
932static VALUE
933strscan_peek(VALUE self, VALUE vlen)
934{
935 struct strscanner *p;
936 long len;
937
938 GET_SCANNER(self, p);
939
940 len = NUM2LONG(vlen);
941 if (EOS_P(p))
942 return str_new(p, "", 0);
943
944 len = minl(len, S_RESTLEN(p));
945 return extract_beg_len(p, p->curr, len);
946}
947
948/*
949 * Equivalent to #peek.
950 * This method is obsolete; use #peek instead.
951 */
952static VALUE
953strscan_peep(VALUE self, VALUE vlen)
954{
955 rb_warning("StringScanner#peep is obsolete; use #peek instead");
956 return strscan_peek(self, vlen);
957}
958
959/*
960 * Sets the scan pointer to the previous position. Only one previous position is
961 * remembered, and it changes with each scanning operation.
962 *
963 * s = StringScanner.new('test string')
964 * s.scan(/\w+/) # => "test"
965 * s.unscan
966 * s.scan(/../) # => "te"
967 * s.scan(/\d/) # => nil
968 * s.unscan # ScanError: unscan failed: previous match record not exist
969 */
970static VALUE
971strscan_unscan(VALUE self)
972{
973 struct strscanner *p;
974
975 GET_SCANNER(self, p);
976 if (! MATCHED_P(p))
977 rb_raise(ScanError, "unscan failed: previous match record not exist");
978 p->curr = p->prev;
980 return self;
981}
982
983/*
984 * Returns +true+ if and only if the scan pointer is at the beginning of the line.
985 *
986 * s = StringScanner.new("test\ntest\n")
987 * s.bol? # => true
988 * s.scan(/te/)
989 * s.bol? # => false
990 * s.scan(/st\n/)
991 * s.bol? # => true
992 * s.terminate
993 * s.bol? # => true
994 */
995static VALUE
996strscan_bol_p(VALUE self)
997{
998 struct strscanner *p;
999
1000 GET_SCANNER(self, p);
1001 if (CURPTR(p) > S_PEND(p)) return Qnil;
1002 if (p->curr == 0) return Qtrue;
1003 return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
1004}
1005
1006/*
1007 * Returns +true+ if the scan pointer is at the end of the string.
1008 *
1009 * s = StringScanner.new('test string')
1010 * p s.eos? # => false
1011 * s.scan(/test/)
1012 * p s.eos? # => false
1013 * s.terminate
1014 * p s.eos? # => true
1015 */
1016static VALUE
1017strscan_eos_p(VALUE self)
1018{
1019 struct strscanner *p;
1020
1021 GET_SCANNER(self, p);
1022 return EOS_P(p) ? Qtrue : Qfalse;
1023}
1024
1025/*
1026 * Equivalent to #eos?.
1027 * This method is obsolete, use #eos? instead.
1028 */
1029static VALUE
1030strscan_empty_p(VALUE self)
1031{
1032 rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
1033 return strscan_eos_p(self);
1034}
1035
1036/*
1037 * Returns true if and only if there is more data in the string. See #eos?.
1038 * This method is obsolete; use #eos? instead.
1039 *
1040 * s = StringScanner.new('test string')
1041 * s.eos? # These two
1042 * s.rest? # are opposites.
1043 */
1044static VALUE
1045strscan_rest_p(VALUE self)
1046{
1047 struct strscanner *p;
1048
1049 GET_SCANNER(self, p);
1050 return EOS_P(p) ? Qfalse : Qtrue;
1051}
1052
1053/*
1054 * Returns +true+ if and only if the last match was successful.
1055 *
1056 * s = StringScanner.new('test string')
1057 * s.match?(/\w+/) # => 4
1058 * s.matched? # => true
1059 * s.match?(/\d+/) # => nil
1060 * s.matched? # => false
1061 */
1062static VALUE
1063strscan_matched_p(VALUE self)
1064{
1065 struct strscanner *p;
1066
1067 GET_SCANNER(self, p);
1068 return MATCHED_P(p) ? Qtrue : Qfalse;
1069}
1070
1071/*
1072 * Returns the last matched string.
1073 *
1074 * s = StringScanner.new('test string')
1075 * s.match?(/\w+/) # -> 4
1076 * s.matched # -> "test"
1077 */
1078static VALUE
1079strscan_matched(VALUE self)
1080{
1081 struct strscanner *p;
1082
1083 GET_SCANNER(self, p);
1084 if (! MATCHED_P(p)) return Qnil;
1085 return extract_range(p,
1086 adjust_register_position(p, p->regs.beg[0]),
1087 adjust_register_position(p, p->regs.end[0]));
1088}
1089
1090/*
1091 * Returns the size of the most recent match in bytes, or +nil+ if there
1092 * was no recent match. This is different than <tt>matched.size</tt>,
1093 * which will return the size in characters.
1094 *
1095 * s = StringScanner.new('test string')
1096 * s.check /\w+/ # -> "test"
1097 * s.matched_size # -> 4
1098 * s.check /\d+/ # -> nil
1099 * s.matched_size # -> nil
1100 */
1101static VALUE
1102strscan_matched_size(VALUE self)
1103{
1104 struct strscanner *p;
1105
1106 GET_SCANNER(self, p);
1107 if (! MATCHED_P(p)) return Qnil;
1108 return LONG2NUM(p->regs.end[0] - p->regs.beg[0]);
1109}
1110
1111static int
1112name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
1113{
1114 int num;
1115
1117 (const unsigned char* )name, (const unsigned char* )name_end, regs);
1118 if (num >= 1) {
1119 return num;
1120 }
1121 else {
1122 rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
1123 rb_long2int(name_end - name), name);
1124 }
1125
1127}
1128
1129/*
1130 * call-seq: [](n)
1131 *
1132 * Returns the n-th subgroup in the most recent match.
1133 *
1134 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1135 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1136 * s[0] # -> "Fri Dec 12 "
1137 * s[1] # -> "Fri"
1138 * s[2] # -> "Dec"
1139 * s[3] # -> "12"
1140 * s.post_match # -> "1975 14:39"
1141 * s.pre_match # -> ""
1142 *
1143 * s.reset
1144 * s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
1145 * s[0] # -> "Fri Dec 12 "
1146 * s[1] # -> "Fri"
1147 * s[2] # -> "Dec"
1148 * s[3] # -> "12"
1149 * s[:wday] # -> "Fri"
1150 * s[:month] # -> "Dec"
1151 * s[:day] # -> "12"
1152 * s.post_match # -> "1975 14:39"
1153 * s.pre_match # -> ""
1154 */
1155static VALUE
1156strscan_aref(VALUE self, VALUE idx)
1157{
1158 const char *name;
1159 struct strscanner *p;
1160 long i;
1161
1162 GET_SCANNER(self, p);
1163 if (! MATCHED_P(p)) return Qnil;
1164
1165 switch (TYPE(idx)) {
1166 case T_SYMBOL:
1167 idx = rb_sym2str(idx);
1168 /* fall through */
1169 case T_STRING:
1170 if (!RTEST(p->regex)) return Qnil;
1171 RSTRING_GETMEM(idx, name, i);
1172 i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
1173 break;
1174 default:
1175 i = NUM2LONG(idx);
1176 }
1177
1178 if (i < 0)
1179 i += p->regs.num_regs;
1180 if (i < 0) return Qnil;
1181 if (i >= p->regs.num_regs) return Qnil;
1182 if (p->regs.beg[i] == -1) return Qnil;
1183
1184 return extract_range(p,
1185 adjust_register_position(p, p->regs.beg[i]),
1186 adjust_register_position(p, p->regs.end[i]));
1187}
1188
1189/*
1190 * call-seq: size
1191 *
1192 * Returns the amount of subgroups in the most recent match.
1193 * The full match counts as a subgroup.
1194 *
1195 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1196 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1197 * s.size # -> 4
1198 */
1199static VALUE
1200strscan_size(VALUE self)
1201{
1202 struct strscanner *p;
1203
1204 GET_SCANNER(self, p);
1205 if (! MATCHED_P(p)) return Qnil;
1206 return INT2FIX(p->regs.num_regs);
1207}
1208
1209/*
1210 * call-seq: captures
1211 *
1212 * Returns the subgroups in the most recent match (not including the full match).
1213 * If nothing was previously matched, it returns nil.
1214 *
1215 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1216 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1217 * s.captures # -> ["Fri", "Dec", "12"]
1218 * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1219 * s.captures # -> nil
1220 */
1221static VALUE
1222strscan_captures(VALUE self)
1223{
1224 struct strscanner *p;
1225 int i, num_regs;
1226 VALUE new_ary;
1227
1228 GET_SCANNER(self, p);
1229 if (! MATCHED_P(p)) return Qnil;
1230
1231 num_regs = p->regs.num_regs;
1232 new_ary = rb_ary_new2(num_regs);
1233
1234 for (i = 1; i < num_regs; i++) {
1235 VALUE str = extract_range(p,
1236 adjust_register_position(p, p->regs.beg[i]),
1237 adjust_register_position(p, p->regs.end[i]));
1238 rb_ary_push(new_ary, str);
1239 }
1240
1241 return new_ary;
1242}
1243
1244/*
1245 * call-seq:
1246 * scanner.values_at( i1, i2, ... iN ) -> an_array
1247 *
1248 * Returns the subgroups in the most recent match at the given indices.
1249 * If nothing was previously matched, it returns nil.
1250 *
1251 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1252 * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1253 * s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1254 * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1255 * s.values_at 0, -1, 5, 2 # -> nil
1256 */
1257
1258static VALUE
1259strscan_values_at(int argc, VALUE *argv, VALUE self)
1260{
1261 struct strscanner *p;
1262 long i;
1263 VALUE new_ary;
1264
1265 GET_SCANNER(self, p);
1266 if (! MATCHED_P(p)) return Qnil;
1267
1268 new_ary = rb_ary_new2(argc);
1269 for (i = 0; i<argc; i++) {
1270 rb_ary_push(new_ary, strscan_aref(self, argv[i]));
1271 }
1272
1273 return new_ary;
1274}
1275
1276/*
1277 * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1278 *
1279 * s = StringScanner.new('test string')
1280 * s.scan(/\w+/) # -> "test"
1281 * s.scan(/\s+/) # -> " "
1282 * s.pre_match # -> "test"
1283 * s.post_match # -> "string"
1284 */
1285static VALUE
1286strscan_pre_match(VALUE self)
1287{
1288 struct strscanner *p;
1289
1290 GET_SCANNER(self, p);
1291 if (! MATCHED_P(p)) return Qnil;
1292 return extract_range(p,
1293 0,
1294 adjust_register_position(p, p->regs.beg[0]));
1295}
1296
1297/*
1298 * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1299 *
1300 * s = StringScanner.new('test string')
1301 * s.scan(/\w+/) # -> "test"
1302 * s.scan(/\s+/) # -> " "
1303 * s.pre_match # -> "test"
1304 * s.post_match # -> "string"
1305 */
1306static VALUE
1307strscan_post_match(VALUE self)
1308{
1309 struct strscanner *p;
1310
1311 GET_SCANNER(self, p);
1312 if (! MATCHED_P(p)) return Qnil;
1313 return extract_range(p,
1314 adjust_register_position(p, p->regs.end[0]),
1315 S_LEN(p));
1316}
1317
1318/*
1319 * Returns the "rest" of the string (i.e. everything after the scan pointer).
1320 * If there is no more data (eos? = true), it returns <tt>""</tt>.
1321 */
1322static VALUE
1323strscan_rest(VALUE self)
1324{
1325 struct strscanner *p;
1326
1327 GET_SCANNER(self, p);
1328 if (EOS_P(p)) {
1329 return str_new(p, "", 0);
1330 }
1331 return extract_range(p, p->curr, S_LEN(p));
1332}
1333
1334/*
1335 * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
1336 */
1337static VALUE
1338strscan_rest_size(VALUE self)
1339{
1340 struct strscanner *p;
1341 long i;
1342
1343 GET_SCANNER(self, p);
1344 if (EOS_P(p)) {
1345 return INT2FIX(0);
1346 }
1347 i = S_RESTLEN(p);
1348 return INT2FIX(i);
1349}
1350
1351/*
1352 * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1353 * This method is obsolete; use #rest_size instead.
1354 */
1355static VALUE
1356strscan_restsize(VALUE self)
1357{
1358 rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
1359 return strscan_rest_size(self);
1360}
1361
1362#define INSPECT_LENGTH 5
1363
1364/*
1365 * Returns a string that represents the StringScanner object, showing:
1366 * - the current position
1367 * - the size of the string
1368 * - the characters surrounding the scan pointer
1369 *
1370 * s = StringScanner.new("Fri Dec 12 1975 14:39")
1371 * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
1372 * s.scan_until /12/ # -> "Fri Dec 12"
1373 * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
1374 */
1375static VALUE
1376strscan_inspect(VALUE self)
1377{
1378 struct strscanner *p;
1379 VALUE a, b;
1380
1381 p = check_strscan(self);
1382 if (NIL_P(p->str)) {
1383 a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
1384 return a;
1385 }
1386 if (EOS_P(p)) {
1387 a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
1388 return a;
1389 }
1390 if (p->curr == 0) {
1391 b = inspect2(p);
1392 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
1393 rb_obj_class(self),
1394 p->curr, S_LEN(p),
1395 b);
1396 return a;
1397 }
1398 a = inspect1(p);
1399 b = inspect2(p);
1400 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
1401 rb_obj_class(self),
1402 p->curr, S_LEN(p),
1403 a, b);
1404 return a;
1405}
1406
1407static VALUE
1408inspect1(struct strscanner *p)
1409{
1410 VALUE str;
1411 long len;
1412
1413 if (p->curr == 0) return rb_str_new2("");
1414 if (p->curr > INSPECT_LENGTH) {
1415 str = rb_str_new_cstr("...");
1417 }
1418 else {
1419 str = rb_str_new(0, 0);
1420 len = p->curr;
1421 }
1422 rb_str_cat(str, CURPTR(p) - len, len);
1423 return rb_str_dump(str);
1424}
1425
1426static VALUE
1427inspect2(struct strscanner *p)
1428{
1429 VALUE str;
1430 long len;
1431
1432 if (EOS_P(p)) return rb_str_new2("");
1433 len = S_RESTLEN(p);
1434 if (len > INSPECT_LENGTH) {
1436 rb_str_cat2(str, "...");
1437 }
1438 else {
1439 str = rb_str_new(CURPTR(p), len);
1440 }
1441 return rb_str_dump(str);
1442}
1443
1444/*
1445 * call-seq:
1446 * scanner.fixed_anchor? -> true or false
1447 *
1448 * Whether +scanner+ uses fixed anchor mode or not.
1449 *
1450 * If fixed anchor mode is used, +\A+ always matches the beginning of
1451 * the string. Otherwise, +\A+ always matches the current position.
1452 */
1453static VALUE
1454strscan_fixed_anchor_p(VALUE self)
1455{
1456 struct strscanner *p;
1457 p = check_strscan(self);
1458 return p->fixed_anchor_p ? Qtrue : Qfalse;
1459}
1460
1461/* =======================================================================
1462 Ruby Interface
1463 ======================================================================= */
1464
1465/*
1466 * Document-class: StringScanner
1467 *
1468 * StringScanner provides for lexical scanning operations on a String. Here is
1469 * an example of its usage:
1470 *
1471 * s = StringScanner.new('This is an example string')
1472 * s.eos? # -> false
1473 *
1474 * p s.scan(/\w+/) # -> "This"
1475 * p s.scan(/\w+/) # -> nil
1476 * p s.scan(/\s+/) # -> " "
1477 * p s.scan(/\s+/) # -> nil
1478 * p s.scan(/\w+/) # -> "is"
1479 * s.eos? # -> false
1480 *
1481 * p s.scan(/\s+/) # -> " "
1482 * p s.scan(/\w+/) # -> "an"
1483 * p s.scan(/\s+/) # -> " "
1484 * p s.scan(/\w+/) # -> "example"
1485 * p s.scan(/\s+/) # -> " "
1486 * p s.scan(/\w+/) # -> "string"
1487 * s.eos? # -> true
1488 *
1489 * p s.scan(/\s+/) # -> nil
1490 * p s.scan(/\w+/) # -> nil
1491 *
1492 * Scanning a string means remembering the position of a <i>scan pointer</i>,
1493 * which is just an index. The point of scanning is to move forward a bit at
1494 * a time, so matches are sought after the scan pointer; usually immediately
1495 * after it.
1496 *
1497 * Given the string "test string", here are the pertinent scan pointer
1498 * positions:
1499 *
1500 *   t e s t   s t r i n g
1501 *   0 1 2 ...             1
1502 *                         0
1503 *
1504 * When you #scan for a pattern (a regular expression), the match must occur
1505 * at the character after the scan pointer. If you use #scan_until, then the
1506 * match can occur anywhere after the scan pointer. In both cases, the scan
1507 * pointer moves <i>just beyond</i> the last character of the match, ready to
1508 * scan again from the next character onwards. This is demonstrated by the
1509 * example above.
1510 *
1511 * == Method Categories
1512 *
1513 * There are other methods besides the plain scanners. You can look ahead in
1514 * the string without actually scanning. You can access the most recent match.
1515 * You can modify the string being scanned, reset or terminate the scanner,
1516 * find out or change the position of the scan pointer, skip ahead, and so on.
1517 *
1518 * === Advancing the Scan Pointer
1519 *
1520 * - #getch
1521 * - #get_byte
1522 * - #scan
1523 * - #scan_until
1524 * - #skip
1525 * - #skip_until
1526 *
1527 * === Looking Ahead
1528 *
1529 * - #check
1530 * - #check_until
1531 * - #exist?
1532 * - #match?
1533 * - #peek
1534 *
1535 * === Finding Where we Are
1536 *
1537 * - #beginning_of_line? (<tt>#bol?</tt>)
1538 * - #eos?
1539 * - #rest?
1540 * - #rest_size
1541 * - #pos
1542 *
1543 * === Setting Where we Are
1544 *
1545 * - #reset
1546 * - #terminate
1547 * - #pos=
1548 *
1549 * === Match Data
1550 *
1551 * - #matched
1552 * - #matched?
1553 * - #matched_size
1554 * - <tt>#[]</tt>
1555 * - #pre_match
1556 * - #post_match
1557 *
1558 * === Miscellaneous
1559 *
1560 * - <tt><<</tt>
1561 * - #concat
1562 * - #string
1563 * - #string=
1564 * - #unscan
1565 *
1566 * There are aliases to several of the methods.
1567 */
1568void
1570{
1571#ifdef HAVE_RB_EXT_RACTOR_SAFE
1572 rb_ext_ractor_safe(true);
1573#endif
1574
1575#undef rb_intern
1576 ID id_scanerr = rb_intern("ScanError");
1577 VALUE tmp;
1578
1579 id_byteslice = rb_intern("byteslice");
1580
1581 StringScanner = rb_define_class("StringScanner", rb_cObject);
1582 ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1583 if (!rb_const_defined(rb_cObject, id_scanerr)) {
1584 rb_const_set(rb_cObject, id_scanerr, ScanError);
1585 }
1587 rb_obj_freeze(tmp);
1588 rb_const_set(StringScanner, rb_intern("Version"), tmp);
1589 tmp = rb_str_new2("$Id$");
1590 rb_obj_freeze(tmp);
1591 rb_const_set(StringScanner, rb_intern("Id"), tmp);
1592
1593 rb_define_alloc_func(StringScanner, strscan_s_allocate);
1594 rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
1595 rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
1596 rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
1597 rb_define_method(StringScanner, "reset", strscan_reset, 0);
1598 rb_define_method(StringScanner, "terminate", strscan_terminate, 0);
1599 rb_define_method(StringScanner, "clear", strscan_clear, 0);
1600 rb_define_method(StringScanner, "string", strscan_get_string, 0);
1601 rb_define_method(StringScanner, "string=", strscan_set_string, 1);
1602 rb_define_method(StringScanner, "concat", strscan_concat, 1);
1603 rb_define_method(StringScanner, "<<", strscan_concat, 1);
1604 rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
1605 rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
1606 rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
1607 rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
1608 rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
1609
1610 rb_define_method(StringScanner, "scan", strscan_scan, 1);
1611 rb_define_method(StringScanner, "skip", strscan_skip, 1);
1612 rb_define_method(StringScanner, "match?", strscan_match_p, 1);
1613 rb_define_method(StringScanner, "check", strscan_check, 1);
1614 rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3);
1615
1616 rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1);
1617 rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1);
1618 rb_define_method(StringScanner, "exist?", strscan_exist_p, 1);
1619 rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
1620 rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
1621
1622 rb_define_method(StringScanner, "getch", strscan_getch, 0);
1623 rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
1624 rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
1625 rb_define_method(StringScanner, "peek", strscan_peek, 1);
1626 rb_define_method(StringScanner, "peep", strscan_peep, 1);
1627
1628 rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
1629
1630 rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
1631 rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
1632 rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
1633 rb_define_method(StringScanner, "empty?", strscan_empty_p, 0);
1634 rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
1635
1636 rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
1637 rb_define_method(StringScanner, "matched", strscan_matched, 0);
1638 rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
1639 rb_define_method(StringScanner, "[]", strscan_aref, 1);
1640 rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
1641 rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
1642 rb_define_method(StringScanner, "size", strscan_size, 0);
1643 rb_define_method(StringScanner, "captures", strscan_captures, 0);
1644 rb_define_method(StringScanner, "values_at", strscan_values_at, -1);
1645
1646 rb_define_method(StringScanner, "rest", strscan_rest, 0);
1647 rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
1648 rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1649
1650 rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1651
1652 rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1653}
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1301
#define UNREACHABLE
Definition: assume.h:30
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
Definition: cxxanyargs.hpp:653
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
Definition: cxxanyargs.hpp:668
#define rb_define_private_method(klass, mid, func, arity)
Defines klass#mid and makes it private.
Definition: cxxanyargs.hpp:660
struct RIMemo * ptr
Definition: debug.c:88
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:1070
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:1188
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:1089
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1218
big_t * num
Definition: enough.c:232
uint8_t len
Definition: escape.c:17
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
#define RSTRING_LEN(string)
Definition: fbuffer.h:22
#define RSTRING_PTR(string)
Definition: fbuffer.h:19
#define PRIsVALUE
Definition: function.c:10
void ruby_xfree(void *x)
Deallocates a storage instance.
Definition: gc.c:10914
void rb_memerror(void)
Definition: gc.c:10309
void rb_gc_mark(VALUE ptr)
Definition: gc.c:6112
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:748
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition: class.c:797
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:2296
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Definition: class.c:2085
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2917
VALUE rb_eStandardError
Definition: error.c:1054
VALUE rb_eRangeError
Definition: error.c:1061
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:1024
VALUE rb_eIndexError
Definition: error.c:1059
void rb_warning(const char *fmt,...)
Definition: error.c:439
VALUE rb_cObject
Object class.
Definition: object.c:49
VALUE rb_obj_class(VALUE)
Definition: object.c:245
VALUE rb_obj_freeze(VALUE)
Make the object unmodifiable.
Definition: object.c:1101
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:1860
long rb_enc_strlen(const char *, const char *, rb_encoding *)
Definition: string.c:1887
#define rb_ary_new2
Definition: array.h:72
void rb_ext_ractor_safe(bool flag)
Definition: load.c:1058
#define rb_str_new2
Definition: string.h:276
#define rb_str_cat2
Definition: string.h:285
#define rb_str_new(str, len)
Definition: string.h:213
VALUE rb_str_cat(VALUE, const char *, long)
Definition: string.c:2962
VALUE rb_str_dump(VALUE)
Definition: string.c:6311
VALUE rb_str_append(VALUE, VALUE)
Definition: string.c:3118
#define rb_str_new_cstr(str)
Definition: string.h:219
void rb_const_set(VALUE, ID, VALUE)
Definition: variable.c:3003
int rb_const_defined(VALUE, ID)
Definition: variable.c:2928
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
void rb_alias(VALUE, ID, ID)
Definition: vm_method.c:1926
VALUE rb_sym2str(VALUE)
Definition: symbol.c:927
ID rb_intern(const char *)
Definition: symbol.c:785
int rb_reg_region_copy(struct re_registers *, const struct re_registers *)
Definition: re.c:956
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Definition: re.c:1511
#define NUM2INT
Definition: int.h:44
voidpf void uLong size
Definition: ioapi.h:138
#define INT2FIX
Definition: long.h:48
#define LONG2NUM
Definition: long.h:50
#define rb_long2int
Definition: long.h:62
#define NUM2LONG
Definition: long.h:51
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
#define RB_GC_GUARD(v)
Definition: memory.h:91
const char * name
Definition: nkf.c:208
ONIG_EXTERN int onig_region_set(OnigRegion *region, int at, int beg, int end)
Definition: regexec.c:305
ONIG_EXTERN void onig_region_init(OnigRegion *region)
Definition: regexec.c:320
ONIG_EXTERN void onig_region_free(OnigRegion *region, int free_self)
Definition: regexec.c:343
#define UChar
Definition: onigmo.h:76
ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *start, const OnigUChar *range, OnigRegion *region, OnigOptionType option)
ONIG_EXTERN void onig_free(OnigRegex)
ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *at, OnigRegion *region, OnigOptionType option)
ONIG_EXTERN void onig_region_clear(OnigRegion *region)
Definition: regexec.c:235
ONIG_EXTERN int onig_name_to_backref_number(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, const OnigRegion *region)
#define ONIG_OPTION_NONE
Definition: onigmo.h:450
#define rb_enc_raise
Definition: parser.c:21
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5693
#define RREGEXP(obj)
Definition: rregexp.h:31
#define RREGEXP_PTR(obj)
Definition: rregexp.h:32
#define StringValue(v)
Definition: rstring.h:50
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Definition: rstring.h:211
@ RUBY_TYPED_FREE_IMMEDIATELY
Definition: rtypeddata.h:62
#define TypedData_Make_Struct(klass, type, data_type, sval)
Definition: rtypeddata.h:122
int argc
Definition: ruby.c:240
char ** argv
Definition: ruby.c:241
#define Qundef
#define Qtrue
#define RTEST
#define Qnil
#define Qfalse
#define NIL_P
#define f
VALUE rb_sprintf(const char *,...)
Definition: sprintf.c:1203
#define _(args)
Definition: stdarg.h:31
C99 shim for <stdbool.h>
#define S_LEN(s)
Definition: strscan.c:63
#define S_PBEG(s)
Definition: strscan.c:62
#define STRSCAN_VERSION
Definition: strscan.c:25
#define S_RESTLEN(s)
Definition: strscan.c:66
#define S_PEND(s)
Definition: strscan.c:64
#define CLEAR_MATCH_STATUS(s)
Definition: strscan.c:60
#define CURPTR(s)
Definition: strscan.c:65
#define GET_SCANNER(obj, var)
Definition: strscan.c:70
#define MATCHED_P(s)
Definition: strscan.c:58
#define EOS_P(s)
Definition: strscan.c:68
#define INSPECT_LENGTH
Definition: strscan.c:1362
void Init_strscan(void)
Definition: strscan.c:1569
#define MATCHED(s)
Definition: strscan.c:59
OnigPosition * beg
Definition: onigmo.h:719
int num_regs
Definition: onigmo.h:718
OnigPosition * end
Definition: onigmo.h:720
struct re_registers regs
Definition: strscan.c:49
VALUE str
Definition: strscan.c:42
unsigned long flags
Definition: strscan.c:38
bool fixed_anchor_p
Definition: strscan.c:55
long curr
Definition: strscan.c:46
VALUE regex
Definition: strscan.c:52
long prev
Definition: strscan.c:45
unsigned long VALUE
Definition: value.h:38
unsigned long ID
Definition: value.h:39
#define TYPE(_)
Definition: value_type.h:105
#define T_STRING
Definition: value_type.h:77
#define T_SYMBOL
Definition: value_type.h:79
#define T_REGEXP
Definition: value_type.h:76