Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
pack.c
Go to the documentation of this file.
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
13
14#include <ctype.h>
15#include <errno.h>
16#include <float.h>
17#include <sys/types.h>
18
19#include "internal.h"
20#include "internal/array.h"
21#include "internal/bits.h"
22#include "internal/string.h"
23#include "internal/symbol.h"
24#include "internal/util.h"
25#include "internal/variable.h"
26
27#include "builtin.h"
28
29/*
30 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
31 * instead of HAVE_LONG_LONG or LONG_LONG.
32 * This means q! and Q! always mean the standard long long type and
33 * cause ArgumentError on platforms which have no long long type,
34 * even if the platform has an implementation-specific 64-bit type.
35 * This behavior is consistent with the documentation of pack/unpack.
36 */
37#ifdef HAVE_TRUE_LONG_LONG
38static const char natstr[] = "sSiIlLqQjJ";
39#else
40static const char natstr[] = "sSiIlLjJ";
41#endif
42static const char endstr[] = "sSiIlLqQjJ";
43
44#ifdef HAVE_TRUE_LONG_LONG
45/* It is intentional to use long long instead of LONG_LONG. */
46# define NATINT_LEN_Q NATINT_LEN(long long, 8)
47#else
48# define NATINT_LEN_Q 8
49#endif
50
51#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
52# define NATINT_PACK
53#endif
54
55#ifdef DYNAMIC_ENDIAN
56/* for universal binary of NEXTSTEP and MacOS X */
57/* useless since autoconf 2.63? */
/*
 * Detect the host byte order at run time.
 *
 * The answer is computed once and cached in a function-local static.
 * Returns 1 when the first byte of an int holding the value 1 is zero
 * (big endian), otherwise 0.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
        const char *first_byte;

        /* the flag doubles as the probe value: it now holds the integer 1 */
        probed = 1;
        first_byte = (const char *)&probed;
        cached = (first_byte[0] == 0);
    }
    return cached;
}
70# define BIGENDIAN_P() (is_bigendian())
71#elif defined(WORDS_BIGENDIAN)
72# define BIGENDIAN_P() 1
73#else
74# define BIGENDIAN_P() 0
75#endif
76
77#ifdef NATINT_PACK
78# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
79#else
80# define NATINT_LEN(type,len) ((int)sizeof(type))
81#endif
82
/*
 * Overlay of a float with a same-sized unsigned integer and a raw byte
 * buffer.  The byte-order macros below swap the value through `u`, while
 * the pack/unpack code copies bytes through `buf` and reads/writes `f`.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/*
 * Same overlay for a double: `d` is the value, `u` is the integer view
 * used for byte swapping, `buf` is the raw byte view.
 */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
93#define swapf(x) swap32(x)
94#define swapd(x) swap64(x)
95
96#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
97#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
98#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
99#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
100#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
101#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
102#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
103#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
104
105#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
106#define HTONF(x) ((x).u = rb_htonf((x).u))
107#define HTOVF(x) ((x).u = rb_htovf((x).u))
108#define NTOHF(x) ((x).u = rb_ntohf((x).u))
109#define VTOHF(x) ((x).u = rb_vtohf((x).u))
110
111#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
112#define HTOND(x) ((x).u = rb_htond((x).u))
113#define HTOVD(x) ((x).u = rb_htovd((x).u))
114#define NTOHD(x) ((x).u = rb_ntohd((x).u))
115#define VTOHD(x) ((x).u = rb_vtohd((x).u))
116
117#define MAX_INTEGER_PACK_SIZE 8
118
119static const char toofew[] = "too few arguments";
120
121static void encodes(VALUE,const char*,long,int,int);
122static void qpencode(VALUE,VALUE,long);
123
124static unsigned long utf8_to_uv(const char*,long*);
125
126static ID id_associated;
127
128static void
129str_associate(VALUE str, VALUE add)
130{
131 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
132 rb_ivar_set(str, id_associated, add);
133}
134
135static VALUE
136str_associated(VALUE str)
137{
138 return rb_ivar_lookup(str, id_associated, Qfalse);
139}
140
141static void
142unknown_directive(const char *mode, char type, VALUE fmt)
143{
144 VALUE f;
145 char unknown[5];
146
147 if (ISPRINT(type)) {
148 unknown[0] = type;
149 unknown[1] = '\0';
150 }
151 else {
152 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
153 }
155 if (f != fmt) {
156 fmt = rb_str_subseq(f, 1, RSTRING_LEN(f) - 2);
157 }
158 rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
159 mode, unknown, fmt);
160}
161
162static float
163VALUE_to_float(VALUE obj)
164{
165 VALUE v = rb_to_float(obj);
166 double d = RFLOAT_VALUE(v);
167
168 if (isnan(d)) {
169 return NAN;
170 }
171 else if (d < -FLT_MAX) {
172 return -INFINITY;
173 }
174 else if (d <= FLT_MAX) {
175 return d;
176 }
177 else {
178 return INFINITY;
179 }
180}
181
182static VALUE
183pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
184{
185 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
186 static const char spc10[] = " ";
187 const char *p, *pend;
188 VALUE res, from, associates = 0;
189 char type;
190 long len, idx, plen;
191 const char *ptr;
192 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
193#ifdef NATINT_PACK
194 int natint; /* native integer */
195#endif
196 int integer_size, bigendian_p;
197
198 StringValue(fmt);
199 p = RSTRING_PTR(fmt);
200 pend = p + RSTRING_LEN(fmt);
201
202 if (NIL_P(buffer)) {
203 res = rb_str_buf_new(0);
204 }
205 else {
206 if (!RB_TYPE_P(buffer, T_STRING))
207 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
208 res = buffer;
209 }
210
211 idx = 0;
212
213#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
214#define MORE_ITEM (idx < RARRAY_LEN(ary))
215#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
216#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
217
218 while (p < pend) {
219 int explicit_endian = 0;
220 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
221 rb_raise(rb_eRuntimeError, "format string modified");
222 }
223 type = *p++; /* get data type */
224#ifdef NATINT_PACK
225 natint = 0;
226#endif
227
228 if (ISSPACE(type)) continue;
229 if (type == '#') {
230 while ((p < pend) && (*p != '\n')) {
231 p++;
232 }
233 continue;
234 }
235
236 {
237 modifiers:
238 switch (*p) {
239 case '_':
240 case '!':
241 if (strchr(natstr, type)) {
242#ifdef NATINT_PACK
243 natint = 1;
244#endif
245 p++;
246 }
247 else {
248 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
249 }
250 goto modifiers;
251
252 case '<':
253 case '>':
254 if (!strchr(endstr, type)) {
255 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
256 }
257 if (explicit_endian) {
258 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
259 }
260 explicit_endian = *p++;
261 goto modifiers;
262 }
263 }
264
265 if (*p == '*') { /* set data length */
266 len = strchr("@Xxu", type) ? 0
267 : strchr("PMm", type) ? 1
268 : RARRAY_LEN(ary) - idx;
269 p++;
270 }
271 else if (ISDIGIT(*p)) {
272 errno = 0;
273 len = STRTOUL(p, (char**)&p, 10);
274 if (errno) {
275 rb_raise(rb_eRangeError, "pack length too big");
276 }
277 }
278 else {
279 len = 1;
280 }
281
282 switch (type) {
283 case 'U':
284 /* if encoding is US-ASCII, upgrade to UTF-8 */
285 if (enc_info == 1) enc_info = 2;
286 break;
287 case 'm': case 'M': case 'u':
288 /* keep US-ASCII (do nothing) */
289 break;
290 default:
291 /* fall back to BINARY */
292 enc_info = 0;
293 break;
294 }
295 switch (type) {
296 case 'A': case 'a': case 'Z':
297 case 'B': case 'b':
298 case 'H': case 'h':
299 from = NEXTFROM;
300 if (NIL_P(from)) {
301 ptr = "";
302 plen = 0;
303 }
304 else {
305 StringValue(from);
306 ptr = RSTRING_PTR(from);
307 plen = RSTRING_LEN(from);
308 }
309
310 if (p[-1] == '*')
311 len = plen;
312
313 switch (type) {
314 case 'a': /* arbitrary binary string (null padded) */
315 case 'A': /* arbitrary binary string (ASCII space padded) */
316 case 'Z': /* null terminated string */
317 if (plen >= len) {
318 rb_str_buf_cat(res, ptr, len);
319 if (p[-1] == '*' && type == 'Z')
320 rb_str_buf_cat(res, nul10, 1);
321 }
322 else {
323 rb_str_buf_cat(res, ptr, plen);
324 len -= plen;
325 while (len >= 10) {
326 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
327 len -= 10;
328 }
329 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
330 }
331 break;
332
333#define castchar(from) (char)((from) & 0xff)
334
335 case 'b': /* bit string (ascending) */
336 {
337 int byte = 0;
338 long i, j = 0;
339
340 if (len > plen) {
341 j = (len - plen + 1)/2;
342 len = plen;
343 }
344 for (i=0; i++ < len; ptr++) {
345 if (*ptr & 1)
346 byte |= 128;
347 if (i & 7)
348 byte >>= 1;
349 else {
350 char c = castchar(byte);
351 rb_str_buf_cat(res, &c, 1);
352 byte = 0;
353 }
354 }
355 if (len & 7) {
356 char c;
357 byte >>= 7 - (len & 7);
358 c = castchar(byte);
359 rb_str_buf_cat(res, &c, 1);
360 }
361 len = j;
362 goto grow;
363 }
364 break;
365
366 case 'B': /* bit string (descending) */
367 {
368 int byte = 0;
369 long i, j = 0;
370
371 if (len > plen) {
372 j = (len - plen + 1)/2;
373 len = plen;
374 }
375 for (i=0; i++ < len; ptr++) {
376 byte |= *ptr & 1;
377 if (i & 7)
378 byte <<= 1;
379 else {
380 char c = castchar(byte);
381 rb_str_buf_cat(res, &c, 1);
382 byte = 0;
383 }
384 }
385 if (len & 7) {
386 char c;
387 byte <<= 7 - (len & 7);
388 c = castchar(byte);
389 rb_str_buf_cat(res, &c, 1);
390 }
391 len = j;
392 goto grow;
393 }
394 break;
395
396 case 'h': /* hex string (low nibble first) */
397 {
398 int byte = 0;
399 long i, j = 0;
400
401 if (len > plen) {
402 j = (len + 1) / 2 - (plen + 1) / 2;
403 len = plen;
404 }
405 for (i=0; i++ < len; ptr++) {
406 if (ISALPHA(*ptr))
407 byte |= (((*ptr & 15) + 9) & 15) << 4;
408 else
409 byte |= (*ptr & 15) << 4;
410 if (i & 1)
411 byte >>= 4;
412 else {
413 char c = castchar(byte);
414 rb_str_buf_cat(res, &c, 1);
415 byte = 0;
416 }
417 }
418 if (len & 1) {
419 char c = castchar(byte);
420 rb_str_buf_cat(res, &c, 1);
421 }
422 len = j;
423 goto grow;
424 }
425 break;
426
427 case 'H': /* hex string (high nibble first) */
428 {
429 int byte = 0;
430 long i, j = 0;
431
432 if (len > plen) {
433 j = (len + 1) / 2 - (plen + 1) / 2;
434 len = plen;
435 }
436 for (i=0; i++ < len; ptr++) {
437 if (ISALPHA(*ptr))
438 byte |= ((*ptr & 15) + 9) & 15;
439 else
440 byte |= *ptr & 15;
441 if (i & 1)
442 byte <<= 4;
443 else {
444 char c = castchar(byte);
445 rb_str_buf_cat(res, &c, 1);
446 byte = 0;
447 }
448 }
449 if (len & 1) {
450 char c = castchar(byte);
451 rb_str_buf_cat(res, &c, 1);
452 }
453 len = j;
454 goto grow;
455 }
456 break;
457 }
458 break;
459
460 case 'c': /* signed char */
461 case 'C': /* unsigned char */
462 integer_size = 1;
463 bigendian_p = BIGENDIAN_P(); /* not effective */
464 goto pack_integer;
465
466 case 's': /* s for int16_t, s! for signed short */
467 integer_size = NATINT_LEN(short, 2);
468 bigendian_p = BIGENDIAN_P();
469 goto pack_integer;
470
471 case 'S': /* S for uint16_t, S! for unsigned short */
472 integer_size = NATINT_LEN(short, 2);
473 bigendian_p = BIGENDIAN_P();
474 goto pack_integer;
475
476 case 'i': /* i and i! for signed int */
477 integer_size = (int)sizeof(int);
478 bigendian_p = BIGENDIAN_P();
479 goto pack_integer;
480
481 case 'I': /* I and I! for unsigned int */
482 integer_size = (int)sizeof(int);
483 bigendian_p = BIGENDIAN_P();
484 goto pack_integer;
485
486 case 'l': /* l for int32_t, l! for signed long */
487 integer_size = NATINT_LEN(long, 4);
488 bigendian_p = BIGENDIAN_P();
489 goto pack_integer;
490
491 case 'L': /* L for uint32_t, L! for unsigned long */
492 integer_size = NATINT_LEN(long, 4);
493 bigendian_p = BIGENDIAN_P();
494 goto pack_integer;
495
496 case 'q': /* q for int64_t, q! for signed long long */
497 integer_size = NATINT_LEN_Q;
498 bigendian_p = BIGENDIAN_P();
499 goto pack_integer;
500
501 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
502 integer_size = NATINT_LEN_Q;
503 bigendian_p = BIGENDIAN_P();
504 goto pack_integer;
505
506 case 'j': /* j for intptr_t */
507 integer_size = sizeof(intptr_t);
508 bigendian_p = BIGENDIAN_P();
509 goto pack_integer;
510
511 case 'J': /* J for uintptr_t */
512 integer_size = sizeof(uintptr_t);
513 bigendian_p = BIGENDIAN_P();
514 goto pack_integer;
515
516 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
517 integer_size = 2;
518 bigendian_p = 1;
519 goto pack_integer;
520
521 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
522 integer_size = 4;
523 bigendian_p = 1;
524 goto pack_integer;
525
526 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
527 integer_size = 2;
528 bigendian_p = 0;
529 goto pack_integer;
530
531 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
532 integer_size = 4;
533 bigendian_p = 0;
534 goto pack_integer;
535
536 pack_integer:
537 if (explicit_endian) {
538 bigendian_p = explicit_endian == '>';
539 }
540 if (integer_size > MAX_INTEGER_PACK_SIZE)
541 rb_bug("unexpected intger size for pack: %d", integer_size);
542 while (len-- > 0) {
543 char intbuf[MAX_INTEGER_PACK_SIZE];
544
545 from = NEXTFROM;
546 rb_integer_pack(from, intbuf, integer_size, 1, 0,
549 rb_str_buf_cat(res, intbuf, integer_size);
550 }
551 break;
552
553 case 'f': /* single precision float in native format */
554 case 'F': /* ditto */
555 while (len-- > 0) {
556 float f;
557
558 from = NEXTFROM;
559 f = VALUE_to_float(from);
560 rb_str_buf_cat(res, (char*)&f, sizeof(float));
561 }
562 break;
563
564 case 'e': /* single precision float in VAX byte-order */
565 while (len-- > 0) {
566 FLOAT_CONVWITH(tmp);
567
568 from = NEXTFROM;
569 tmp.f = VALUE_to_float(from);
570 HTOVF(tmp);
571 rb_str_buf_cat(res, tmp.buf, sizeof(float));
572 }
573 break;
574
575 case 'E': /* double precision float in VAX byte-order */
576 while (len-- > 0) {
577 DOUBLE_CONVWITH(tmp);
578 from = NEXTFROM;
579 tmp.d = RFLOAT_VALUE(rb_to_float(from));
580 HTOVD(tmp);
581 rb_str_buf_cat(res, tmp.buf, sizeof(double));
582 }
583 break;
584
585 case 'd': /* double precision float in native format */
586 case 'D': /* ditto */
587 while (len-- > 0) {
588 double d;
589
590 from = NEXTFROM;
591 d = RFLOAT_VALUE(rb_to_float(from));
592 rb_str_buf_cat(res, (char*)&d, sizeof(double));
593 }
594 break;
595
596 case 'g': /* single precision float in network byte-order */
597 while (len-- > 0) {
598 FLOAT_CONVWITH(tmp);
599 from = NEXTFROM;
600 tmp.f = VALUE_to_float(from);
601 HTONF(tmp);
602 rb_str_buf_cat(res, tmp.buf, sizeof(float));
603 }
604 break;
605
606 case 'G': /* double precision float in network byte-order */
607 while (len-- > 0) {
608 DOUBLE_CONVWITH(tmp);
609
610 from = NEXTFROM;
611 tmp.d = RFLOAT_VALUE(rb_to_float(from));
612 HTOND(tmp);
613 rb_str_buf_cat(res, tmp.buf, sizeof(double));
614 }
615 break;
616
617 case 'x': /* null byte */
618 grow:
619 while (len >= 10) {
620 rb_str_buf_cat(res, nul10, 10);
621 len -= 10;
622 }
623 rb_str_buf_cat(res, nul10, len);
624 break;
625
626 case 'X': /* back up byte */
627 shrink:
628 plen = RSTRING_LEN(res);
629 if (plen < len)
630 rb_raise(rb_eArgError, "X outside of string");
631 rb_str_set_len(res, plen - len);
632 break;
633
634 case '@': /* null fill to absolute position */
635 len -= RSTRING_LEN(res);
636 if (len > 0) goto grow;
637 len = -len;
638 if (len > 0) goto shrink;
639 break;
640
641 case '%':
642 rb_raise(rb_eArgError, "%% is not supported");
643 break;
644
645 case 'U': /* Unicode character */
646 while (len-- > 0) {
647 SIGNED_VALUE l;
648 char buf[8];
649 int le;
650
651 from = NEXTFROM;
652 from = rb_to_int(from);
653 l = NUM2LONG(from);
654 if (l < 0) {
655 rb_raise(rb_eRangeError, "pack(U): value out of range");
656 }
657 le = rb_uv_to_utf8(buf, l);
658 rb_str_buf_cat(res, (char*)buf, le);
659 }
660 break;
661
662 case 'u': /* uuencoded string */
663 case 'm': /* base64 encoded string */
664 from = NEXTFROM;
665 StringValue(from);
666 ptr = RSTRING_PTR(from);
667 plen = RSTRING_LEN(from);
668
669 if (len == 0 && type == 'm') {
670 encodes(res, ptr, plen, type, 0);
671 ptr += plen;
672 break;
673 }
674 if (len <= 2)
675 len = 45;
676 else if (len > 63 && type == 'u')
677 len = 63;
678 else
679 len = len / 3 * 3;
680 while (plen > 0) {
681 long todo;
682
683 if (plen > len)
684 todo = len;
685 else
686 todo = plen;
687 encodes(res, ptr, todo, type, 1);
688 plen -= todo;
689 ptr += todo;
690 }
691 break;
692
693 case 'M': /* quoted-printable encoded string */
695 if (len <= 1)
696 len = 72;
697 qpencode(res, from, len);
698 break;
699
700 case 'P': /* pointer to packed byte string */
701 from = THISFROM;
702 if (!NIL_P(from)) {
703 StringValue(from);
704 if (RSTRING_LEN(from) < len) {
705 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
706 RSTRING_LEN(from), len);
707 }
708 }
709 len = 1;
710 /* FALL THROUGH */
711 case 'p': /* pointer to string */
712 while (len-- > 0) {
713 char *t;
714 from = NEXTFROM;
715 if (NIL_P(from)) {
716 t = 0;
717 }
718 else {
719 t = StringValuePtr(from);
720 }
721 if (!associates) {
722 associates = rb_ary_new();
723 }
724 rb_ary_push(associates, from);
725 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
726 }
727 break;
728
729 case 'w': /* BER compressed integer */
730 while (len-- > 0) {
731 VALUE buf = rb_str_new(0, 0);
732 size_t numbytes;
733 int sign;
734 char *cp;
735
736 from = NEXTFROM;
737 from = rb_to_int(from);
738 numbytes = rb_absint_numwords(from, 7, NULL);
739 if (numbytes == 0)
740 numbytes = 1;
741 buf = rb_str_new(NULL, numbytes);
742
744
745 if (sign < 0)
746 rb_raise(rb_eArgError, "can't compress negative numbers");
747 if (sign == 2)
748 rb_bug("buffer size problem?");
749
750 cp = RSTRING_PTR(buf);
751 while (1 < numbytes) {
752 *cp |= 0x80;
753 cp++;
754 numbytes--;
755 }
756
758 }
759 break;
760
761 default: {
762 unknown_directive("pack", type, fmt);
763 break;
764 }
765 }
766 }
767
768 if (associates) {
769 str_associate(res, associates);
770 }
771 switch (enc_info) {
772 case 1:
774 break;
775 case 2:
777 break;
778 default:
779 /* do nothing, keep ASCII-8BIT */
780 break;
781 }
782 return res;
783}
784
785static const char uu_table[] =
786"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
787static const char b64_table[] =
788"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
789
790static void
791encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
792{
793 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
794 char buff[buff_size + 1]; /* +1 for tail_lf */
795 long i = 0;
796 const char *const trans = type == 'u' ? uu_table : b64_table;
797 char padding;
798 const unsigned char *s = (const unsigned char *)s0;
799
800 if (type == 'u') {
801 buff[i++] = (char)len + ' ';
802 padding = '`';
803 }
804 else {
805 padding = '=';
806 }
807 while (len >= input_unit) {
808 while (len >= input_unit && buff_size-i >= encoded_unit) {
809 buff[i++] = trans[077 & (*s >> 2)];
810 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
811 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
812 buff[i++] = trans[077 & s[2]];
813 s += input_unit;
814 len -= input_unit;
815 }
816 if (buff_size-i < encoded_unit) {
817 rb_str_buf_cat(str, buff, i);
818 i = 0;
819 }
820 }
821
822 if (len == 2) {
823 buff[i++] = trans[077 & (*s >> 2)];
824 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
825 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
826 buff[i++] = padding;
827 }
828 else if (len == 1) {
829 buff[i++] = trans[077 & (*s >> 2)];
830 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
831 buff[i++] = padding;
832 buff[i++] = padding;
833 }
834 if (tail_lf) buff[i++] = '\n';
835 rb_str_buf_cat(str, buff, i);
836 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
837}
838
839static const char hex_table[] = "0123456789ABCDEF";
840
841static void
842qpencode(VALUE str, VALUE from, long len)
843{
844 char buff[1024];
845 long i = 0, n = 0, prev = EOF;
846 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
847 unsigned char *send = s + RSTRING_LEN(from);
848
849 while (s < send) {
850 if ((*s > 126) ||
851 (*s < 32 && *s != '\n' && *s != '\t') ||
852 (*s == '=')) {
853 buff[i++] = '=';
854 buff[i++] = hex_table[*s >> 4];
855 buff[i++] = hex_table[*s & 0x0f];
856 n += 3;
857 prev = EOF;
858 }
859 else if (*s == '\n') {
860 if (prev == ' ' || prev == '\t') {
861 buff[i++] = '=';
862 buff[i++] = *s;
863 }
864 buff[i++] = *s;
865 n = 0;
866 prev = *s;
867 }
868 else {
869 buff[i++] = *s;
870 n++;
871 prev = *s;
872 }
873 if (n > len) {
874 buff[i++] = '=';
875 buff[i++] = '\n';
876 n = 0;
877 prev = '\n';
878 }
879 if (i > 1024 - 5) {
880 rb_str_buf_cat(str, buff, i);
881 i = 0;
882 }
883 s++;
884 }
885 if (n > 0) {
886 buff[i++] = '=';
887 buff[i++] = '\n';
888 }
889 if (i > 0) {
890 rb_str_buf_cat(str, buff, i);
891 }
892}
893
894static inline int
895hex2num(char c)
896{
897 int n;
898 n = ruby_digit36_to_number_table[(unsigned char)c];
899 if (16 <= n)
900 n = -1;
901 return n;
902}
903
904#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
905 tmp_len = 0; \
906 if (len > (long)((send-s)/(sz))) { \
907 if (!star) { \
908 tmp_len = len-(send-s)/(sz); \
909 } \
910 len = (send-s)/(sz); \
911 } \
912} while (0)
913
914#define PACK_ITEM_ADJUST() do { \
915 if (tmp_len > 0 && mode == UNPACK_ARRAY) \
916 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
917} while (0)
918
919/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
920 * 12.4/12.5/12.6 C compiler optimization bug
921 * with "-xO4" optimization option.
922 */
923#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
924# define AVOID_CC_BUG volatile
925#else
926# define AVOID_CC_BUG
927#endif
928
929/* unpack mode */
930#define UNPACK_ARRAY 0
931#define UNPACK_BLOCK 1
932#define UNPACK_1 2
933
934static VALUE
935pack_unpack_internal(VALUE str, VALUE fmt, int mode)
936{
937#define hexdigits ruby_hexdigits
938 char *s, *send;
939 char *p, *pend;
940 VALUE ary;
941 char type;
942 long len;
943 AVOID_CC_BUG long tmp_len;
944 int star;
945#ifdef NATINT_PACK
946 int natint; /* native integer */
947#endif
948 int signed_p, integer_size, bigendian_p;
949#define UNPACK_PUSH(item) do {\
950 VALUE item_val = (item);\
951 if ((mode) == UNPACK_BLOCK) {\
952 rb_yield(item_val);\
953 }\
954 else if ((mode) == UNPACK_ARRAY) {\
955 rb_ary_push(ary, item_val);\
956 }\
957 else /* if ((mode) == UNPACK_1) { */ {\
958 return item_val; \
959 }\
960 } while (0)
961
963 StringValue(fmt);
964 s = RSTRING_PTR(str);
965 send = s + RSTRING_LEN(str);
966 p = RSTRING_PTR(fmt);
967 pend = p + RSTRING_LEN(fmt);
968
969 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
970 while (p < pend) {
971 int explicit_endian = 0;
972 type = *p++;
973#ifdef NATINT_PACK
974 natint = 0;
975#endif
976
977 if (ISSPACE(type)) continue;
978 if (type == '#') {
979 while ((p < pend) && (*p != '\n')) {
980 p++;
981 }
982 continue;
983 }
984
985 star = 0;
986 {
987 modifiers:
988 switch (*p) {
989 case '_':
990 case '!':
991
992 if (strchr(natstr, type)) {
993#ifdef NATINT_PACK
994 natint = 1;
995#endif
996 p++;
997 }
998 else {
999 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1000 }
1001 goto modifiers;
1002
1003 case '<':
1004 case '>':
1005 if (!strchr(endstr, type)) {
1006 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1007 }
1008 if (explicit_endian) {
1009 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1010 }
1011 explicit_endian = *p++;
1012 goto modifiers;
1013 }
1014 }
1015
1016 if (p >= pend)
1017 len = 1;
1018 else if (*p == '*') {
1019 star = 1;
1020 len = send - s;
1021 p++;
1022 }
1023 else if (ISDIGIT(*p)) {
1024 errno = 0;
1025 len = STRTOUL(p, (char**)&p, 10);
1026 if (len < 0 || errno) {
1027 rb_raise(rb_eRangeError, "pack length too big");
1028 }
1029 }
1030 else {
1031 len = (type != '@');
1032 }
1033
1034 switch (type) {
1035 case '%':
1036 rb_raise(rb_eArgError, "%% is not supported");
1037 break;
1038
1039 case 'A':
1040 if (len > send - s) len = send - s;
1041 {
1042 long end = len;
1043 char *t = s + len - 1;
1044
1045 while (t >= s) {
1046 if (*t != ' ' && *t != '\0') break;
1047 t--; len--;
1048 }
1050 s += end;
1051 }
1052 break;
1053
1054 case 'Z':
1055 {
1056 char *t = s;
1057
1058 if (len > send-s) len = send-s;
1059 while (t < s+len && *t) t++;
1060 UNPACK_PUSH(rb_str_new(s, t-s));
1061 if (t < send) t++;
1062 s = star ? t : s+len;
1063 }
1064 break;
1065
1066 case 'a':
1067 if (len > send - s) len = send - s;
1069 s += len;
1070 break;
1071
1072 case 'b':
1073 {
1074 VALUE bitstr;
1075 char *t;
1076 int bits;
1077 long i;
1078
1079 if (p[-1] == '*' || len > (send - s) * 8)
1080 len = (send - s) * 8;
1081 bits = 0;
1082 bitstr = rb_usascii_str_new(0, len);
1083 t = RSTRING_PTR(bitstr);
1084 for (i=0; i<len; i++) {
1085 if (i & 7) bits >>= 1;
1086 else bits = (unsigned char)*s++;
1087 *t++ = (bits & 1) ? '1' : '0';
1088 }
1089 UNPACK_PUSH(bitstr);
1090 }
1091 break;
1092
1093 case 'B':
1094 {
1095 VALUE bitstr;
1096 char *t;
1097 int bits;
1098 long i;
1099
1100 if (p[-1] == '*' || len > (send - s) * 8)
1101 len = (send - s) * 8;
1102 bits = 0;
1103 bitstr = rb_usascii_str_new(0, len);
1104 t = RSTRING_PTR(bitstr);
1105 for (i=0; i<len; i++) {
1106 if (i & 7) bits <<= 1;
1107 else bits = (unsigned char)*s++;
1108 *t++ = (bits & 128) ? '1' : '0';
1109 }
1110 UNPACK_PUSH(bitstr);
1111 }
1112 break;
1113
1114 case 'h':
1115 {
1116 VALUE bitstr;
1117 char *t;
1118 int bits;
1119 long i;
1120
1121 if (p[-1] == '*' || len > (send - s) * 2)
1122 len = (send - s) * 2;
1123 bits = 0;
1124 bitstr = rb_usascii_str_new(0, len);
1125 t = RSTRING_PTR(bitstr);
1126 for (i=0; i<len; i++) {
1127 if (i & 1)
1128 bits >>= 4;
1129 else
1130 bits = (unsigned char)*s++;
1131 *t++ = hexdigits[bits & 15];
1132 }
1133 UNPACK_PUSH(bitstr);
1134 }
1135 break;
1136
1137 case 'H':
1138 {
1139 VALUE bitstr;
1140 char *t;
1141 int bits;
1142 long i;
1143
1144 if (p[-1] == '*' || len > (send - s) * 2)
1145 len = (send - s) * 2;
1146 bits = 0;
1147 bitstr = rb_usascii_str_new(0, len);
1148 t = RSTRING_PTR(bitstr);
1149 for (i=0; i<len; i++) {
1150 if (i & 1)
1151 bits <<= 4;
1152 else
1153 bits = (unsigned char)*s++;
1154 *t++ = hexdigits[(bits >> 4) & 15];
1155 }
1156 UNPACK_PUSH(bitstr);
1157 }
1158 break;
1159
1160 case 'c':
1161 signed_p = 1;
1162 integer_size = 1;
1163 bigendian_p = BIGENDIAN_P(); /* not effective */
1164 goto unpack_integer;
1165
1166 case 'C':
1167 signed_p = 0;
1168 integer_size = 1;
1169 bigendian_p = BIGENDIAN_P(); /* not effective */
1170 goto unpack_integer;
1171
1172 case 's':
1173 signed_p = 1;
1174 integer_size = NATINT_LEN(short, 2);
1175 bigendian_p = BIGENDIAN_P();
1176 goto unpack_integer;
1177
1178 case 'S':
1179 signed_p = 0;
1180 integer_size = NATINT_LEN(short, 2);
1181 bigendian_p = BIGENDIAN_P();
1182 goto unpack_integer;
1183
1184 case 'i':
1185 signed_p = 1;
1186 integer_size = (int)sizeof(int);
1187 bigendian_p = BIGENDIAN_P();
1188 goto unpack_integer;
1189
1190 case 'I':
1191 signed_p = 0;
1192 integer_size = (int)sizeof(int);
1193 bigendian_p = BIGENDIAN_P();
1194 goto unpack_integer;
1195
1196 case 'l':
1197 signed_p = 1;
1198 integer_size = NATINT_LEN(long, 4);
1199 bigendian_p = BIGENDIAN_P();
1200 goto unpack_integer;
1201
1202 case 'L':
1203 signed_p = 0;
1204 integer_size = NATINT_LEN(long, 4);
1205 bigendian_p = BIGENDIAN_P();
1206 goto unpack_integer;
1207
1208 case 'q':
1209 signed_p = 1;
1210 integer_size = NATINT_LEN_Q;
1211 bigendian_p = BIGENDIAN_P();
1212 goto unpack_integer;
1213
1214 case 'Q':
1215 signed_p = 0;
1216 integer_size = NATINT_LEN_Q;
1217 bigendian_p = BIGENDIAN_P();
1218 goto unpack_integer;
1219
1220 case 'j':
1221 signed_p = 1;
1222 integer_size = sizeof(intptr_t);
1223 bigendian_p = BIGENDIAN_P();
1224 goto unpack_integer;
1225
1226 case 'J':
1227 signed_p = 0;
1228 integer_size = sizeof(uintptr_t);
1229 bigendian_p = BIGENDIAN_P();
1230 goto unpack_integer;
1231
1232 case 'n':
1233 signed_p = 0;
1234 integer_size = 2;
1235 bigendian_p = 1;
1236 goto unpack_integer;
1237
1238 case 'N':
1239 signed_p = 0;
1240 integer_size = 4;
1241 bigendian_p = 1;
1242 goto unpack_integer;
1243
1244 case 'v':
1245 signed_p = 0;
1246 integer_size = 2;
1247 bigendian_p = 0;
1248 goto unpack_integer;
1249
1250 case 'V':
1251 signed_p = 0;
1252 integer_size = 4;
1253 bigendian_p = 0;
1254 goto unpack_integer;
1255
1256 unpack_integer:
1257 if (explicit_endian) {
1258 bigendian_p = explicit_endian == '>';
1259 }
1260 PACK_LENGTH_ADJUST_SIZE(integer_size);
1261 while (len-- > 0) {
1262 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1263 VALUE val;
1264 if (signed_p)
1265 flags |= INTEGER_PACK_2COMP;
1266 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1267 UNPACK_PUSH(val);
1268 s += integer_size;
1269 }
1271 break;
1272
1273 case 'f':
1274 case 'F':
1275 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1276 while (len-- > 0) {
1277 float tmp;
1278 memcpy(&tmp, s, sizeof(float));
1279 s += sizeof(float);
1280 UNPACK_PUSH(DBL2NUM((double)tmp));
1281 }
1283 break;
1284
1285 case 'e':
1286 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1287 while (len-- > 0) {
1288 FLOAT_CONVWITH(tmp);
1289 memcpy(tmp.buf, s, sizeof(float));
1290 s += sizeof(float);
1291 VTOHF(tmp);
1292 UNPACK_PUSH(DBL2NUM(tmp.f));
1293 }
1295 break;
1296
1297 case 'E':
1298 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1299 while (len-- > 0) {
1300 DOUBLE_CONVWITH(tmp);
1301 memcpy(tmp.buf, s, sizeof(double));
1302 s += sizeof(double);
1303 VTOHD(tmp);
1304 UNPACK_PUSH(DBL2NUM(tmp.d));
1305 }
1307 break;
1308
1309 case 'D':
1310 case 'd':
1311 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1312 while (len-- > 0) {
1313 double tmp;
1314 memcpy(&tmp, s, sizeof(double));
1315 s += sizeof(double);
1316 UNPACK_PUSH(DBL2NUM(tmp));
1317 }
1319 break;
1320
1321 case 'g':
1322 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1323 while (len-- > 0) {
1324 FLOAT_CONVWITH(tmp);
1325 memcpy(tmp.buf, s, sizeof(float));
1326 s += sizeof(float);
1327 NTOHF(tmp);
1328 UNPACK_PUSH(DBL2NUM(tmp.f));
1329 }
1331 break;
1332
1333 case 'G':
1334 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1335 while (len-- > 0) {
1336 DOUBLE_CONVWITH(tmp);
1337 memcpy(tmp.buf, s, sizeof(double));
1338 s += sizeof(double);
1339 NTOHD(tmp);
1340 UNPACK_PUSH(DBL2NUM(tmp.d));
1341 }
1343 break;
1344
1345 case 'U':
1346 if (len > send - s) len = send - s;
1347 while (len > 0 && s < send) {
1348 long alen = send - s;
1349 unsigned long l;
1350
1351 l = utf8_to_uv(s, &alen);
1352 s += alen; len--;
1354 }
1355 break;
1356
1357 case 'u':
1358 {
1359 VALUE buf = rb_str_new(0, (send - s)*3/4);
1360 char *ptr = RSTRING_PTR(buf);
1361 long total = 0;
1362
1363 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1364 long a,b,c,d;
1365 char hunk[3];
1366
1367 len = ((unsigned char)*s++ - ' ') & 077;
1368
1369 total += len;
1370 if (total > RSTRING_LEN(buf)) {
1371 len -= total - RSTRING_LEN(buf);
1372 total = RSTRING_LEN(buf);
1373 }
1374
1375 while (len > 0) {
1376 long mlen = len > 3 ? 3 : len;
1377
1378 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1379 a = ((unsigned char)*s++ - ' ') & 077;
1380 else
1381 a = 0;
1382 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1383 b = ((unsigned char)*s++ - ' ') & 077;
1384 else
1385 b = 0;
1386 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1387 c = ((unsigned char)*s++ - ' ') & 077;
1388 else
1389 c = 0;
1390 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1391 d = ((unsigned char)*s++ - ' ') & 077;
1392 else
1393 d = 0;
1394 hunk[0] = (char)(a << 2 | b >> 4);
1395 hunk[1] = (char)(b << 4 | c >> 2);
1396 hunk[2] = (char)(c << 6 | d);
1397 memcpy(ptr, hunk, mlen);
1398 ptr += mlen;
1399 len -= mlen;
1400 }
1401 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1402 s++; /* possible checksum byte */
1403 if (s < send && *s == '\r') s++;
1404 if (s < send && *s == '\n') s++;
1405 }
1406
1407 rb_str_set_len(buf, total);
1409 }
1410 break;
1411
1412 case 'm':
1413 {
1414 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1415 char *ptr = RSTRING_PTR(buf);
1416 int a = -1,b = -1,c = 0,d = 0;
1417 static signed char b64_xtable[256];
1418
1419 if (b64_xtable['/'] <= 0) {
1420 int i;
1421
1422 for (i = 0; i < 256; i++) {
1423 b64_xtable[i] = -1;
1424 }
1425 for (i = 0; i < 64; i++) {
1426 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1427 }
1428 }
1429 if (len == 0) {
1430 while (s < send) {
1431 a = b = c = d = -1;
1432 a = b64_xtable[(unsigned char)*s++];
1433 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1434 b = b64_xtable[(unsigned char)*s++];
1435 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1436 if (*s == '=') {
1437 if (s + 2 == send && *(s + 1) == '=') break;
1438 rb_raise(rb_eArgError, "invalid base64");
1439 }
1440 c = b64_xtable[(unsigned char)*s++];
1441 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1442 if (s + 1 == send && *s == '=') break;
1443 d = b64_xtable[(unsigned char)*s++];
1444 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1445 *ptr++ = castchar(a << 2 | b >> 4);
1446 *ptr++ = castchar(b << 4 | c >> 2);
1447 *ptr++ = castchar(c << 6 | d);
1448 }
1449 if (c == -1) {
1450 *ptr++ = castchar(a << 2 | b >> 4);
1451 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1452 }
1453 else if (d == -1) {
1454 *ptr++ = castchar(a << 2 | b >> 4);
1455 *ptr++ = castchar(b << 4 | c >> 2);
1456 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1457 }
1458 }
1459 else {
1460 while (s < send) {
1461 a = b = c = d = -1;
1462 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1463 if (s >= send) break;
1464 s++;
1465 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1466 if (s >= send) break;
1467 s++;
1468 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1469 if (*s == '=' || s >= send) break;
1470 s++;
1471 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1472 if (*s == '=' || s >= send) break;
1473 s++;
1474 *ptr++ = castchar(a << 2 | b >> 4);
1475 *ptr++ = castchar(b << 4 | c >> 2);
1476 *ptr++ = castchar(c << 6 | d);
1477 a = -1;
1478 }
1479 if (a != -1 && b != -1) {
1480 if (c == -1)
1481 *ptr++ = castchar(a << 2 | b >> 4);
1482 else {
1483 *ptr++ = castchar(a << 2 | b >> 4);
1484 *ptr++ = castchar(b << 4 | c >> 2);
1485 }
1486 }
1487 }
1490 }
1491 break;
1492
1493 case 'M':
1494 {
1495 VALUE buf = rb_str_new(0, send - s);
1496 char *ptr = RSTRING_PTR(buf), *ss = s;
1497 int csum = 0;
1498 int c1, c2;
1499
1500 while (s < send) {
1501 if (*s == '=') {
1502 if (++s == send) break;
1503 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1504 s++;
1505 if (*s != '\n') {
1506 if ((c1 = hex2num(*s)) == -1) break;
1507 if (++s == send) break;
1508 if ((c2 = hex2num(*s)) == -1) break;
1509 csum |= *ptr++ = castchar(c1 << 4 | c2);
1510 }
1511 }
1512 else {
1513 csum |= *ptr++ = *s;
1514 }
1515 s++;
1516 ss = s;
1517 }
1519 rb_str_buf_cat(buf, ss, send-ss);
1523 }
1524 break;
1525
1526 case '@':
1527 if (len > RSTRING_LEN(str))
1528 rb_raise(rb_eArgError, "@ outside of string");
1529 s = RSTRING_PTR(str) + len;
1530 break;
1531
1532 case 'X':
1533 if (len > s - RSTRING_PTR(str))
1534 rb_raise(rb_eArgError, "X outside of string");
1535 s -= len;
1536 break;
1537
1538 case 'x':
1539 if (len > send - s)
1540 rb_raise(rb_eArgError, "x outside of string");
1541 s += len;
1542 break;
1543
1544 case 'P':
1545 if (sizeof(char *) <= (size_t)(send - s)) {
1546 VALUE tmp = Qnil;
1547 char *t;
1548
1549 memcpy(&t, s, sizeof(char *));
1550 s += sizeof(char *);
1551
1552 if (t) {
1553 VALUE a;
1554 const VALUE *p, *pend;
1555
1556 if (!(a = str_associated(str))) {
1557 rb_raise(rb_eArgError, "no associated pointer");
1558 }
1559 p = RARRAY_CONST_PTR(a);
1560 pend = p + RARRAY_LEN(a);
1561 while (p < pend) {
1562 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1563 if (len < RSTRING_LEN(*p)) {
1564 tmp = rb_str_new(t, len);
1565 str_associate(tmp, a);
1566 }
1567 else {
1568 tmp = *p;
1569 }
1570 break;
1571 }
1572 p++;
1573 }
1574 if (p == pend) {
1575 rb_raise(rb_eArgError, "non associated pointer");
1576 }
1577 }
1578 UNPACK_PUSH(tmp);
1579 }
1580 break;
1581
1582 case 'p':
1583 if (len > (long)((send - s) / sizeof(char *)))
1584 len = (send - s) / sizeof(char *);
1585 while (len-- > 0) {
1586 if ((size_t)(send - s) < sizeof(char *))
1587 break;
1588 else {
1589 VALUE tmp = Qnil;
1590 char *t;
1591
1592 memcpy(&t, s, sizeof(char *));
1593 s += sizeof(char *);
1594
1595 if (t) {
1596 VALUE a;
1597 const VALUE *p, *pend;
1598
1599 if (!(a = str_associated(str))) {
1600 rb_raise(rb_eArgError, "no associated pointer");
1601 }
1602 p = RARRAY_CONST_PTR(a);
1603 pend = p + RARRAY_LEN(a);
1604 while (p < pend) {
1605 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1606 tmp = *p;
1607 break;
1608 }
1609 p++;
1610 }
1611 if (p == pend) {
1612 rb_raise(rb_eArgError, "non associated pointer");
1613 }
1614 }
1615 UNPACK_PUSH(tmp);
1616 }
1617 }
1618 break;
1619
1620 case 'w':
1621 {
1622 char *s0 = s;
1623 while (len > 0 && s < send) {
1624 if (*s & 0x80) {
1625 s++;
1626 }
1627 else {
1628 s++;
1630 len--;
1631 s0 = s;
1632 }
1633 }
1634 }
1635 break;
1636
1637 default:
1638 unknown_directive("unpack", type, fmt);
1639 break;
1640 }
1641 }
1642
1643 return ary;
1644}
1645
1646static VALUE
1647pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1648{
1650 return pack_unpack_internal(str, fmt, mode);
1651}
1652
1653static VALUE
1654pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1655{
1656 return pack_unpack_internal(str, fmt, UNPACK_1);
1657}
1658
1659int
1660rb_uv_to_utf8(char buf[6], unsigned long uv)
1661{
1662 if (uv <= 0x7f) {
1663 buf[0] = (char)uv;
1664 return 1;
1665 }
1666 if (uv <= 0x7ff) {
1667 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1668 buf[1] = castchar((uv&0x3f)|0x80);
1669 return 2;
1670 }
1671 if (uv <= 0xffff) {
1672 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1673 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1674 buf[2] = castchar((uv&0x3f)|0x80);
1675 return 3;
1676 }
1677 if (uv <= 0x1fffff) {
1678 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1679 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1680 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1681 buf[3] = castchar((uv&0x3f)|0x80);
1682 return 4;
1683 }
1684 if (uv <= 0x3ffffff) {
1685 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1686 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1687 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1688 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1689 buf[4] = castchar((uv&0x3f)|0x80);
1690 return 5;
1691 }
1692 if (uv <= 0x7fffffff) {
1693 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1694 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1695 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1696 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1697 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1698 buf[5] = castchar((uv&0x3f)|0x80);
1699 return 6;
1700 }
1701 rb_raise(rb_eRangeError, "pack(U): value out of range");
1702
1704}
1705
/*
 * utf8_limits[i] is the smallest value that needs an (i + 1)-byte sequence;
 * the trailing comment on each entry is that byte count.  utf8_to_uv()
 * indexes this table with (sequence length - 1) and rejects a decoded value
 * below the entry as a redundant (overlong) encoding.
 */
static const unsigned long utf8_limits[] = {
    0x0,                        /* 1 */
    0x80,                       /* 2 */
    0x800,                      /* 3 */
    0x10000,                    /* 4 */
    0x200000,                   /* 5 */
    0x4000000,                  /* 6 */
    0x80000000,                 /* 7 */
};
1715
1716static unsigned long
1717utf8_to_uv(const char *p, long *lenp)
1718{
1719 int c = *p++ & 0xff;
1720 unsigned long uv = c;
1721 long n;
1722
1723 if (!(uv & 0x80)) {
1724 *lenp = 1;
1725 return uv;
1726 }
1727 if (!(uv & 0x40)) {
1728 *lenp = 1;
1729 rb_raise(rb_eArgError, "malformed UTF-8 character");
1730 }
1731
1732 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1733 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1734 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1735 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1736 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1737 else {
1738 *lenp = 1;
1739 rb_raise(rb_eArgError, "malformed UTF-8 character");
1740 }
1741 if (n > *lenp) {
1742 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1743 n, *lenp);
1744 }
1745 *lenp = n--;
1746 if (n != 0) {
1747 while (n--) {
1748 c = *p++ & 0xff;
1749 if ((c & 0xc0) != 0x80) {
1750 *lenp -= n + 1;
1751 rb_raise(rb_eArgError, "malformed UTF-8 character");
1752 }
1753 else {
1754 c &= 0x3f;
1755 uv = uv << 6 | c;
1756 }
1757 }
1758 }
1759 n = *lenp - 1;
1760 if (uv < utf8_limits[n]) {
1761 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1762 }
1763 return uv;
1764}
1765
1766#include "pack.rbinc"
1767
1768void
1770{
1771 id_associated = rb_make_internal_id();
1772}
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1301
VALUE rb_ary_new(void)
Definition: array.c:749
#define UNREACHABLE_RETURN
Definition: assume.h:31
int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3553
size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
Definition: bignum.c:3388
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3639
Internal header for bitwise integer algorithms.
int bits(struct state *s, int need)
Definition: blast.c:72
Our own, locale independent, character handling routines.
#define ISSPACE
Definition: ctype.h:38
#define STRTOUL
Definition: ctype.h:54
#define ISDIGIT
Definition: ctype.h:43
#define ISALPHA
Definition: ctype.h:42
#define ISASCII
Definition: ctype.h:35
#define ISPRINT
Definition: ctype.h:36
#define add(x, y)
Definition: date_strftime.c:23
struct RIMemo * ptr
Definition: debug.c:88
char * strchr(char *, char)
#define RFLOAT_VALUE
Definition: double.h:28
#define DBL2NUM
Definition: double.h:29
#define rb_ascii8bit_encindex()
Definition: encindex.h:57
#define rb_usascii_encindex()
Definition: encindex.h:59
#define rb_utf8_encindex()
Definition: encindex.h:58
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:1028
uint8_t len
Definition: escape.c:17
const signed char ruby_digit36_to_number_table[]
Definition: util.c:79
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
#define RSTRING_LEN(string)
Definition: fbuffer.h:22
#define RSTRING_PTR(string)
Definition: fbuffer.h:19
#define memcpy(d, s, n)
Definition: ffi_common.h:55
#define PRIsVALUE
Definition: function.c:10
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:935
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2917
void rb_bug(const char *fmt,...)
Definition: error.c:768
VALUE rb_eRangeError
Definition: error.c:1061
VALUE rb_eTypeError
Definition: error.c:1057
VALUE rb_eRuntimeError
Definition: error.c:1055
VALUE rb_eArgError
Definition: error.c:1058
void rb_warning(const char *fmt,...)
Definition: error.c:439
VALUE rb_to_float(VALUE)
Converts a Numeric object into Float.
Definition: object.c:3559
VALUE rb_to_int(VALUE)
Converts val into Integer.
Definition: object.c:3051
#define ENC_CODERANGE_7BIT
Definition: encoding.h:93
#define ENC_CODERANGE_VALID
Definition: encoding.h:94
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:102
Thin wrapper to ruby/config.h.
Defines RBIMPL_HAS_BUILTIN.
#define INTEGER_PACK_LITTLE_ENDIAN
Definition: bignum.h:91
#define INTEGER_PACK_BIG_ENDIAN
Definition: bignum.h:94
#define INTEGER_PACK_2COMP
Definition: bignum.h:85
#define rb_str_new(str, len)
Definition: string.h:213
#define rb_str_buf_cat
Definition: string.h:283
#define rb_usascii_str_new(str, len)
Definition: string.h:224
void rb_str_set_len(VALUE, long)
Definition: string.c:2842
VALUE rb_str_buf_new(long)
Definition: string.c:1398
VALUE rb_str_subseq(VALUE, long, long)
Definition: string.c:2624
VALUE rb_obj_as_string(VALUE)
Definition: string.c:1529
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1493
#define NAN
Definition: missing.h:151
#define INFINITY
Definition: missing.h:144
Internal header for Array.
Internal header for String.
VALUE rb_str_quote_unprintable(VALUE)
Definition: string.c:11046
ID rb_make_internal_id(void)
Definition: symbol.c:953
Internal header corresponding util.c.
VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef)
Definition: variable.c:1192
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
const char int mode
Definition: ioapi.h:137
voidpf void * buf
Definition: ioapi.h:138
#define ULONG2NUM
Definition: long.h:60
#define NUM2LONG
Definition: long.h:51
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
#define FLOAT_CONVWITH(x)
Definition: pack.c:105
#define UNPACK_ARRAY
Definition: pack.c:930
#define hexdigits
#define HTOVF(x)
Definition: pack.c:107
#define VTOHD(x)
Definition: pack.c:115
#define AVOID_CC_BUG
Definition: pack.c:926
#define UNPACK_PUSH(item)
#define DOUBLE_CONVWITH(x)
Definition: pack.c:111
void Init_pack(void)
Definition: pack.c:1769
#define NTOHD(x)
Definition: pack.c:114
#define HTOVD(x)
Definition: pack.c:113
#define NATINT_LEN(type, len)
Definition: pack.c:78
#define HTONF(x)
Definition: pack.c:106
#define VTOHF(x)
Definition: pack.c:109
#define PACK_LENGTH_ADJUST_SIZE(sz)
Definition: pack.c:904
#define UNPACK_1
Definition: pack.c:932
#define BIGENDIAN_P()
Definition: pack.c:74
#define MAX_INTEGER_PACK_SIZE
Definition: pack.c:117
#define NATINT_LEN_Q
Definition: pack.c:48
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Definition: pack.c:1660
#define PACK_ITEM_ADJUST()
Definition: pack.c:914
#define castchar(from)
#define THISFROM
#define NEXTFROM
#define HTOND(x)
Definition: pack.c:112
#define NTOHF(x)
Definition: pack.c:108
#define UNPACK_BLOCK
Definition: pack.c:931
#define RARRAY_CONST_PTR(s)
Definition: psych_emitter.c:4
#define RARRAY_LEN
Definition: rarray.h:52
#define NULL
Definition: regenc.h:69
#define StringValue(v)
Definition: rstring.h:50
#define StringValuePtr(v)
Definition: rstring.h:51
const char * rb_obj_classname(VALUE)
Definition: variable.c:308
unsigned int uint32_t
Definition: sha2.h:101
unsigned long long uint64_t
Definition: sha2.h:102
#define Qnil
#define Qfalse
#define NIL_P
#define f
#define snprintf
Definition: subst.h:14
#define t
Definition: symbol.c:253
#define le(x, y)
Definition: time.c:95
uint64_t u
Definition: pack.c:90
double d
Definition: pack.c:89
uint32_t u
Definition: pack.c:85
float f
Definition: pack.c:84
unsigned long VALUE
Definition: value.h:38
#define SIGNED_VALUE
Definition: value.h:40
unsigned long ID
Definition: value.h:39
#define T_STRING
Definition: value_type.h:77
#define EOF
Definition: vsnprintf.c:203
int intptr_t
Definition: win32.h:90
#define isnan(x)
Definition: win32.h:346
unsigned int uintptr_t
Definition: win32.h:106