Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
Data Structures | Macros | Typedefs | Functions | Variables
encoding.c File Reference
#include "ruby/internal/config.h"
#include <ctype.h>
#include "encindex.h"
#include "internal.h"
#include "internal/enc.h"
#include "internal/encoding.h"
#include "internal/inits.h"
#include "internal/load.h"
#include "internal/object.h"
#include "internal/string.h"
#include "internal/vm.h"
#include "regenc.h"
#include "ruby/encoding.h"
#include "ruby/util.h"
#include "ruby_assert.h"
#include "vm_sync.h"

Go to the source code of this file.

Data Structures

struct  rb_encoding_entry
 
struct  enc_table
 
struct  default_encoding
 

Macros

#define ENC_DEBUG   0
 
#define ENC_ASSERT(expr)   RUBY_ASSERT_WHEN(ENC_DEBUG, expr)
 
#define MUST_STRING(str)   (ENC_ASSERT(RB_TYPE_P(str, T_STRING)), str)
 
#define DEFAULT_ENCODING_LIST_CAPA   128
 
#define GLOBAL_ENC_TABLE_ENTER(enc_table)   struct enc_table *enc_table = &global_enc_table; RB_VM_LOCK_ENTER()
 
#define GLOBAL_ENC_TABLE_LEAVE()   RB_VM_LOCK_LEAVE()
 
#define GLOBAL_ENC_TABLE_EVAL(enc_table, expr)
 
#define ENC_DUMMY_FLAG   (1<<24)
 
#define ENC_INDEX_MASK   (~(~0U<<24))
 
#define ENC_TO_ENCINDEX(enc)   (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
 
#define ENC_DUMMY_P(enc)   ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
 
#define ENC_SET_DUMMY(enc)   ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
 
#define ENCODING_COUNT   ENCINDEX_BUILTIN_MAX
 
#define UNSPECIFIED_ENCODING   INT_MAX
 
#define ENCODING_NAMELEN_MAX   63
 
#define valid_encoding_name_p(name)   ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
 
#define is_data_encoding(obj)   (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
 
#define is_obj_encoding(obj)   (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
 
#define ENC_REGISTER(enc)   enc_register_at(enc_table, ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
 
#define ENCDB_REGISTER(name, enc)   enc_register_at(enc_table, ENCINDEX_##enc, name, NULL)
 

Typedefs

typedef OnigEncodingType rb_raw_encoding
 

Functions

int rb_enc_register (const char *name, rb_encoding *encoding)
 
void rb_enc_set_base (const char *name, const char *orig)
 
int rb_enc_set_dummy (int index)
 
void rb_encdb_declare (const char *name)
 
int rb_encdb_replicate (const char *name, const char *orig)
 
int rb_encdb_dummy (const char *name)
 
int rb_encdb_alias (const char *alias, const char *orig)
 
void rb_encdb_set_unicode (int index)
 
int rb_data_is_encoding (VALUE obj)
 
VALUE rb_enc_from_encoding (rb_encoding *encoding)
 
int rb_enc_to_index (rb_encoding *enc)
 
int rb_enc_dummy_p (rb_encoding *enc)
 
int rb_to_encoding_index (VALUE enc)
 
rb_encoding * rb_to_encoding (VALUE enc)
 
rb_encoding * rb_find_encoding (VALUE enc)
 
rb_encoding * rb_enc_from_index (int index)
 
int rb_enc_replicate (const char *name, rb_encoding *encoding)
 
int rb_define_dummy_encoding (const char *name)
 
int rb_enc_unicode_p (rb_encoding *enc)
 
int rb_enc_alias (const char *alias, const char *orig)
 
rb_encoding * rb_enc_get_from_index (int index)
 
int rb_enc_autoload (rb_encoding *enc)
 
int rb_enc_find_index (const char *name)
 
int rb_enc_find_index2 (const char *name, long len)
 
rb_encoding * rb_enc_find (const char *name)
 
int rb_enc_capable (VALUE obj)
 
ID rb_id_encoding (void)
 
int rb_enc_get_index (VALUE obj)
 
void rb_enc_set_index (VALUE obj, int idx)
 
VALUE rb_enc_associate_index (VALUE obj, int idx)
 
VALUE rb_enc_associate (VALUE obj, rb_encoding *enc)
 
rb_encoding * rb_enc_get (VALUE obj)
 
rb_encoding * rb_enc_check_str (VALUE str1, VALUE str2)
 
rb_encoding * rb_enc_check (VALUE str1, VALUE str2)
 
rb_encoding * rb_enc_compatible (VALUE str1, VALUE str2)
 
void rb_enc_copy (VALUE obj1, VALUE obj2)
 
VALUE rb_obj_encoding (VALUE obj)
 
int rb_enc_fast_mbclen (const char *p, const char *e, rb_encoding *enc)
 
int rb_enc_mbclen (const char *p, const char *e, rb_encoding *enc)
 
int rb_enc_precise_mbclen (const char *p, const char *e, rb_encoding *enc)
 
int rb_enc_ascget (const char *p, const char *e, int *len, rb_encoding *enc)
 
unsigned int rb_enc_codepoint_len (const char *p, const char *e, int *len_p, rb_encoding *enc)
 
unsigned int rb_enc_codepoint (const char *p, const char *e, rb_encoding *enc)
 
int rb_enc_codelen (int c, rb_encoding *enc)
 
int rb_enc_code_to_mbclen (int code, rb_encoding *enc)
 
int rb_enc_toupper (int c, rb_encoding *enc)
 
int rb_enc_tolower (int c, rb_encoding *enc)
 
rb_encoding * rb_ascii8bit_encoding (void)
 
int rb_ascii8bit_encindex (void)
 
rb_encoding * rb_utf8_encoding (void)
 
int rb_utf8_encindex (void)
 
rb_encoding * rb_usascii_encoding (void)
 
int rb_usascii_encindex (void)
 
int rb_locale_charmap_index (void)
 
int rb_locale_encindex (void)
 
rb_encoding * rb_locale_encoding (void)
 
int rb_filesystem_encindex (void)
 
rb_encoding * rb_filesystem_encoding (void)
 
rb_encoding * rb_default_external_encoding (void)
 
VALUE rb_enc_default_external (void)
 
void rb_enc_set_default_external (VALUE encoding)
 
rb_encoding * rb_default_internal_encoding (void)
 
VALUE rb_enc_default_internal (void)
 
void rb_enc_set_default_internal (VALUE encoding)
 
void Init_Encoding (void)
 
void Init_encodings (void)
 
void rb_enc_foreach_name (int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
 

Variables

VALUE rb_cEncoding
 

Macro Definition Documentation

◆ DEFAULT_ENCODING_LIST_CAPA

#define DEFAULT_ENCODING_LIST_CAPA   128

Definition at line 59 of file encoding.c.

◆ ENC_ASSERT

#define ENC_ASSERT (   expr)    RUBY_ASSERT_WHEN(ENC_DEBUG, expr)

Definition at line 34 of file encoding.c.

◆ ENC_DEBUG

#define ENC_DEBUG   0

Definition at line 32 of file encoding.c.

◆ ENC_DUMMY_FLAG

#define ENC_DUMMY_FLAG   (1<<24)

Definition at line 91 of file encoding.c.

◆ ENC_DUMMY_P

#define ENC_DUMMY_P (   enc)    ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)

Definition at line 95 of file encoding.c.

◆ ENC_INDEX_MASK

#define ENC_INDEX_MASK   (~(~0U<<24))

Definition at line 92 of file encoding.c.

◆ ENC_REGISTER

#define ENC_REGISTER (   enc)    enc_register_at(enc_table, ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)

◆ ENC_SET_DUMMY

#define ENC_SET_DUMMY (   enc)    ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)

Definition at line 96 of file encoding.c.

◆ ENC_TO_ENCINDEX

#define ENC_TO_ENCINDEX (   enc)    (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)

Definition at line 94 of file encoding.c.

◆ ENCDB_REGISTER

#define ENCDB_REGISTER (   name,
  enc 
)    enc_register_at(enc_table, ENCINDEX_##enc, name, NULL)

◆ ENCODING_COUNT

#define ENCODING_COUNT   ENCINDEX_BUILTIN_MAX

Definition at line 98 of file encoding.c.

◆ ENCODING_NAMELEN_MAX

#define ENCODING_NAMELEN_MAX   63

Definition at line 101 of file encoding.c.

◆ GLOBAL_ENC_TABLE_ENTER

#define GLOBAL_ENC_TABLE_ENTER (   enc_table)    struct enc_table *enc_table = &global_enc_table; RB_VM_LOCK_ENTER()

Definition at line 80 of file encoding.c.

◆ GLOBAL_ENC_TABLE_EVAL

#define GLOBAL_ENC_TABLE_EVAL (   enc_table,
  expr 
)
Value:
do { \
GLOBAL_ENC_TABLE_ENTER(enc_table); \
{ \
expr; \
} \
GLOBAL_ENC_TABLE_LEAVE(); \
} while (0)

Definition at line 82 of file encoding.c.

◆ GLOBAL_ENC_TABLE_LEAVE

#define GLOBAL_ENC_TABLE_LEAVE ( )    RB_VM_LOCK_LEAVE()

Definition at line 81 of file encoding.c.

◆ is_data_encoding

#define is_data_encoding (   obj)    (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)

Definition at line 110 of file encoding.c.

◆ is_obj_encoding

#define is_obj_encoding (   obj)    (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))

Definition at line 111 of file encoding.c.

◆ MUST_STRING

#define MUST_STRING (   str)    (ENC_ASSERT(RB_TYPE_P(str, T_STRING)), str)

Definition at line 35 of file encoding.c.

◆ UNSPECIFIED_ENCODING

#define UNSPECIFIED_ENCODING   INT_MAX

Definition at line 99 of file encoding.c.

◆ valid_encoding_name_p

#define valid_encoding_name_p (   name)    ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)

Definition at line 102 of file encoding.c.

Typedef Documentation

◆ rb_raw_encoding

typedef OnigEncodingType rb_raw_encoding

Definition at line 41 of file encoding.c.

Function Documentation

◆ Init_Encoding()

void Init_Encoding ( void  )

◆ Init_encodings()

void Init_encodings ( void  )

Definition at line 2200 of file encoding.c.

◆ rb_ascii8bit_encindex()

int rb_ascii8bit_encindex ( void  )

Definition at line 1531 of file encoding.c.

References ENCINDEX_ASCII.

◆ rb_ascii8bit_encoding()

rb_encoding * rb_ascii8bit_encoding ( void  )

◆ rb_data_is_encoding()

int rb_data_is_encoding ( VALUE  obj)

Definition at line 114 of file encoding.c.

References is_data_encoding.

◆ rb_default_external_encoding()

rb_encoding * rb_default_external_encoding ( void  )

◆ rb_default_internal_encoding()

rb_encoding * rb_default_internal_encoding ( void  )

◆ rb_define_dummy_encoding()

int rb_define_dummy_encoding ( const char *  name)

◆ rb_enc_alias()

int rb_enc_alias ( const char *  alias,
const char *  orig 
)

Definition at line 720 of file encoding.c.

References alias, GLOBAL_ENC_TABLE_ENTER, GLOBAL_ENC_TABLE_LEAVE, and rb_enc_find_index().

◆ rb_enc_ascget()

int rb_enc_ascget ( const char *  p,
const char *  e,
int *  len,
rb_encoding *  enc 
)

◆ rb_enc_associate()

VALUE rb_enc_associate ( VALUE  obj,
rb_encoding *  enc 
)

◆ rb_enc_associate_index()

VALUE rb_enc_associate_index ( VALUE  obj,
int  idx 
)

◆ rb_enc_autoload()

int rb_enc_autoload ( rb_encoding *  enc)

Definition at line 867 of file encoding.c.

References rb_encoding_entry::enc, GLOBAL_ENC_TABLE_EVAL, and rb_enc_name.

Referenced by rb_enc_find_index(), and rb_enc_interned_str().

◆ rb_enc_capable()

int rb_enc_capable ( VALUE  obj)

Definition at line 941 of file encoding.c.

◆ rb_enc_check()

rb_encoding * rb_enc_check ( VALUE  str1,
VALUE  str2 
)

◆ rb_enc_check_str()

rb_encoding * rb_enc_check_str ( VALUE  str1,
VALUE  str2 
)

Definition at line 1078 of file encoding.c.

References rb_encoding_entry::enc, MUST_STRING, rb_eEncCompatError, rb_enc_get(), rb_enc_name, and rb_raise().

Referenced by rb_str_plus().

◆ rb_enc_code_to_mbclen()

int rb_enc_code_to_mbclen ( int  code,
rb_encoding *  enc 
)

Definition at line 1298 of file encoding.c.

References rb_encoding_entry::enc, and ONIGENC_CODE_TO_MBCLEN.

◆ rb_enc_codelen()

int rb_enc_codelen ( int  c,
rb_encoding *  enc 
)

◆ rb_enc_codepoint()

unsigned int rb_enc_codepoint ( const char *  p,
const char *  e,
rb_encoding *  enc 
)

Definition at line 1281 of file encoding.c.

References rb_encoding_entry::enc, and rb_enc_codepoint_len().

◆ rb_enc_codepoint_len()

unsigned int rb_enc_codepoint_len ( const char *  p,
const char *  e,
int *  len_p,
rb_encoding *  enc 
)

◆ rb_enc_compatible()

rb_encoding * rb_enc_compatible ( VALUE  str1,
VALUE  str2 
)

Definition at line 1172 of file encoding.c.

References rb_enc_from_index(), and rb_enc_get_index().

Referenced by rb_enc_check().

◆ rb_enc_copy()

void rb_enc_copy ( VALUE  obj1,
VALUE  obj2 
)

◆ rb_enc_default_external()

VALUE rb_enc_default_external ( void  )

Definition at line 1661 of file encoding.c.

References rb_default_external_encoding(), and rb_enc_from_encoding().

◆ rb_enc_default_internal()

VALUE rb_enc_default_internal ( void  )

Definition at line 1743 of file encoding.c.

References rb_default_internal_encoding(), and rb_enc_from_encoding().

◆ rb_enc_dummy_p()

int rb_enc_dummy_p ( rb_encoding *  enc)

Definition at line 203 of file encoding.c.

References rb_encoding_entry::enc, and ENC_DUMMY_P.

◆ rb_enc_fast_mbclen()

int rb_enc_fast_mbclen ( const char *  p,
const char *  e,
rb_encoding *  enc 
)

Definition at line 1212 of file encoding.c.

References rb_encoding_entry::enc, ONIGENC_MBC_ENC_LEN, and UChar.

◆ rb_enc_find()

rb_encoding * rb_enc_find ( const char *  name)

Definition at line 916 of file encoding.c.

References name, rb_enc_find_index(), and rb_enc_from_index().

◆ rb_enc_find_index()

int rb_enc_find_index ( const char *  name)

◆ rb_enc_find_index2()

int rb_enc_find_index2 ( const char *  name,
long  len 
)

Definition at line 905 of file encoding.c.

References ENCODING_NAMELEN_MAX, len, memcpy, name, and rb_enc_find_index().

◆ rb_enc_foreach_name()

void rb_enc_foreach_name ( int(*)(st_data_t name, st_data_t idx, st_data_t arg)  func,
st_data_t  arg 
)

Definition at line 2208 of file encoding.c.

References GLOBAL_ENC_TABLE_EVAL, and st_foreach.

Referenced by Init_w32_codepage().

◆ rb_enc_from_encoding()

VALUE rb_enc_from_encoding ( rb_encoding *  encoding)

◆ rb_enc_from_index()

rb_encoding * rb_enc_from_index ( int  index)

◆ rb_enc_get()

rb_encoding * rb_enc_get ( VALUE  obj)

◆ rb_enc_get_from_index()

rb_encoding * rb_enc_get_from_index ( int  index)

Definition at line 795 of file encoding.c.

◆ rb_enc_get_index()

int rb_enc_get_index ( VALUE  obj)

◆ rb_enc_mbclen()

int rb_enc_mbclen ( const char *  p,
const char *  e,
rb_encoding *  enc 
)

◆ rb_enc_precise_mbclen()

int rb_enc_precise_mbclen ( const char *  p,
const char *  e,
rb_encoding *  enc 
)

◆ rb_enc_register()

int rb_enc_register ( const char *  name,
rb_encoding *  encoding 
)

◆ rb_enc_replicate()

int rb_enc_replicate ( const char *  name,
rb_encoding *  encoding 
)

Definition at line 549 of file encoding.c.

References GLOBAL_ENC_TABLE_EVAL, and name.

◆ rb_enc_set_base()

void rb_enc_set_base ( const char *  name,
const char *  orig 
)

Definition at line 509 of file encoding.c.

References GLOBAL_ENC_TABLE_ENTER, GLOBAL_ENC_TABLE_LEAVE, name, and rb_enc_from_index().

◆ rb_enc_set_default_external()

void rb_enc_set_default_external ( VALUE  encoding)

Definition at line 1701 of file encoding.c.

References NIL_P, rb_eArgError, and rb_raise().

◆ rb_enc_set_default_internal()

void rb_enc_set_default_internal ( VALUE  encoding)

Definition at line 1784 of file encoding.c.

◆ rb_enc_set_dummy()

int rb_enc_set_dummy ( int  index)

Definition at line 524 of file encoding.c.

References rb_encoding_entry::enc, ENC_SET_DUMMY, and GLOBAL_ENC_TABLE_EVAL.

◆ rb_enc_set_index()

void rb_enc_set_index ( VALUE  obj,
int  idx 
)

Definition at line 1028 of file encoding.c.

References rb_check_frozen.

Referenced by rb_str_concat_literals().

◆ rb_enc_to_index()

int rb_enc_to_index ( rb_encoding *  enc)

◆ rb_enc_tolower()

int rb_enc_tolower ( int  c,
rb_encoding *  enc 
)

Definition at line 1310 of file encoding.c.

References ONIGENC_ASCII_CODE_TO_LOWER_CASE, and ONIGENC_IS_ASCII_CODE.

◆ rb_enc_toupper()

int rb_enc_toupper ( int  c,
rb_encoding *  enc 
)

Definition at line 1304 of file encoding.c.

References ONIGENC_ASCII_CODE_TO_UPPER_CASE, and ONIGENC_IS_ASCII_CODE.

Referenced by rb_str_format().

◆ rb_enc_unicode_p()

int rb_enc_unicode_p ( rb_encoding *  enc)

Definition at line 688 of file encoding.c.

References rb_encoding_entry::enc, and ONIGENC_IS_UNICODE.

Referenced by rb_str_escape(), and rb_str_inspect().

◆ rb_encdb_alias()

int rb_encdb_alias ( const char *  alias,
const char *  orig 
)

Definition at line 740 of file encoding.c.

References alias, GLOBAL_ENC_TABLE_ENTER, and GLOBAL_ENC_TABLE_LEAVE.

Referenced by Init_enc().

◆ rb_encdb_declare()

void rb_encdb_declare ( const char *  name)

Definition at line 473 of file encoding.c.

References GLOBAL_ENC_TABLE_ENTER, GLOBAL_ENC_TABLE_LEAVE, name, and rb_enc_from_index().

Referenced by Init_enc().

◆ rb_encdb_dummy()

int rb_encdb_dummy ( const char *  name)

◆ rb_encdb_replicate()

int rb_encdb_replicate ( const char *  name,
const char *  orig 
)

Definition at line 596 of file encoding.c.

References GLOBAL_ENC_TABLE_ENTER, GLOBAL_ENC_TABLE_LEAVE, name, and rb_enc_from_index().

◆ rb_encdb_set_unicode()

void rb_encdb_set_unicode ( int  index)

Definition at line 759 of file encoding.c.

References ONIGENC_FLAG_UNICODE, and rb_enc_from_index().

◆ rb_filesystem_encindex()

int rb_filesystem_encindex ( void  )

Definition at line 1589 of file encoding.c.

References ENCINDEX_ASCII, and GLOBAL_ENC_TABLE_EVAL.

Referenced by rb_filesystem_encoding(), and rb_str_encode_ospath().

◆ rb_filesystem_encoding()

rb_encoding * rb_filesystem_encoding ( void  )

◆ rb_find_encoding()

rb_encoding * rb_find_encoding ( VALUE  enc)

Definition at line 336 of file encoding.c.

References rb_encoding_entry::enc, NULL, rb_enc_from_index(), and RDATA.

◆ rb_id_encoding()

ID rb_id_encoding ( void  )

Definition at line 947 of file encoding.c.

References CONST_ID.

Referenced by Init_IO().

◆ rb_locale_charmap_index()

int rb_locale_charmap_index ( void  )

Definition at line 109 of file localeinit.c.

References ENCINDEX_US_ASCII.

Referenced by rb_locale_encindex().

◆ rb_locale_encindex()

int rb_locale_encindex ( void  )

◆ rb_locale_encoding()

rb_encoding * rb_locale_encoding ( void  )

◆ rb_obj_encoding()

VALUE rb_obj_encoding ( VALUE  obj)

Definition at line 1202 of file encoding.c.

References ENC_INDEX_MASK, rb_enc_get_index(), rb_eTypeError, and rb_raise().

Referenced by Init_Regexp(), and Init_String().

◆ rb_to_encoding()

rb_encoding * rb_to_encoding ( VALUE  enc)

Definition at line 329 of file encoding.c.

References rb_encoding_entry::enc, and RDATA.

Referenced by rb_io_extract_encoding_option().

◆ rb_to_encoding_index()

int rb_to_encoding_index ( VALUE  enc)

◆ rb_usascii_encindex()

int rb_usascii_encindex ( void  )

Definition at line 1555 of file encoding.c.

References ENCINDEX_US_ASCII.

◆ rb_usascii_encoding()

rb_encoding * rb_usascii_encoding ( void  )

◆ rb_utf8_encindex()

int rb_utf8_encindex ( void  )

Definition at line 1543 of file encoding.c.

References ENCINDEX_UTF_8.

◆ rb_utf8_encoding()

rb_encoding * rb_utf8_encoding ( void  )

Variable Documentation

◆ rb_cEncoding

VALUE rb_cEncoding

Definition at line 57 of file encoding.c.

Referenced by Init_Encoding(), Init_Exception(), and InitVM_transcode().