e$B$J$+$@$G$9!#e(B
-- coding: euc-jp -- e$B$J$I$H$7$F$*$/$H!"$9$Y$F$NJ8;zNs%j%F%i%ke(B
e$B$Ne(Bencodinge$B$,e(B"EUC-JP"e$B$K$J$C$F$$$^$9$,!"e(BASCIIe$B$N$_$+$i$J$kJ8;zNs$Ne(B
e$B>l9g$Oe(BUS-ASCIIe$B$N$^$^$N$[$&$,JXMx$G$O$J$$$G$7$g$&$+!#e(B
Index: encoding.c
--- encoding.c (revision 13339)
+++ encoding.c (working copy)
@@ -153,17 +153,13 @@ rb_enc_check(VALUE str1, VALUE str2)
if (idx1 == 0) {
enc = rb_enc_from_index(idx2);
-#if 0
- if (m17n_asciicompat(enc)) {
- if (rb_enc_asciicompat(enc)) {
return enc;
}
-#endif
}
else if (idx2 == 0) {
enc = rb_enc_from_index(idx1);
-#if 0
- if (m17n_asciicompat(enc)) {
- if (rb_enc_asciicompat(enc)) {
return enc;
}
-#endif
}
rb_raise(rb_eArgError, "character encodings differ");
Index: parse.y
===================================================================
--- parse.y (revision 13339)
+++ parse.y (working copy)
@@ -264,4 +264,6 @@ struct parser_params {
#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
+#define STR_NEW3(p,n,m) rb_enc_str_new((p),(n), STR_ENC(m))
+#define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
#ifdef YYMALLOC
@@ -3882,5 +3884,5 @@ dsym : tSYMBEG xstring_contents tSTRING
break;
}
-
$$->nd_lit = ID2SYM(rb_intern2(RSTRING_PTR(lit),
RSTRING_LEN(lit)));
-
$$->nd_lit = ID2SYM(rb_intern_str(lit)); nd_set_type($$, NODE_LIT); break;
@@ -4474,5 +4476,5 @@ none : /* none */
static int parser_regx_options(struct parser_params*);
-static int parser_tokadd_string(struct
parser_params*,int,int,int,long*);
+static int parser_tokadd_string(struct
parser_params*,int,int,int,long*,int*);
static int parser_parse_string(struct parser_params*,NODE*);
static int parser_here_document(struct parser_params*,NODE*);
@@ -4485,5 +4487,5 @@ static int parser_here_document(struct p
# define tokadd_escape(t) parser_tokadd_escape(parser, t)
# define regx_options() parser_regx_options(parser)
-# define tokadd_string(f,t,p,n) parser_tokadd_string(parser,f,t,p,n)
+# define tokadd_string(f,t,p,n,m)
parser_tokadd_string(parser,f,t,p,n,m)
# define parse_string(n) parser_parse_string(parser,n)
# define here_document(n) parser_here_document(parser,n)
@@ -5132,13 +5134,22 @@ dispose_string(VALUE str)
}
+static void
+parser_tokadd_mbchar(struct parser_params *parser, int c)
+{
- int len = parser_mbclen();
- do {
- tokadd(c);
- } while (--len > 0 && (c = nextc()) != -1);
+}
+#define tokadd_mbchar(c) parser_tokadd_mbchar(parser, c)
+
static int
parser_tokadd_string(struct parser_params *parser,
- int func, int term, int paren, long *nest)
-
int func, int term, int paren, long *nest, int *mb)
{
int c;
-
unsigned char uc;
while ((c = nextc()) != -1) {
-
uc = (unsigned char)c;
if (paren && c == paren) {
++*nest;
@@ -5192,10 +5203,7 @@ parser_tokadd_string(struct parser_param
}
else if (parser_ismbchar()) { -
int i, len = parser_mbclen()-1;
-
for (i = 0; i < len; i++) {
-
tokadd(c);
-
c = nextc();
-
}
-
tokadd_mbchar(c);
-
if (mb) *mb = 1;
-
}continue;
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
@@ -5222,5 +5230,5 @@ parser_parse_string(struct parser_params
int term = nd_term(quote);
int paren = nd_paren(quote);
- int c, space = 0;
-
int c, space = 0, mb = 0;
if (func == -1) return tSTRING_END;
@@ -5256,5 +5264,5 @@ parser_parse_string(struct parser_params
}
pushback(c);
- if (tokadd_string(func, term, paren, &quote->nd_nest) == -1) {
-
if (tokadd_string(func, term, paren, &quote->nd_nest, &mb) == -1) {
if (func & STR_FUNC_REGEXP) {
ruby_sourceline = nd_line(quote);
@@ -5270,5 +5278,5 @@ parser_parse_string(struct parser_paramstokfix();
- set_yylval_str(STR_NEW(tok(), toklen()));
- set_yylval_str(STR_NEW3(tok(), toklen(), mb));
return tSTRING_CONTENT;
}
@@ -5433,4 +5441,5 @@ parser_here_document(struct parser_param
}
else { - int mb = 0;
newtok();
if (c == '#') {
@@ -5447,7 +5456,7 @@ parser_here_document(struct parser_param
do {
pushback(c);
-
if ((c = tokadd_string(func, '\n', 0, NULL)) == -1) goto error;
-
if ((c = tokadd_string(func, '\n', 0, NULL, &mb)) == -1) goto
error;
if (c != '\n') {
-
set_yylval_str(STR_NEW(tok(), toklen()));
-
return tSTRING_CONTENT;set_yylval_str(STR_NEW3(tok(), toklen(), mb));
}
@@ -5455,5 +5464,5 @@ parser_here_document(struct parser_param
if ((c = nextc()) == -1) goto error;
} while (!whole_match_p(eos, len, indent));
- str = STR_NEW(tok(), toklen());
- str = STR_NEW3(tok(), toklen(), mb);
}
heredoc_restore(lex_strterm);
@@ -5669,4 +5678,5 @@ parser_yylex(struct parser_params *parse
int cmd_state;
enum lex_state_e last_state; - int mb;
#ifdef RIPPER
int fallthru = Qfalse;
@@ -5987,11 +5997,5 @@ parser_yylex(struct parser_params *parse
newtok();
if (parser_ismbchar()) {
-
int i, len = parser_mbclen()-1;
-
tokadd(c);
-
for (i = 0; i < len; i++) {
- c = nextc();
- tokadd(c);
-
}
-
}tokadd_mbchar(c);
else if ((rb_enc_isalnum(c, parser->enc) || c == '_') &&
@@ -6678,5 +6682,5 @@ parser_yylex(struct parser_params *parse
c = nextc();
if (parser_is_identchar()) {
- tokadd(c);
-
tokadd_mbchar(c);
}
else {
@@ -6776,13 +6780,8 @@ parser_yylex(struct parser_params *parse
} -
mb = 0;
do {
-
int i, len;
- tokadd(c);
- len = parser_mbclen()-1;
-
for (i = 0; i < len; i++) {
-
c = nextc();
-
tokadd(c);
- }
- if (!ISASCII(c)) mb = 1;
- tokadd_mbchar(c);
c = nextc();
} while (parser_is_identchar());
@@ -6836,5 +6835,5 @@ parser_yylex(struct parser_params *parse
}
-
if (lex_state != EXPR_DOT) {
-
const struct kwtable *kw;if (!mb && lex_state != EXPR_DOT) {
@@ -6878,5 +6877,5 @@ parser_yylex(struct parser_params *parse
lex_state = EXPR_BEG;
nextc();
-
set_yylval_id(rb_intern(tok()));
-
}set_yylval_id(rb_intern3(tok(), toklen(), STR_ENC(mb))); return tLABEL;
@@ -6897,5 +6896,5 @@ parser_yylex(struct parser_params *parse
}
{
-
ID ident = rb_intern(tok());
-
ID ident = rb_intern3(tok(), toklen(), STR_ENC(mb)); set_yylval_id(ident);
@@ -8353,7 +8352,12 @@ int
rb_symname_p(const char *name)
{
- return rb_enc_symname_p(name, rb_enc_from_index(0));
+}
+int
+rb_enc_symname_p(const char *name, rb_encoding *enc)
+{
const char *m = name;
int localid = Qfalse;
-
rb_encoding *enc = rb_enc_from_index(0);
if (!m) return Qfalse;
@@ -8437,6 +8441,8 @@ rb_intern3(const char *name, long len, r
fake_str.as.heap.ptr = (char *)name;
fake_str.as.heap.aux.capa = len;
- str = (VALUE)&fake_str;
- rb_enc_associate(str, enc);
- if (st_lookup(global_symbols.sym_id, (st_data_t)&fake_str,
(st_data_t *)&id))
- if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id))
return id;
@@ -8499,5 +8505,5 @@ rb_intern3(const char *name, long len, r
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:
- str = rb_str_new(name, len);
- str = rb_enc_str_new(name, len, enc);
OBJ_FREEZE(str);
st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
Index: string.c
===================================================================
--- string.c (revision 13339)
+++ string.c (working copy)
@@ -1130,5 +1130,6 @@ int
rb_str_hash(VALUE str)
{
- return rb_memhash(RSTRING_PTR(str), RSTRING_LEN(str));
- return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str),
- rb_enc_get_index(str));
}
@@ -1149,4 +1150,30 @@ rb_str_hash_m(VALUE str)
#define lesser(a,b) (((a)>(b))?(b):(a))
+static int
+is_ascii_string(VALUE str)
+{
- long i;
- for (i = 0; i < RSTRING_LEN(str); ++i) {
- int c = (unsigned char)RSTRING_PTR(str)[i];
- if (!ISASCII(c)) return Qfalse;
- }
- return Qtrue;
+}
+int
+rb_str_comparable(VALUE str1, VALUE str2)
+{
- int idx1 = rb_enc_get_index(str1);
- int idx2 = rb_enc_get_index(str2);
- if (idx1 == idx2) return Qtrue;
- if (!rb_enc_asciicompat(idx1)) return Qfalse;
- if (!rb_enc_asciicompat(idx2)) return Qfalse;
- if (!is_ascii_string(str1)) return Qfalse;
- if (!is_ascii_string(str2)) return Qfalse;
- return Qtrue;
+}
int
rb_str_cmp(VALUE str1, VALUE str2)
@@ -1177,5 +1204,5 @@ rb_str_cmp(VALUE str1, VALUE str2)
*/
-static VALUE
+VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
@@ -1187,5 +1214,5 @@ rb_str_equal(VALUE str1, VALUE str2)
return rb_equal(str2, str1);
}
- rb_enc_check(str1, str2); /* need weak check */
-
if (!rb_str_comparable(str1, str2)) return Qfalse;
if (RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
rb_str_cmp(str1, str2) == 0) {
@@ -1208,4 +1235,7 @@ rb_str_eql(VALUE str1, VALUE str2)
return Qfalse; -
if (rb_enc_get_index(str1) != rb_enc_get_index(str2))
-
return Qfalse;
-
if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2),
lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0)
@@ -5031,11 +5061,13 @@ sym_inspect(VALUE sym)
VALUE str, klass = Qundef;
ID id = SYM2ID(sym); -
rb_encoding *enc;
sym = rb_id2str(id);
- str = rb_str_new(0, RSTRING_LEN(sym)+1);
- enc = rb_enc_get(sym);
- str = rb_enc_str_new(0, RSTRING_LEN(sym)+1, enc);
RSTRING_PTR(str)[0] = ':';
memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym));
if (RSTRING_LEN(sym) != strlen(RSTRING_PTR(sym)) ||
- !rb_symname_p(RSTRING_PTR(sym))) {
- !rb_enc_symname_p(RSTRING_PTR(sym), enc)) {
str = rb_str_dump(str);
strncpy(RSTRING_PTR(str), ":\"", 2);
Index: vm.c
===================================================================
--- vm.c (revision 13339)
+++ vm.c (working copy)
@@ -13,4 +13,5 @@
#include "ruby/node.h"
#include "ruby/st.h"
+#include "ruby/encoding.h"
#include “gc.h”
Index: insns.def
--- insns.def (revision 13339)
+++ insns.def (working copy)
@@ -1701,5 +1701,6 @@ opt_eq
val = Qtrue;
}
-
else if (RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
-
else if (!ENCODING_GET(str1) && !ENCODING_GET(str2) &&
-
RSTRING_LEN(str1) == RSTRING_LEN(str2) && rb_memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), RSTRING_LEN(str1)) == 0) {
@@ -1707,5 +1708,5 @@ opt_eq
}
else {
- val = Qfalse;
- val = rb_str_equal(str1, str2);
}
}
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 13339)
+++ include/ruby/encoding.h (working copy)
@@ -28,4 +28,5 @@ typedef OnigEncodingType rb_encoding;
int rb_enc_to_index(rb_encoding*);
+int rb_enc_get_index(VALUE obj);
rb_encoding* rb_enc_get(VALUE);
rb_encoding* rb_enc_check(VALUE,VALUE);
@@ -74,7 +75,10 @@ int rb_enc_codelen(int, rb_encoding*);
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
+#define rb_enc_asciicompat(enc) ((enc)!=0)
+
int rb_enc_toupper(int c, rb_encoding *enc);
int rb_enc_tolower(int c, rb_encoding *enc);
ID rb_intern3(const char*, long, rb_encoding*);
+int rb_enc_symname_p(const char*, rb_encoding*);
#endif /* RUBY_ENCODING_H */
Index: include/ruby/intern.h
--- include/ruby/intern.h (revision 13339)
+++ include/ruby/intern.h (working copy)
@@ -514,5 +514,7 @@ VALUE rb_str_concat(VALUE, VALUE);
int rb_memhash(const void *ptr, long len);
int rb_str_hash(VALUE);
+int rb_str_comparable(VALUE, VALUE);
int rb_str_cmp(VALUE, VALUE);
+VALUE rb_str_equal(VALUE str1, VALUE str2);
void rb_str_update(VALUE, long, long, VALUE);
VALUE rb_str_inspect(VALUE);