なかだです。
At Wed, 17 Mar 2010 22:46:43 +0900,
Yusuke E. wrote in [ruby-dev:40687]:
- File.open 方式で決定する
#2 の選択肢は実現可能性が検証されていない気がするので、「パッチを
書いてみたら実は難しいことがわかった → 1.9.2 見送り」という危険が
あるかもしれません。
別に難しくはありません。
$ ./ruby -Eus-ascii -e 'ARGV.each{|file|open(file,
"r:magic-comment"){|f|p [f.path, f.external_encoding]}}' version.h
lib/rexml/rexml.rb lib/rubygems/package.rb
["version.h", #<Encoding:US-ASCII>]
["lib/rexml/rexml.rb", #<Encoding:UTF-8>]
["lib/rubygems/package.rb", #<Encoding:ISO-8859-1>]
diff --git c/include/ruby/io.h i/include/ruby/io.h
index e05a0f5..f067831 100644
--- c/include/ruby/io.h
+++ i/include/ruby/io.h
@@ -96,4 +96,5 @@ typedef struct rb_io_t {
/* #define FMODE_PREP 0x00010000 */
#define FMODE_SETENC_BY_BOM 0x00100000
+#define FMODE_SETENC_BY_MAGIC_COMMENT 0x00200000
#define GetOpenFile(obj,fp) rb_io_check_closed((fp) =
RFILE(rb_io_taint_check(obj))->fptr)
diff --git c/io.c i/io.c
index 60afd6c..60761f0 100644
--- c/io.c
+++ i/io.c
@@ -4125,4 +4125,6 @@ rb_io_ext_int_to_encs(rb_encoding *ext,
rb_encoding *intern, rb_encoding **enc,
}
+#define is_magic_comment(str) (STRCASECMP(str, “magic-comment”) == 0)
+
static void
parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding
**enc2_p)
@@ -4166,5 +4168,5 @@ parse_mode_enc(const char *estr, rb_encoding
**enc_p, rb_encoding **enc2_p)
ext_enc = rb_enc_from_index(idx);
else {
- if (idx != -2 && !is_magic_comment(estr))
rb_warn(“Unsupported encoding %s ignored”, estr);
ext_enc = NULL;
@@ -4337,6 +4339,11 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE
*vperm_p, VALUE opthash,
has_enc = 1;
parse_mode_enc(p+1, &enc, &enc2);
-static void
-io_set_encoding_by_bom(VALUE io)
+int rb_magic_comment_encoding(const char *str, long len);
+
+static int
+io_parse_encoding_comment(VALUE io)
{
- int idx = io_strip_bom(io);
- if (idx) {
+static void
+io_guess_encoding(VALUE io, int fmode)
+{
- int idx;
- if (((fmode & FMODE_SETENC_BY_BOM) &&
- (idx = io_strip_bom(io)) != 0) ||
- ((fmode & FMODE_SETENC_BY_MAGIC_COMMENT) &&
- (idx = io_parse_encoding_comment(io)) != 0)) {
rb_io_t *fptr;
GetOpenFile(io, fptr);
@@ -4638,5 +4679,5 @@ rb_file_open_generic(VALUE io, VALUE filename, int
oflags, int fmode, convconfig
fptr->fd = rb_sysopen(fptr->pathv, oflags, perm);
io_check_tty(fptr);
- if (fmode & FMODE_SETENC_BY_BOM) io_set_encoding_by_bom(io);
-
io_guess_encoding(io, fmode);
return io;
@@ -6396,5 +6437,5 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
fp->stdio_file = stderr;
- if (fmode & FMODE_SETENC_BY_BOM) io_set_encoding_by_bom(io);
- io_guess_encoding(io, fmode);
return io;
}
diff --git c/parse.y i/parse.y
index 340a825..a42c8f6 100644
--- c/parse.y
+++ i/parse.y
@@ -6248,13 +6248,15 @@ magic_comment_marker(const char *str, long len)
}
+typedef int rb_magic_comment_func(const char *name, long nlen, const
char *value, long vlen, void *arg);
+
static int
-parser_magic_comment(struct parser_params *parser, const char *str,
long len)
+parse_magic_comment(const char *str, long len, rb_magic_comment_func
*func, void *arg)
{
- VALUE name = 0;
const char *beg, *end, *vbeg, *vend;
#define str_copy(_s, _p, _n) ((_s)
? (rb_str_resize((_s), (_n)),
MEMCPY(RSTRING_PTR(_s), (_p), char, (_n)), (_s)) \
- : ((_s) = STR_NEW((_p), (_n))))
-
: ((_s) = rb_str_new((_p), (_n))))
if (len <= 7) return FALSE;
@@ -6266,7 +6268,4 @@ parser_magic_comment(struct parser_params parser,
const char str, long len)
/
%r"([^\s’":;]+)\s:\s*("(?:\\.|[^"])"|[^"\s;]+)[\s;]" */
while (len > 0) {
-#ifndef RIPPER
- const struct magic_comment *p = magic_comments;
-#endif
char *s;
int i;
@@ -6321,24 +6320,68 @@ parser_magic_comment(struct parser_params
*parser, const char *str, long len)
if (s[i] == ‘-’) s[i] = ‘_’;
}
- if ((*func)(s, n, vbeg, vend - vbeg, arg)) break;
- }
-
- return TRUE;
+}
-
+static int
+magic_comment_i(const char *name, long nlen, const char *value, long
vlen, void *arg)
+{
- struct parser_params *parser = arg;
#ifndef RIPPER
- do {
-
if (STRNCASECMP(p->name, s, n) == 0) {
- n = vend - vbeg;
- if (p->length) {
-
n = (*p->length)(parser, vbeg, n);
- }
- str_copy(val, vbeg, n);
- (*p->func)(parser, s, RSTRING_PTR(val));
- break;
- const struct magic_comment *p = magic_comments;
- do {
- if (STRNCASECMP(p->name, name, nlen) == 0) {
-
char *val;
-
if (p->length) {
- vlen = (*p->length)(parser, value, vlen);
}
- } while (++p < magic_comments + numberof(magic_comments));
-
val = ALLOCA_N(char, vlen + 1);
-
memcpy(val, value, vlen);
-
val[vlen] = '\0';
-
(*p->func)(parser, name, val);
-
break;
- }
- } while (++p < magic_comments + numberof(magic_comments));
#else
- dispatch2(magic_comment, name, val);
- dispatch2(magic_comment, name, val);
#endif
+static int
+parser_magic_comment(struct parser_params *parser, const char *str,
long len)
+{
- return parse_magic_comment(str, len, magic_comment_i, (void
*)parser);
+}
-
+static int
+find_magic_comment_encoding(const char *name, long nlen, const char
*value, long vlen, void *arg)
+{
- char *val;
- switch (nlen) {
-
case 8:
- if (STRNCASECMP(“en”, name, 2) != 0) return FALSE;
- name += 2;
-
case 6:
- if (STRNCASECMP(“coding”, name, 6) != 0) return FALSE;
- }
- vlen = parser_encode_length(0, value, vlen);
- memcpy(val = ALLOCA_N(char, vlen + 1), value, vlen);
- val[vlen] = ‘\0’;
- *(int *)arg = rb_enc_find_index(val);
return TRUE;
}
+int
+rb_magic_comment_encoding(const char *str, long len)
+{
- int idx = 0;
- if (!parse_magic_comment(str, len, find_magic_comment_encoding,
&idx)) return 0;
- return idx;
+}
-
static void
set_file_encoding(struct parser_params *parser, const char *str, const
char *send)