[m17n] String#chop & String#succ

なかだです。

http://dontstopmusic.no-ip.org/diary/20070827.html#p02 e$B$G;XE&$5e(B
e$B$l$F$$$ke(Bchope$B$He(Bsucce$B$K4X$9$k%Q%C%A$G$9!#e(B

succe$B$K4X$7$F$O!"1Q?t;z0J30$OC1=c$KJ8;z%3!<%I=g$K$:$i$7$F$$$^$9$,!"e(B
e$BK\Mh$J$i$R$i$,$J$H$+%+%?%+%J$H$+$NCf$G7+$j>e$2$r$7$?$[$&$,$$$$$Ne(B
e$B$+$b$7$l$^$;$s!#$?$V$s0J2<$N$h$&$J5!G=$,I,MW$K$J$k$+$H;W$$$^$9!#e(B

1 scripte$B$rD4$Y$ke(B
2 e$B=g=xIU2DG=$+D4$Y$ke(B
3 e$B$“$ke(Bscripte$BCf$G$N<!$NJ8;z$rF@$ke(B
4 e$B$”$ke(Bscripte$BCf$G$N:G=i$NJ8;z$rF@$ke(B

Index: string.c

--- string.c (revision 13339)
+++ string.c (working copy)
@@ -1585,4 +1585,45 @@ succ_char(char *s)
}

+static int
+enc_succ_char(unsigned int c, char *s, rb_encoding *enc)
+{

  • unsigned int cs;
  • /* numerics */
  • if (rb_enc_isdigit(c, enc)) {
  • cs = c++;
  • if (rb_enc_isdigit(c, enc)) {
  •  rb_enc_mbcput(c, s, enc);
    
  •  return 0;
    
  • }
  • do c = cs–; while (rb_enc_isdigit(cs, enc));
  • rb_enc_mbcput(c, s, enc);
  • return ++c;
  • }
  • /* small alphabets */
  • if (rb_enc_islower(c, enc)) {
  • cs = c++;
  • if (rb_enc_islower(c, enc)) {
  •  rb_enc_mbcput(c, s, enc);
    
  •  return 0;
    
  • }
  • do c = cs–; while (rb_enc_islower(cs, enc));
  • rb_enc_mbcput(c, s, enc);
  • return c;
  • }
  • /* capital alphabets */
  • if (rb_enc_isupper(c, enc)) {
  • cs = c++;
  • if (rb_enc_isupper(c, enc)) {
  •  rb_enc_mbcput(c, s, enc);
    
  •  return 0;
    
  • }
  • do c = cs–; while (rb_enc_isupper(cs, enc));
  • rb_enc_mbcput(c, s, enc);
  • return c;
  • }
  • return -1;
    +}

/*
@@ -1618,36 +1659,49 @@ rb_str_succ(VALUE orig)
char *sbeg, *s, *e;
int c = -1;

  • long n = 0;
  • long n = 0, o = 0, l;

  • char carry[ONIGENC_CODE_TO_MBC_MAXLEN];

    str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));

  • rb_enc_copy(str, orig);
    OBJ_INFECT(str, orig);
    if (RSTRING_LEN(str) == 0) return str;

    enc = rb_enc_get(orig);

  • sbeg = RSTRING_PTR(str); s = sbeg + RSTRING_LEN(str) - 1;
  • e = RSTRING_END(str);
  • sbeg = RSTRING_PTR(str);
  • s = e = sbeg + RSTRING_LEN(str);
  • while (sbeg <= s) {
  • while (sbeg <= (s = rb_enc_prev_char(sbeg, s, enc))) {
    unsigned int cc = rb_enc_codepoint(s, e, enc);
    if (rb_enc_isalnum(cc, enc)) {
  •  if ((c = succ_char(s)) == 0) break;
    
  •  if (isascii(cc)) {
    
  • if ((c = succ_char(s)) == 0) break;
  •  }
    
  •  else {
    
  • if ((c = enc_succ_char(cc, s, enc)) == 0) break;
  •  }
     n = s - sbeg;
    
    }
  • s–;
    }
    if (c == -1) { /* str contains no alnum */
  • sbeg = RSTRING_PTR(str); s = sbeg + RSTRING_LEN(str) - 1;
    c = ‘\001’;
  • while (sbeg <= s) {
  •  if ((*s += 1) != 0) break;
    
  •  s--;
    
  • s = e;
  • while (sbeg <= (s = rb_enc_prev_char(sbeg, e, enc))) {
  •  unsigned int cc = rb_enc_codepoint(s, e, enc) + 1;
    
  •  l = rb_enc_mbcput(cc, carry, enc);
    
  •  if (l > 0) {
    
  • if (l == (o = e - s)) goto overlay;
  • n = s - sbeg;
  • goto insert;
  •  }
    
    }
    }
  • if (s < sbeg) {
  • RESIZE_CAPA(str, RSTRING_LEN(str) + 1);
  • if (s < sbeg && (l = rb_enc_mbcput(c, carry, enc)) > 0) {
  •  insert:
    
  • RESIZE_CAPA(str, RSTRING_LEN(str) + l - o);
    s = RSTRING_PTR(str) + n;
  • memmove(s+1, s, RSTRING_LEN(str) - n);
  • *s = c;
  • STR_SET_LEN(str, RSTRING_LEN(str) + 1);
  • memmove(s + l, s + o, RSTRING_LEN(str) - n - o);
  •  overlay:
    
  • memmove(s, carry, l);
  • STR_SET_LEN(str, RSTRING_LEN(str) + l - o);
    RSTRING_PTR(str)[RSTRING_LEN(str)] = ‘\0’;
    }
    @@ -4041,4 +4095,22 @@ rb_str_each_char(VALUE str)
    }

+static long
+chopped_length(VALUE str)
+{

  • rb_encoding *enc = rb_enc_get(str);
  • const char *p, *p2, *beg, *end;
  • beg = RSTRING_PTR(str);
  • end = beg + RSTRING_LEN(str);
  • if (beg > end) return 0;
  • p = rb_enc_prev_char(beg, end, enc);
  • if (!p) return 0;
  • if (p > beg && rb_enc_codepoint(p, end, enc) == ‘\n’) {
  • p2 = rb_enc_prev_char(beg, p, enc);
  • if (p2 && rb_enc_codepoint(p2, end, enc) == ‘\r’) p = p2;
  • }
  • return p - beg;
    +}

/*

  • call-seq:
    @@ -4054,13 +4126,9 @@ rb_str_chop_bang(VALUE str)
    {
    if (RSTRING_LEN(str) > 0) {
  • long len;
    rb_str_modify(str);
  • STR_DEC_LEN(str);
  • if (RSTRING_PTR(str)[RSTRING_LEN(str)] == ‘\n’) {
  •  if (RSTRING_LEN(str) > 0 &&
    
  • RSTRING_PTR(str)[RSTRING_LEN(str)-1] == ‘\r’) {
  • STR_DEC_LEN(str);
  •  }
    
  • }
  • RSTRING_PTR(str)[RSTRING_LEN(str)] = ‘\0’;
  • len = chopped_length(str);
  • STR_SET_LEN(str, len);
  • RSTRING_PTR(str)[len] = ‘\0’;
    return str;
    }
    @@ -4089,7 +4157,8 @@ static VALUE
    rb_str_chop(VALUE str)
    {
  • str = rb_str_dup(str);
  • rb_str_chop_bang(str);
  • return str;
  • VALUE str2 = rb_str_new5(str, RSTRING_PTR(str),
    chopped_length(str));
  • rb_enc_copy(str2, str);
  • OBJ_INFECT(str2, str);
  • return str2;
    }

Index: include/ruby/encoding.h

--- include/ruby/encoding.h (revision 13339)
+++ include/ruby/encoding.h (working copy)
@@ -63,5 +63,5 @@ int rb_enc_codelen(int, rb_encoding*);

/* ptr, ptr, encoding -> prev_char */
-#define rb_enc_prev_char(s,p,enc) onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
+#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)

#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)

まつもと ゆきひろです

In message “Re: [ruby-dev:31734] [m17n] String#chop & String#succ”
on Wed, 5 Sep 2007 16:25:46 +0900, Nobuyoshi N.
[email protected] writes:

|http://dontstopmusic.no-ip.org/diary/20070827.html#p02 e$B$G;XE&$5e(B
|e$B$l$F$$$ke(Bchope$B$He(Bsucce$B$K4X$9$k%Q%C%A$G$9!#e(B

コミットしてください。

|succe$B$K4X$7$F$O!“1Q?t;z0J30$OC1=c$KJ8;z%3!<%I=g$K$:$i$7$F$$$^$9$,!“e(B
|e$BK\Mh$J$i$R$i$,$J$H$+%+%?%+%J$H$+$NCf$G7+$j>e$2$r$7$?$[$&$,$$$$$Ne(B
|e$B$+$b$7$l$^$;$s!#$?$V$s0J2<$N$h$&$J5!G=$,I,MW$K$J$k$+$H;W$$$^$9!#e(B
|
|1 scripte$B$rD4$Y$ke(B
|2 e$B=g=xIU2DG=$+D4$Y$ke(B
|3 e$B$”$ke(Bscripte$BCf$G$N<!$NJ8;z$rF@$ke(B
|4 e$B$”$ke(Bscripte$BCf$G$N:G=i$NJ8;z$rF@$ke(B

1e$B$O$9$G$K$“$k$h$&$G$9$,!”$=$l0J30$O?7@_$9$kI,MW$,$"$j$^$9$M!#e(B
e$B$7$+$7!"e(Bsucce$B$4$H$-$N$?$a$Ke(Bpremitivee$B$rA}$d$9$N$O;?@.$7$+$M$^$9!#e(B
e$B!Ve(Binvalide$B$JJ8;z$r@8@.$7$J$$!W$H$$$&%A%'%C%/$@$1$G=<J,$G$O$Je(B
e$B$$$G$7$g$&$+!#e(B