enumerable から hash を生成するメソッドとして
Enumerable#categorize を追加するのはどうでしょうか。
まだ Ruby で試しに実装した段階で、C
では実装していないので、すぐにという話ではありませんが。
categorize メソッドは enumerable の要素をカテゴリ分割して
hash を生成するメソッドです。
たとえば、以下のような配列があったとしましょう。
(より現実的な状況を想定するなら、CSV ファイルを CSV.read
で読み込んだ結果こういう配列が得られたと考えてください)
  ary = [
    ["matz", "Yukihiro M."],
    ["nobu", "Nobuyoshi N."],
    ["akr", "Tanaka A."],
    ["usa", "Usaku NAKAMURA"],
    ["naruse", "NARUSE, Yui"],
    ["ko1", "SASADA Koichi"]
  ]
こういうデータから、各要素の第一要素から第二要素、
あるいは第二要素から第一要素へのハッシュを作るというのはありがちな話です。
幸いにして第一要素から第二要素へのハッシュは Hash[ary]
で作れるようになったので、
逆の第二要素から第一要素へのハッシュについて考えましょう。
(同姓同名を考慮して、ハッシュの値は第一要素の配列としましょう)
これを得るには現在、残念な事に、自分でループを書く必要があります。
h = {}
ary.each {|a, b|
(h[b] ||= []) << a
}
pp h
#=> {"Yukihiro M."=>["matz"],
#    "Nobuyoshi N."=>["nobu"],
#    "Tanaka A."=>["akr"],
#    "Usaku NAKAMURA"=>["usa"],
#    "NARUSE, Yui"=>["naruse"],
#    "SASADA Koichi"=>["ko1"]}
Enumerable#categorize はこれをひとつのメソッド呼び出しで実現します。
  h = ary.categorize(1, 0)
  pp h
  #=> {"Yukihiro M."=>["matz"],
  #    "Nobuyoshi N."=>["nobu"],
  #    "Tanaka A."=>["akr"],
  #    "Usaku NAKAMURA"=>["usa"],
  #    "NARUSE, Yui"=>["naruse"],
  #    "SASADA Koichi"=>["ko1"]}
引数の 1, 0 が何を意味するかというと、enumerable の要素から
ハッシュのキー及び値を取り出す指定です。
具体的には ary の各要素に対し、[] メソッドを呼び出し、
その引数に引き渡してキー/値を得ます。
つまり、["matz", "Yukihiro M."][1] として "Yukihiro M."
というキーを得て、
["matz", "Yukihiro M."][0] として "matz"
という値を得るわけです。
Enumerable#categorize
の基本的な使い方はここまでなのですが、いくつか追加機能があります。
-
引数に Proc オブジェクト (正確には call
メソッドを持ったオブジェクト) を指定すると、
[] メソッドでなく、その Proc
オブジェクトでキー/値を取り出す
つまり、ary.categorize(1, 0) は
  ary.categorize(lambda {|elt| elt[1] }, lambda {|elt| elt[0] })
と同じです。
Hash[ary]
のような機能が何回もリクエストされながら長年実現されなかった理由は、
要素が長さ2の配列であるという仮定を置いたメソッドに対する躊躇があったように思えるのですが、
この Proc オブジェクトによる指定を可能にすることにより、
Enumerable#categorize は要素に対する仮定を必須ではなくしています。
-
ネストしたハッシュの生成
[ruby-talk:372481] や [ruby-talk:288931]
など、たまにあるのですが、
ネストしたハッシュが必要な事があります。
Enumerable#categorize
のキーを指定する引数は、実はふたつ以上指定できて、
そうするとネストしたハッシュを生成します。
  h = ary.categorize(lambda {|e| e[0][0] }, lambda {|e| e[0][1] }, 0)
  pp h
  #=> {"m"=>{"a"=>["matz"]},
  #    "n"=>{"o"=>["nobu"], "a"=>["naruse"]},
  #    "a"=>{"k"=>["akr"]},
  #    "u"=>{"s"=>["usa"]},
  #    "k"=>{"o"=>["ko1"]}}
-
ハッシュの値の後処理
ハッシュの値は配列ですが、その配列をソートしたいなどの後処理がしたいことがあります。
そこで、Enumerable#categorize にブロックをつけると、
生成した配列から他の値に変換することができます。
  h = ary.categorize(lambda {|e| e[0][0] }, 1) {|ks, vs| vs.sort }
  pp h
  #=> {"m"=>["Yukihiro M."],
  #    "n"=>["NARUSE, Yui", "Nobuyoshi N."],
  #    "a"=>["Tanaka A."],
  #    "u"=>["Usaku NAKAMURA"],
  #    "k"=>["SASADA Koichi"]}
また、使う側としては同じキーに対してはひとつの値しかない事を知っていて
配列にしたくないとか、
あるいは合計・最小値・最大値・平均値が欲しいとか、また単に要素数だけ欲しいとか、
そういう用途にも使えます。
(これらについてはメモリ効率の点から次に述べる :seed, :op, :update
を使う方が適切ですが)
  h = ary.categorize(1, 0) {|ks, vs|
    raise "duplicate keys: #{ks.inspect}" if vs.length != 1
    vs[0]
  }
  pp h
  #=> {"Yukihiro M."=>"matz",
  #    "Nobuyoshi N."=>"nobu",
  #    "Tanaka A."=>"akr",
  #    "Usaku NAKAMURA"=>"usa",
  #    "NARUSE, Yui"=>"naruse",
  #    "SASADA Koichi"=>"ko1"}
なお、ブロック引数の ks は生成された値に対応するキーの配列で、
上記の例では、ひとつの値しかないはずなのにそうでなかったときの
例外メッセージに使っています。
-
:seed, :op, :update オプション
メモリ消費の都合上、配列を生成したくない、という状況もあります。
例えば、いくつあるのか数えたいというだけなら、
配列を生成してから length を呼び出すのはかなり無駄です。
そのような状況に対応するため、配列を生成するかわりの操作を指定することができます。
  h = ary.categorize(lambda {|e| e[0][0] }, lambda {|e| 1 }, :op=>:+)
  pp h
  #=> {"m"=>1, "n"=>2, "a"=>1, "u"=>1, "k"=>1}
:seed と :op はだいたい inject のような動作になります。
:update は :op
とほぼ同じですが、引数がひとつ多くて、値に対応するキーが与えられます。
(:seed が与えられない時には、各カテゴリの最初の値が seed
扱いになります)
:op, :update に指定したものは to_proc で変換されるので、
:op => :+ というのは、lambda {|x,y| x + y } の意味です。
-
複数の値を取り出す
enum の要素が配列やハッシュで多くの要素をもつ場合、
ひとつでなく、いくつかの要素を取り出すことがあります。
そのために、配列を指定した場合には再帰的に要素の取り出しを行って、
取り出した値を配列にまとめます。
前述の ary は
2要素しかなく、あまりこれを適用する意味がないので、
http://coderepos.org/share/export/38695/lang/ruby/ruby-committers/ruby-committers.yml
を使うと、これはハッシュを要素とする配列なので、
  committers = open("ruby-committers.yml") {|f| YAML.load(f) }
  pp committers.categorize("account", ["name", "nick"]) {|ks, vs| vs[0] }
  #=> {"matz"=>[["松本行弘", "まつもとゆきひろ", "Yukihiro Matsumoto"], ["Matz"]],
  #    "H_Konishi"=>[["小西弘将", "KONISHI Hiromasa"], nil],
  #    "aamine"=>[["青木峰郎", "Minero A."], ["青木さん"]],
  #    ...
などとでき、name と nick を両方取り出すことができます。
(categorize の引数が整数じゃなくて文字列になっているのは ary
の要素が文字列をキーとするハッシュだからです)
どうでしょうか。
配列から hash を生成するメソッドは現状 enum.group_by や
Hash[assoc] がありますが、
categorize
の狙い所としては、それらよりも低レベルで広い応用を持つが、いくらか記述が長く、
でもループで記述するよりは高レベルでかなり短い、というあたりです。
たとえば、 (1...6).group_by {|i| i % 3 } は
(1...6).categorize(lambda {|e| e % 3}, lambda {|e| e}) と実現でき、
Hash[ [ ["a", 100], ["b", 200] ] ] は
[ ["a", 100], ["b", 200] ].categorize(0, 1, :op=>lambda {|x,y| y })
と実現できます。
(あるいは配列ができちゃうけど [ ["a", 100], ["b", 200]
].categorize(0, 1) {|ks, vs|
vs.last } とか)
参考:
- RDB の hash-join アルゴリズム
- SQL の集約関数
- MapReduce
Ruby による試験的な実装:
module Enumerable
  # :call-seq:
  #   enum.categorize(ksel1, ksel2, ..., vsel, [opts])
  #   enum.categorize(ksel1, ksel2, ..., vsel, [opts]) {|ks, vs| ... }
  #
  # Categorizes the elements in _enum_ and returns a hash.
  # This method assumes multiple elements for a category.
  #
  # +categorize+ takes one or more key selectors,
  # one value selector and
  # an optional option hash.
  # It also takes an optional block.
  #
  # The selectors specify how to extract a value from an element in _enum_.
  #
  # The key selectors, _kselN_, are used to extract hash keys from an element.
  # If two or more key selectors are specified, the result hash will be nested.
  #
  # The value selector, _vsel_, is used for the values of innermost hashes.
  # By default, all values extracted by _vsel_ from the elements for which
  # the key selectors extract the same keys are composed as an array.
  # The array is set to the values of the innermost hashes.
  # This behavior can be customized by the options: :seed, :op and :update.
  #
  #   a = [{:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100},
  #        {:fruit => "melon", :color => "green", :taste => "sweet", :price => 300},
  #        {:fruit => "grapefruit", :color => "yellow", :taste => "tart", :price => 200}]
  #   p a.categorize(:color, :fruit)
  #   #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
  #
  #   p a.categorize(:taste, :fruit)
  #   #=> {"sweet"=>["banana", "melon"], "tart"=>["grapefruit"]}
  #
  #   p a.categorize(:taste, :color, :fruit)
  #   #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]},
  #   #    "tart"=>{"yellow"=>["grapefruit"]}}
  #
  #   p a.categorize(:taste, :color)
  #   #=> {"sweet"=>["yellow", "green"], "tart"=>["yellow"]}
  #
  # In the above example, :fruit, :color and :taste are specified as selectors.
  # There are several types of selectors as follows:
  #
  # - object with +call+ method (procedure, etc.): extracts a value from the
  #   element by calling the procedure with the element as an argument.
  # - array of selectors: makes an array which contains the values extracted
  #   by the selectors.
  # - other object: extracts a value from the element using +[]+ method as
  #   +element[selector]+.
  #
  # So the selector :fruit extracts the value from the element
  # {:fruit => "banana", :color => "yellow", :taste => "sweet", :price => 100}
  # as {...}[:fruit].
  #
  #   p a.categorize(lambda {|elt| elt[:fruit][4] }, :fruit)
  #   #=> {"n"=>["banana", "melon"], "e"=>["grapefruit"]}
  #
  # When the key selectors return the same key for two or more elements,
  # corresponding values extracted by the value selector are combined.
  # By default, all values are collected as an array.
  # The :seed, :op and :update options in the option hash customize this
  # behavior.
  # The :seed and :op options are similar to Enumerable#inject.
  # The :seed option specifies an initial value.
  # (If the :seed option is not given, the first value for each category is
  # treated as an initial value.)
  # The :op option specifies a procedure to combine a seed and an element
  # into a next seed.
  # The :update option is the same as the :op option except it takes three
  # arguments instead of two: keys, seed and element.
  # +to_proc+ method is used to convert the :op and :update options to a
  # procedure.
  # So a symbol can be used for them.
  #
  #   # count categorized elements.
  #   p a.categorize(:color, lambda {|e| 1 }, :op=>:+)
  #   #=> {"yellow"=>2, "green"=>1}
  #
  #   p a.categorize(:color, :fruit, :seed=>"", :op=>:+)
  #   #=> {"yellow"=>"bananagrapefruit", "green"=>"melon"}
  #
  # The default behavior, collecting all values as an array, is implemented
  # as follows.
  #   :seed => nil
  #   :update => lambda {|ks, s, v| !s ? [v] : (s << v) }
  #
  # The :op and :update options are mutually exclusive.
  # ArgumentError is raised if both are specified.
  # ArgumentError is also raised if fewer than two selectors are given.
  #
  # The block for +categorize+ method converts combined values to final
  # innermost hash values.
  #
  #   p a.categorize(:color, :fruit) {|ks, vs| vs.join(",") }
  #   #=> {"yellow"=>"banana,grapefruit", "green"=>"melon"}
  #
  #   # calculates the average price for fruits of each color.
  #   p a.categorize(:color, :price) {|ks, vs| vs.inject(0.0, &:+) / vs.length }
  #   #=> {"yellow"=>150.0, "green"=>300.0}
  #
  def categorize(*args, &reduce_proc)
    # A trailing Hash is always consumed as the option hash.
    opts = args.last.kind_of?(Hash) ? args.pop : {}
    if args.length < 2
      raise ArgumentError, "needs 2 or more arguments without option hash (but #{args.length})"
    end
    # The last remaining argument selects values; all others select keys.
    value_selector = cat_selector_proc(args.pop)
    key_selectors = args.map {|a| cat_selector_proc(a) }
    has_seed = opts.include? :seed
    seed_value = opts[:seed]
    if opts.include?(:update) && opts.include?(:op)
      raise ArgumentError, "both :op and :update option specified"
    elsif opts.include? :update
      update_proc = opts[:update].to_proc
    elsif opts.include? :op
      op_proc = opts[:op].to_proc
      # Adapt the two-argument :op to the three-argument :update protocol.
      update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
    else
      # Default: collect values into an array.  A :seed given without
      # :op/:update is ignored here.
      has_seed = true
      seed_value = nil
      update_proc = lambda {|ks, s, v| !s ? [v] : (s << v) }
    end
    result = {}
    each {|*elts|
      # Treat a multi-value yield as a single array element.
      elt = elts.length <= 1 ? elts[0] : elts
      ks = key_selectors.map {|ksel| ksel.call(elt) }
      v = value_selector.call(elt)
      # Walk (creating as needed) the nested hashes for all but the last key.
      h = result
      0.upto(ks.length-2) {|i|
        k = ks[i]
        h[k] = {} if !h.include?(k)
        h = h[k]
      }
      lastk = ks.last
      if !h.include?(lastk)
        if has_seed
          h[lastk] = update_proc.call(ks, seed_value, v)
        else
          # Without a seed the first value of the category is the seed.
          h[lastk] = v
        end
      else
        h[lastk] = update_proc.call(ks, h[lastk], v)
      end
    }
    if reduce_proc
      cat_reduce(result, [], key_selectors.length-1, reduce_proc)
    end
    result
  end

  # Converts a selector given to +categorize+ into an object responding to
  # +call+ which extracts a value from an element:
  # - an object responding to +call+ is returned as is,
  # - an object responding to +to_ary+ is converted recursively and yields
  #   an array of the extracted values,
  # - any other object +sel+ extracts +element[sel]+.
  def cat_selector_proc(selector)
    if selector.respond_to?(:call)
      selector
    elsif selector.respond_to? :to_ary
      selector_procs = selector.to_ary.map {|sel| cat_selector_proc(sel) }
      lambda {|elt| selector_procs.map {|selproc| selproc.call(elt) } }
    else
      lambda {|elt| elt[selector] }
    end
  end
  private :cat_selector_proc

  # Replaces, in place, every innermost value of the nested +hash+ with the
  # result of +reduce_proc.call(keys, value)+.
  # +ks+ is the key path accumulated so far and +nestlevel+ is the number of
  # hash levels remaining below +hash+ (0 means +hash+ is innermost).
  def cat_reduce(hash, ks, nestlevel, reduce_proc)
    if nestlevel.zero?
      hash.each {|k, v|
        ks << k
        begin
          # Pass a copy so the block cannot disturb the shared key path.
          hash[k] = reduce_proc.call(ks.dup, v)
        ensure
          ks.pop
        end
      }
    else
      hash.each {|k, h|
        ks << k
        begin
          cat_reduce(h, ks, nestlevel-1, reduce_proc)
        ensure
          ks.pop
        end
      }
    end
  end
  private :cat_reduce
end