Sparse Hashに興味があったので、Rubyバインディングを作ってみた。
http://storehouse.sakura.ne.jp/viewvc/viewvc.cgi/ruby-sparsehash/?root=svn
#include "google/sparse_hash_map" #include "google/dense_hash_map" #include "sparsehash_internal.h" namespace { template <class T> struct Sparsehash { typename T *m; static void rb_sparsehash_mark(Sparsehash<T> *p) { if (!p->m) { return; } T *m = p->m; for(T::iterator i = m->begin(); i != m->end(); i++) { rb_gc_mark(i->first); rb_gc_mark(i->second); } } static void rb_sparsehash_free(Sparsehash<T> *p) { if (p->m) { delete p->m; } delete p; } static VALUE rb_sparsehash_alloc(VALUE klass) { Sparsehash<T> *p; p = new Sparsehash<T>; p->m = NULL; return Data_Wrap_Struct(klass, &rb_sparsehash_mark, &rb_sparsehash_free, p); } static VALUE rb_sparsehash_get(VALUE self, VALUE key) { Sparsehash<T> *p; Data_Get_Struct(self, Sparsehash<T>, p); T &m = *(p->m); VALUE value = m[key]; return(value ? value : Qnil); } static VALUE rb_sparsehash_set(VALUE self, VALUE key, VALUE value) { Sparsehash<T> *p; Data_Get_Struct(self, Sparsehash<T>, p); T &m = *(p->m); m[key] = value; return value; } static VALUE rb_sparsehash_each0(VALUE args) { return rb_yield(args); } static VALUE rb_sparsehash_each(VALUE self) { Sparsehash<T> *p; rb_need_block(); Data_Get_Struct(self, Sparsehash<T>, p); T &m = *(p->m); int status = 0; for(T::iterator i = m.begin(); i != m.end(); i++) { VALUE args = rb_ary_new3(2, i->first, i->second); rb_protect(rb_sparsehash_each0, args, &status); if (status != 0) { break; } } if (status != 0) { rb_jump_tag(status); } return self; } static VALUE rb_sparsehash_erase(VALUE self, VALUE key) { Sparsehash<T> *p; Data_Get_Struct(self, Sparsehash<T>, p); T &m = *(p->m); VALUE value = m[key]; m.erase(key); return(value ? value : Qnil); } static void init(VALUE &rb_mSparsehash, const char *name) { VALUE rb_cSparseHashMap = rb_define_class_under(rb_mSparsehash, name, rb_cObject); rb_define_alloc_func(rb_cSparseHashMap, &rb_sparsehash_alloc); rb_define_private_method(rb_cSparseHashMap, "initialize", __F(rb_sparsehash_initialize<T>), 0); rb_define_method(rb_cSparseHashMap, "[]", __F(&rb_sparsehash_get), 1); rb_define_method(rb_cSparseHashMap, "[]=", __F(&rb_sparsehash_set), 2); rb_define_method(rb_cSparseHashMap, "each", __F(&rb_sparsehash_each), 0); rb_define_method(rb_cSparseHashMap, "erase", __F(&rb_sparsehash_erase), 1); rb_define_method(rb_cSparseHashMap, "delete", __F(rb_sparsehash_erase), 1); } }; struct rb_sparsehash_equal_key { bool operator() (const VALUE &x, const VALUE &y) const { return (rb_funcall(x, rb_intern("=="), 1, y) == Qtrue); } }; struct rb_sparsehash_hash { size_t operator() (const VALUE &x) const { VALUE hash = rb_funcall(x, rb_intern("hash"), 0); return NUM2INT(hash); } }; typedef google::sparse_hash_map<VALUE, VALUE, rb_sparsehash_hash, rb_sparsehash_equal_key> SHMap; typedef google::dense_hash_map<VALUE, VALUE, rb_sparsehash_hash, rb_sparsehash_equal_key> DHMap; template <class T> VALUE rb_sparsehash_initialize(VALUE self) { Sparsehash<T> *p; Data_Get_Struct(self, Sparsehash<T>, p); p->m = new T; return Qnil; } template <> VALUE rb_sparsehash_initialize<DHMap>(VALUE self) { Sparsehash<DHMap> *p; Data_Get_Struct(self, Sparsehash<DHMap>, p); p->m = new DHMap; p->m->set_empty_key(Qnil); return Qnil; } } // namespace void Init_sparsehash() { VALUE rb_mSparsehash = rb_define_module("Sparsehash"); Sparsehash<SHMap>::init(rb_mSparsehash, "SparseHashMap"); Sparsehash<DHMap>::init(rb_mSparsehash, "DenseHashMap"); }
せっかくなのでテンプレートを活用。結構、コードを圧縮できるなぁ…でも、staticばっかの書き方はどうなんだろう?
で、以下のテストコードで計測。
require 'sparsehash' require 'timeout' include Sparsehash def memoryusage() tasks = `tasklist` lines = tasks.split("\n") lines.each do |line| row = line.split(/\s+/) if row[1].to_i == $$ return row[3].gsub(',', '').to_i / 1024.0 end end return -1; end #def memoryusage() # status = `cat /proc/#{$$}/status` # lines = status.split("\n") # lines.each do |line| # if line =~ /^VmRSS:/ # line.gsub!(/.*:\s*(\d+).*/, '\1') # return line.to_i / 1024.0 # end # end # return -1; #end [100, 10000, 1000000].each do |rnum| [Hash, SparseHashMap, DenseHashMap].each do |clazz| puts("#{clazz}: #{rnum}") time = Time.now size = memoryusage hash = clazz.new begin timeout(120) do (0...rnum).each do |i| buf = sprintf("%08d", i) hash[buf] = buf end time = Time.now - time GC.start size = memoryusage - size printf("Time: %.3f sec.\n", time) printf("Usage: %.3f MB\n\n", size) end rescue Timeout::Error printf("-----\n\n") end end end
結果はこんな感じ。
rnum = | 100 | 10000 | 1000000 |
---|---|---|---|
Hash | 0.297 sec. / 0.145 MB | 0.188 sec. / 2.543 MB | 13.203 sec. / 245.555 MB |
SparseHashMap | 0.156 sec. / 0.039 MB | 3.406 sec. / 0.027 MB | 計測不能*1 |
DenseHashMap | 0.141 sec. / 0.055 MB | 0.656 sec. / 0.113 MB | 69.047 sec. / 125.324 MB |
使いどころは微妙かも。ハッシュ関数を変えたらもう少しパフォーマンスが改善するかなぁ。
*1:時間かかりすぎ…