until nil {

YAML::dump != Marshal.dump

04 Jan 2009

People! Using YAML for serialising and for quick-and-dirty DB projects is fun... but not optimal.

If you can serialise with Marshal.dump instead of YAML::dump, then go for it!

A dump that is more than 30x faster and a load that is more than 2x faster is not what I would call a marginal difference.

See for yourself by running this test script: (git://gist.github.com/43177.git )

#!/usr/bin/env ruby

require 'yaml'

# Output files written by the Marshal and YAML runs respectively.
marshal_file = 'marshal_dump.marshal'
yaml_file = 'yaml_dump.yml'

# Prints a single blank line; used to space out the report sections.
def line
  puts "\n"
end

line

# Build a large array of hashes to use as the serialisation payload.
print "building the bully data object..."
$stdout.flush
bully = []
indexes = 5000; keys = 100; rand_value = keys * 100; entry_count = 0
indexes.times do |index|
  row = {}
  # Random keys can collide, so a row may hold fewer than `keys` entries;
  # entry_count therefore sums the actual row sizes.
  keys.times do
    key = rand(rand_value)
    row[key] = rand(rand_value)
  end
  bully[index] = row
  entry_count += row.size
  # One progress dot every tenth row.
  print '.' if (index % 10).zero?
  $stdout.flush
end
line
puts "bully data structure containing #{entry_count} entry built"
line

# Marshal test: time dumping `bully` to marshal_file, time loading it back,
# then spot-check the round trip.
puts "Testing Marshal dump..."
start = Time.now

# Marshal output is binary data, so the file is opened in binary mode and the
# dump is streamed straight into it. Writing it with `puts` through a
# text-mode handle can append a stray "\n" and, on platforms that translate
# newlines in text mode, alter the bytes. The block form also guarantees the
# handle is closed, even if dumping raises.
File.open(marshal_file, 'wb') { |f| Marshal.dump(bully, f) }

marshal_dump = Time.now - start
puts "marshall dump time: #{marshal_dump} sec"; line

puts "Testing Marshal load..."
start = Time.now

# Read back in binary mode; the block form closes the handle after loading
# instead of leaving it open for the rest of the run.
bully_marshaled = File.open(marshal_file, 'rb') { |f| Marshal.load(f) }

marshal_load = Time.now - start
puts "marshall load time: #{marshal_load} sec"
marshal_size = File.size(marshal_file)
puts "for #{marshal_size} bytes"; line

# Cheap sanity check: compare the first row, the last row and the row count
# rather than the whole structure.
if bully.first == bully_marshaled.first &&
   bully.last == bully_marshaled.last &&
   bully.length == bully_marshaled.length
  puts "and both bully match!!!"
else
  puts "and they don't match!!!"
end
line

# YAML test: time dumping `bully` to yaml_file, time loading it back,
# then spot-check the round trip.
puts "Testing YAML dump..."
start = Time.now

# Block form so the handle is closed even if dumping or writing raises.
File.open(yaml_file, 'w') { |f| f.puts(YAML::dump(bully)) }

yaml_dump = Time.now - start
puts "yaml dump time: #{yaml_dump} sec"; line


puts "Testing YAML load..."
start = Time.now

# Block form closes the read handle after parsing instead of leaving it open
# for the rest of the run.
bully_yaml = File.open(yaml_file) { |f| YAML::load(f) }

yaml_load = Time.now - start
puts "yaml load time: #{yaml_load} sec"
yaml_size = File.size(yaml_file)
puts "for #{yaml_size} bytes"; line

# Cheap sanity check: compare the first row, the last row and the row count
# rather than the whole structure.
if bully.first == bully_yaml.first &&
   bully.last == bully_yaml.last &&
   bully.length == bully_yaml.length
  puts "and both bully match!!!"
else
  puts "and they don't match!!!"
end
line


# Final stats: each Marshal measurement expressed as a fraction of the
# corresponding YAML measurement (values below 1.0 mean Marshal was
# faster / smaller).
dump_stat = marshal_dump / yaml_dump
load_stat = marshal_load / yaml_load
# Sizes are integers, so convert to float before dividing.
size_stat = marshal_size.to_f / yaml_size

[
  ["STATS: Marshal dump finishes in %.4f of YAML time\n", dump_stat],
  ["STATS: Marshal load finishes in %.4f of YAML time\n", load_stat],
  ["STATS: Marshal file is %.4f of YAML file size\n", size_stat]
].each do |template, ratio|
  print format(template, ratio)
end
line
blog comments powered by Disqus

}

Older Posts... Blog powered by Jekyll.
Built using Liquid, RedCloth, Pygments and Blueprint.

Copyright © 2008-2010 Louis-Philippe Perron