Simon Danisch / Feb 01 2019

Language Benchmarks

using CSV, DataFrames, Query, Statistics
# Fetch the Benchmarks Game measurements CSV to a local file, then parse it.
file = download(
    "https://salsa.debian.org/benchmarksgame-team/benchmarksgame/raw/master/public/data/alldata.csv",
)
# NOTE(review): `allowmissing = :none` presumably tells CSV.jl that no column may
# contain `missing` — confirm against the CSV.jl version this script targets.
data = CSV.read(file; allowmissing = :none);
# Largest workload size `n` recorded for each benchmark name.
# `by` produces one row per `name` carrying the group's maximum `n`; those rows are
# handed to `Dict`, which is later indexed by benchmark name.
max_n = Dict(eachrow(by(group -> maximum(group.n), data, :name)))
# Exploratory per-benchmark summary (group key, largest `n`, first `lang` entry).
# The result is not bound to a name; it is only useful when displayed interactively.
by(
    group -> (name = key(group), n = maximum(group.n), lang = group.lang[1]),
    data,
    [:name],
)
# Narrow the raw table to the four columns used by the analysis.
# The "elapsed(s)" column is exposed under the shorter name `cpu`; its name is not a
# valid identifier, so it is fetched with an explicit `getproperty` call.
relevant_data = DataFrame(
    lang = data.lang,
    cpu = getproperty(data, Symbol("elapsed(s)")),
    name = data.name,
    n = data.n,
)
# Keep, in place, only rows for the three compared languages that were measured at
# the largest `n` recorded for their benchmark.
filter!(
    row -> row.lang ∈ ("python3", "gcc", "java") && row.n == max_n[row.name],
    relevant_data,
)
# Spot check of the python3 rows for one benchmark; the result is not stored.
filter(relevant_data) do row
    row.lang == "python3" && row.name == "fannkuchredux"
end
# For each benchmark name, find the row of `relevant_data` with the smallest `cpu`
# value and keep its name, language and that minimum. Collected into a DataFrame
# with columns `name`, `lang`, `cpu`.
fastest = @from i in relevant_data begin
  @group i by i.name into g
  @select begin
    # `findmin` gives the smallest cpu value in the group and its position.
    val, idx = findmin(g.cpu)
    # Row of the group that achieved the minimum.
    row = g[idx]
    (name = row.name, lang = row.lang, cpu = val)
  end
  @collect DataFrame
end
# Lookup table: benchmark name => smallest `cpu` value found above.
# NOTE(review): despite the name, this is the minimum over every language left in
# `relevant_data` (python3, gcc, java), not gcc specifically — presumably gcc is
# expected to be the fastest for each benchmark; confirm.
gcc_fastest = Dict(zip(fastest.name, fastest.cpu))
# For every (benchmark name, language) group, divide the group's smallest `cpu`
# value by the benchmark's entry in `gcc_fastest`.
# Each result is a named tuple where:
#   `name`  holds the whole group key, i.e. the (name, lang) tuple, so the language
#           is `name[2]`;
#   `speed` holds the ratio described above (a slowdown factor: larger is slower).
# NOTE(review): `@collect` is given no sink here; presumably this yields a Vector of
# named tuples — confirm against Query.jl.
slowdown = @from i in relevant_data begin
  @group i by (i.name, i.lang) into g
  @select begin
    k = key(g)
    # `k[1]` is the benchmark name, used to look up the reference time.
    (name = k, speed = minimum(g.cpu) / gcc_fastest[k[1]])
  end
  @collect
end;
# Rounded mean and worst-case slowdown factors per language.
# `entry.name` is the (benchmark, language) key, so `entry.name[2]` is the language.
# python3: mean, then maximum.
round(mean([entry.speed for entry in slowdown if entry.name[2] == "python3"]))
round(maximum([entry.speed for entry in slowdown if entry.name[2] == "python3"]))
# java: mean, then maximum.
round(mean([entry.speed for entry in slowdown if entry.name[2] == "java"]))
round(maximum([entry.speed for entry in slowdown if entry.name[2] == "java"]))