diff --git a/.gitignore b/.gitignore index e60c5ea..34a7cf6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ result.bin fasta.txt +build diff --git a/build_julia.jl b/build_julia.jl new file mode 100644 index 0000000..68d9ef3 --- /dev/null +++ b/build_julia.jl @@ -0,0 +1,118 @@ +""" +Usage: build_julia relative/path/to/file.jl +Result: compiles an executable under the name build/file +""" + +using Libdl +global gcc = "gcc" +gccworks = try + success(`$gcc -v`) +catch + error("GCC wasn't found. Please make sure that gcc is on the path and run again") +end +using Libdl +threadingOn() = ccall(:jl_threading_enabled, Cint, ()) != 0 + +shell_escape(str) = "'$(replace(str, "'" => "'\''"))'" +libDir() = dirname(abspath(Libdl.dlpath("libjulia"))) +private_libDir() = abspath(Sys.BINDIR, Base.PRIVATE_LIBDIR) + +includeDir() = abspath(Sys.BINDIR, Base.INCLUDEDIR, "julia") + +function ldflags() + fl = "-L$(shell_escape(libDir()))" + if Sys.iswindows() + fl = fl * " -Wl,--stack,8388608" + elseif Sys.islinux() + fl = fl * " -Wl,--export-dynamic" + end + return fl +end + +function ldlibs() + libname = "julia" + if Sys.isunix() + return "-Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname" + else + return "-l$libname -lopenlibm" + end +end + +function cflags() + return sprint() do io + print(io, "-std=gnu99") + include = shell_escape(includeDir()) + print(io, " -I", include) + print(io, " -DJULIA_ENABLE_THREADING=1") + if Sys.isunix() + print(io, " -fPIC") + end + end +end + +function allflags() + return "$(cflags()) $(ldflags()) $(ldlibs())" +end + +function julia_flags(optimize, debug, cc_flags) + flags = Base.shell_split(allflags()) + bitness_flag = Sys.ARCH == :aarch64 ? `` : Int == Int32 ? "-m32" : "-m64" + flags = `$flags $bitness_flag` + optimize == nothing || (flags = `$flags -O$optimize`) + debug == 2 && (flags = `$flags -g`) + cc_flags == nothing || isempty(cc_flags) || (flags = `$flags $cc_flags`) + flags +end +path = ARGS[1] + +command = """ +Base.reinit_stdio() +_bindir = ccall(:jl_get_julia_bindir, Any, ())::String +@eval(Sys, BINDIR = \$(_bindir)) +@eval(Sys, STDLIB = joinpath(\$_bindir, "..", "share", "julia", "stdlib", string('v', (VERSION.major), '.', VERSION.minor))) +Base.init_load_path() +Base.init_depot_path() +using REPL +Base.REPL_MODULE_REF[] = REPL +include("$path") +""" +function default_sysimg_path(debug = false) + ext = "sys" + if Sys.isunix() + dirname(Libdl.dlpath(ext)) + else + normpath(Sys.BINDIR, "..", "lib", "julia") + end +end +function build_shared(s_file, o_file) + # Prevent compiler from stripping all symbols from the shared lib. + if Sys.isapple() + o_file = `-Wl,-all_load $o_file` + else + o_file = `-Wl,--whole-archive $o_file -Wl,--no-whole-archive` + end + command = `gcc -shared -DJULIAC_PROGRAM_LIBNAME=\"$s_file\" -o $s_file $o_file $(julia_flags("3", false, ""))` + if Sys.isapple() + command = `$command -Wl,-install_name,@rpath/$s_file` + elseif Sys.iswindows() + command = `$command -Wl,--export-all-symbols` + end + run(command) +end +function build_exec(e_file, cprog, s_file) + command = `gcc -DJULIAC_PROGRAM_LIBNAME=\"$s_file\" -o $e_file $cprog $s_file $(julia_flags("3", false, ""))` + if Sys.isapple() + command = `$command -Wl,-rpath,@executable_path` + else + command = `$command -Wl,-rpath,\$ORIGIN` + end + run(command) +end +sys_so = joinpath(default_sysimg_path(false), "sys.so") +isdir("build") || mkdir("build") +syso = abspath(joinpath("build", "sys.so")) +sysa = abspath(joinpath("build", "sys.a")) +run(`julia -C native --output-o $sysa -J $sys_so -O3 -e "$command"`) +build_shared(syso, sysa) +exe_name = splitext(basename(path))[1] +build_exec(joinpath("build", exe_name), "program.c", syso) diff --git a/fannkuchredux/fannkuchredux-compile.jl b/fannkuchredux/fannkuchredux-compile.jl new file mode 100644 index 0000000..8e91fdc --- /dev/null +++ b/fannkuchredux/fannkuchredux-compile.jl @@ -0,0 +1,182 @@ +# The Computer Language Benchmarks Game +# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ + +# based on Oleg Mazurov's Java Implementation and Jeremy Zerfas' C implementation +# transliterated by Hamza Yusuf Çakır + +global const preferred_num_blocks = 24 + +struct Fannkuch + n::Int64 + blocksz::Int64 + maxflips::Vector{Int32} + chksums::Vector{Int32} + + function Fannkuch(n, nthreads) + nfact = factorial(n) + + blocksz = nfact ÷ (nfact < preferred_num_blocks ? 1 : preferred_num_blocks) + maxflips = zeros(Int32, nthreads) + chksums = zeros(Int32, nthreads) + + new(n, blocksz, maxflips, chksums) + end +end + +struct Perm + p::Vector{Int8} + pp::Vector{Int8} + count::Vector{Int32} + + function Perm(n) + p = zeros(Int8, n) + pp = zeros(Int8, n) + count = zeros(Int32, n) + + new(p, pp, count) + end +end + +Base.@propagate_inbounds @inline function first_permutation(perm::Perm, idx) + p = perm.p + pp = perm.pp + + for i = 2:length(p) + p[i] = i - 1 + end + + for i = length(p):-1:2 + ifact = factorial(i-1) + d = idx ÷ ifact + perm.count[i] = d + idx = idx % ifact + + for j = 1:i + pp[j] = p[j] + end + + for j = 1:i + p[j] = j+d <= i ? pp[j+d] : pp[j+d-i] + end + end +end + +Base.@propagate_inbounds @inline function next_permutation(perm::Perm) + p = perm.p + count = perm.count + + first = p[2] + p[2] = p[1] + p[1] = first + + i = 2 + while count[i] >= i - 1 + count[i] = 0 + + next = p[1] = p[2] + + for j = 1:i + p[j] = p[j+1] + end + + i += 1 + p[i] = first + first = next + end + count[i] += 1 + nothing +end + +Base.@propagate_inbounds @inline function count_flips(perm::Perm) + p = perm.p + pp = perm.pp + + flips = Int32(1) + + first = p[1] + 1 + + if p[first] != 0 + + unsafe_copyto!(pp, 2, p, 2, length(p) - 1) + + while true + flips += one(flips) + new_first = pp[first] + pp[first] = first - 1 + + if first > 3 + lo = 2; hi = first - 1 + # see the note in Jeremy Zerfas' C implementation for + # this loop + for k = 0:13 + t = pp[lo] + pp[lo] = pp[hi] + pp[hi] = t + (hi < lo + 3) && break + lo += 1 + hi -= 1 + end + end + + first = new_first + 1 + pp[first] == 0 && break + end + end + + return flips +end + +Base.@propagate_inbounds function run_task(f::Fannkuch, perm::Perm, idxmin, idxmax) + maxflips = Int32(0) + chksum = Int32(0) + + i = idxmin + while true + if perm.p[1] != 0 + flips = count_flips(perm) + maxflips = max(maxflips, flips) + chksum += iseven(i) ? flips : -flips + end + i != idxmax || break + i += 1 + next_permutation(perm) + end + + id = Threads.threadid() + f.maxflips[id] = max(f.maxflips[id], maxflips) + f.chksums[id] += chksum + nothing +end + +function runf(f::Fannkuch) + factn = factorial(f.n) + + Threads.@threads for idxmin = 0:f.blocksz:factn-1 + perm = Perm(f.n) + @inbounds first_permutation(perm, idxmin) + idxmax = idxmin + f.blocksz - 1 + @inbounds run_task(f, perm, idxmin, idxmax) + end +end + +function fannkuchredux(n) + f = Fannkuch(n, Threads.nthreads()) + + runf(f) + + # reduce results + chk = sum(f.chksums) + res = maximum(f.maxflips) + + println(chk, "\nPfannkuchen(", n, ") = ", res) +end + +Base.@ccallable function julia_main(args::Vector{String})::Cint + n = parse(Int, args[1]) + fannkuchredux(n) + return 0 +end + +# call once to make sure everything gets compiled! +# this is in global scope, so won't be called when calling the executable +julia_main(["7"]) diff --git a/nbody/nbody-compile.jl b/nbody/nbody-compile.jl new file mode 100644 index 0000000..16625a0 --- /dev/null +++ b/nbody/nbody-compile.jl @@ -0,0 +1,135 @@ +# The Computer Language Benchmarks Game +# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ + +# based on the Java version + +using Printf +using LinearAlgebra + +# Constants +const solar_mass = 4π^2 +const days_per_year = 365.24 + +# Use a 4-tuple here to get better SIMD instructions. +# This is a space-time tradeoff, but this benchmark is well within L1 cache limits. +struct Vec3 + x::NTuple{4, Float64} +end +Vec3(x, y, z) = Vec3((x,y,z,0.0)) +Base.:/(v::Vec3, n::Number) = Vec3(1/n .* v.x) +Base.:*(v::Vec3, n::Number) = Vec3(n .* v.x) +Base.:-(v1::Vec3, v2::Vec3) = Vec3(v1.x .- v2.x) +Base.:+(v1::Vec3, v2::Vec3) = Vec3(v1.x .+ v2.x) +# Todo, prettify +squarednorm(v1::Vec3) = v1.x[1]^2 + v1.x[2]^2 + v1.x[3]^2 +Base.muladd(x::Vec3, y::Number, z::Vec3) = Vec3(muladd.(x.x, y, z.x)) + +# A heavenly body in the system +mutable struct Body + pos::Vec3 + v::Vec3 + mass::Float64 +end + +function offset_momentum!(b::Body, p::Vec3) + b.v -= p / solar_mass +end + +function init_sun!(bodies::Vector{Body}) + p = Vec3(0.0, 0.0, 0.0) + for b in bodies + p += b.v * b.mass + end + offset_momentum!(bodies[1], p) +end + +function advance(bodies::Vector{Body}, dt::Number) + @inbounds for i = 1:length(bodies) + bi = bodies[i] + for j = i+1:length(bodies) + bj = bodies[j] + delta = bi.pos - bj.pos + dsq = squarednorm(delta) + distance = sqrt(dsq) + mag = dt / (dsq * distance) + bi.v = muladd(delta, -(bj.mass * mag), bi.v) + bj.v = muladd(delta, (bi.mass * mag), bj.v) + end + end + + for b in bodies + b.pos = muladd(b.v, dt, b.pos) + end +end + +function energy(bodies::Vector{Body}) + e = 0.0 + @inbounds for i = 1:length(bodies) + bi = bodies[i] + e += 0.5 * bi.mass * squarednorm(bi.v) + for j = i+1:length(bodies) + bj = bodies[j] + delta = bi.pos - bj.pos + distance = sqrt(squarednorm(delta)) + dinv = 1.0 / distance + e = muladd((bi.mass * bj.mass), -dinv, e) + end + end + return e +end + + +function perf_nbody(N::Int=1000) + jupiter = Body( Vec3(4.84143144246472090e+00, # pos[1] = x + -1.16032004402742839e+00, # pos[2] = y + -1.03622044471123109e-01), # pos[3] = z + Vec3(1.66007664274403694e-03 * days_per_year, # v[1] = vx + 7.69901118419740425e-03 * days_per_year, # v[2] = vy + -6.90460016972063023e-05 * days_per_year), # v[3] = vz + 9.54791938424326609e-04 * solar_mass) # mass + + saturn = Body(Vec3(8.34336671824457987e+00, + 4.12479856412430479e+00, + -4.03523417114321381e-01), + Vec3(-2.76742510726862411e-03 * days_per_year, + 4.99852801234917238e-03 * days_per_year, + 2.30417297573763929e-05 * days_per_year), + 2.85885980666130812e-04 * solar_mass) + + uranus = Body(Vec3(1.28943695621391310e+01, + -1.51111514016986312e+01, + -2.23307578892655734e-01), + Vec3(2.96460137564761618e-03 * days_per_year, + 2.37847173959480950e-03 * days_per_year, + -2.96589568540237556e-05 * days_per_year), + 4.36624404335156298e-05 * solar_mass) + + neptune = Body(Vec3(1.53796971148509165e+01, + -2.59193146099879641e+01, + 1.79258772950371181e-01), + Vec3(2.68067772490389322e-03 * days_per_year, + 1.62824170038242295e-03 * days_per_year, + -9.51592254519715870e-05 * days_per_year), + 5.15138902046611451e-05 * solar_mass) + + sun = Body(Vec3(0.0, 0.0, 0.0), Vec3(0.0, 0.0, 0.0), solar_mass) + + bodies = [sun, jupiter, saturn, uranus, neptune] + + init_sun!(bodies) + @printf("%.9f\n", energy(bodies)) + for i = 1:N + advance(bodies, 0.01) + end + @printf("%.9f\n", energy(bodies)) +end + +Base.@ccallable function julia_main(args::Vector{String})::Cint + n = parse(Int, args[1]) + perf_nbody(n) + return 0 +end + +# call once to make sure everything gets compiled! +# this is in global scope, so won't be called when calling the executable +julia_main(["1000"]) diff --git a/program.c b/program.c new file mode 100644 index 0000000..67a6d0a --- /dev/null +++ b/program.c @@ -0,0 +1,54 @@ +// This file is a part of Julia. License is MIT: http://julialang.org/license + +// Standard headers +#include +#include + +// Julia headers (for initialization and gc commands) +#include "uv.h" +#include "julia.h" + +#ifdef JULIA_DEFINE_FAST_TLS // only available in Julia v0.7 and above +JULIA_DEFINE_FAST_TLS() +#endif + +// Declare C prototype of a function defined in Julia +extern int julia_main(jl_array_t*); + +// main function (windows UTF16 -> UTF8 argument conversion code copied from julia's ui/repl.c) +int main(int argc, char *argv[]) +{ + int retcode; + int i; + uv_setup_args(argc, argv); // no-op on Windows + + // initialization + libsupport_init(); + + // jl_options.compile_enabled = JL_OPTIONS_COMPILE_OFF; + // JULIAC_PROGRAM_LIBNAME defined on command-line for compilation + jl_options.image_file = JULIAC_PROGRAM_LIBNAME; + julia_init(JL_IMAGE_JULIA_HOME); + + // Initialize Core.ARGS with the full argv. + jl_set_ARGS(argc, argv); + + // Set PROGRAM_FILE to argv[0]. + jl_set_global(jl_base_module, + jl_symbol("PROGRAM_FILE"), (jl_value_t*)jl_cstr_to_string(argv[0])); + + // Set Base.ARGS to `String[ unsafe_string(argv[i]) for i = 1:argc ]` + jl_array_t *ARGS = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("ARGS")); + jl_array_grow_end(ARGS, argc - 1); + for (i = 1; i < argc; i++) { + jl_value_t *s = (jl_value_t*)jl_cstr_to_string(argv[i]); + jl_arrayset(ARGS, s, i - 1); + } + + // call the work function, and get back a value + retcode = julia_main(ARGS); + + // Cleanup and gracefully exit + jl_atexit_hook(retcode); + return retcode; +}