diff --git a/scripts/run.jl b/scripts/run.jl
index 7b6c767d6e3b5b3f56f1de4465f69ecd8f71b4bb..c45cd5c7a721610c2faa5adf7472dbef438233ec 100644
--- a/scripts/run.jl
+++ b/scripts/run.jl
@@ -14,4 +14,6 @@ ctx(experiment_surrogate_outerinner_ref, "surrogate/outerinner_ref")
 
 ctx(experiment_global_basic, "global/basic")
 
+# Note: run the following experiment(s) with multiple worker processes.
+# Remember to activate the project on every worker: `@everywhere Pkg.activate(".")`.
 ctx(experiment_scaling_opticalflow, "scaling/opticalflow")
diff --git a/scripts/run_experiments.jl b/scripts/run_experiments.jl
index 27f72d58e829b035a9a558d2214be1b857b4d7d7..73f63623fc5bf4135a3e295ffa47239c50f0c1c8 100644
--- a/scripts/run_experiments.jl
+++ b/scripts/run_experiments.jl
@@ -270,7 +270,7 @@ function experiment_scaling_opticalflow(ctx)
     β = 0.001
 
     ninner = 300
-    Mdir = 2 * floor(Int, sqrt(nworkers()))
+    Mdir = 2 * floor(Int, sqrt(nworkers())) # to have enough workers available
     M = (Mdir, Mdir)
     overlap = (5, 5)
 
@@ -301,10 +301,12 @@ function experiment_scaling_opticalflow(ctx)
 
     tg = timeit(galg)
 
+    nparallel = prod(M) ÷ 2^2
     ws = workers()
+    @assert nparallel <= length(ws)
     df = DataFrame()
-    for np in 0:Mdir÷2
-        nw = 2^np
+    for nw in 1:nparallel
+        nparallel % nw == 0 || continue
         push!(df, (
             nworkers = nw,
             time = timeit(dalg(ws[1:nw]))))
diff --git a/src/problems.jl b/src/problems.jl
index 716c76e028ef46531e925e46fcc001da5a8460b2..c766c950d7e328af8b853dcd975c56b8740c5ddc 100644
--- a/src/problems.jl
+++ b/src/problems.jl
@@ -20,11 +20,12 @@ DualTVL1ROFOpProblem(g, B, λ::Real) = DualTVL1ROFOpProblem(g, B, fill!(similar(
 function energy(p, prob::DualTVL1ROFOpProblem)
     d = ndims(p)
 
-    @inline kfΛ(w) = @inbounds divergence(w) + prob.g[w.position]
+    @inline kfΛ(w) = @inbounds divergence(w)
     kΛ = Kernel{ntuple(_->-1:1, d)}(kfΛ)
 
     # v = div(p) + g
     v = map(kΛ, extend(p, ExtensionNothing()))
+    v .+= prob.g
 
     # |v|_B^2
     u = prob.B * vec(v)