diff --git a/scripts/run.jl b/scripts/run.jl
index cfc70b66eb5f91bb5a19e1048a3813d2d8e1fe39..5deaddfe3bed035155d74bc5e74acd8c50404253 100644
--- a/scripts/run.jl
+++ b/scripts/run.jl
@@ -5,6 +5,7 @@ using Colors: Gray
 import ColorTypes
 import DataFrames: DataFrame
 import FileIO
+using OpticalFlowUtils
 using WriteVTK: paraview_collection
 
 using SemiSmoothNewton
@@ -14,9 +15,20 @@ using SemiSmoothNewton: vtk_mesh, vtk_append!, vtk_save
 include("util.jl")
 isdefined(Main, :Revise) && Revise.track(joinpath(@__DIR__, "util.jl"))
 
-toimg(arr) = Gray.(clamp.(arr, 0., 1.))
-loadimg(x) = reverse(transpose(Float64.(FileIO.load(x))); dims = 2)
-saveimg(io, x) = FileIO.save(io, transpose(reverse(toimg(x); dims = 2)))
+_toimg(arr) = Gray.(clamp.(arr, 0., 1.))
+#loadimg(x) = reverse(transpose(Float64.(FileIO.load(x))); dims = 2)
+#saveimg(io, x) = FileIO.save(io, transpose(reverse(_toimg(x); dims = 2)))
+loadimg(x) = Float64.(FileIO.load(x))
+saveimg(io, x) = FileIO.save(io, _toimg(x))
+
+from_img(arr) = permutedims(reverse(arr; dims = 1))
+to_img(arr) = permutedims(reverse(arr; dims = 2))
+function to_img(arr::AbstractArray{<:Any,3})
+    out = permutedims(reverse(arr; dims = (1,3)), (1, 3, 2))
+    out[1, :, :] .= .-out[1, :, :]
+    return out
+end
+
 
 """
     logfilter(dt; a=20)
@@ -477,6 +489,7 @@ end
 
 function denoise(img; name, params...)
     m = 1
+    img = from_img(img) # coord flip
     #mesh = init_grid(img; type=:vertex)
     mesh = init_grid(img, 5, 5)
 
@@ -533,6 +546,7 @@ end
 
 function denoise_pd(img; df=nothing, name, algorithm, params...)
     m = 1
+    img = from_img(img) # coord flip
     mesh = init_grid(img; type=:vertex)
     #mesh = init_grid(img, 5, 5)
 
@@ -607,6 +621,7 @@ function experiment1(img)
     path = "data/pd-comparison"
 
     img = loadimg("data/denoising/input.png")
+    img = from_img(img) # coord flip
 
     algparams = (alpha1=0., alpha2=30., lambda=1., beta=0., gamma1=1e-3, gamma2=1e-3)
 
@@ -635,6 +650,8 @@ function inpaint(img, imgmask; name, params...)
 	throw(ArgumentError("non-matching dimensions"))
 
     m = 1
+    img = from_img(img) # coord flip
+    imgmask = from_img(imgmask) # coord flip
     mesh = init_grid(img; type=:vertex)
 
     # inpaint specific stuff
@@ -669,14 +686,16 @@ function inpaint(img, imgmask; name, params...)
 end
 
 function optflow(ctx)
-    imgf0 = ctx.params.imgf0
-    imgf1 = ctx.params.imgf1
+    # coord flip
+    imgf0 = from_img(ctx.params.imgf0)
+    imgf1 = from_img(ctx.params.imgf1)
     size(imgf0) == size(imgf1) ||
 	throw(ArgumentError("non-matching dimensions"))
 
     m = 2
     #mesh = init_grid(imgf0; type=:vertex)
-    mesh = init_grid(imgf0, 50, 50)
+    mesh = init_grid(imgf0, 20, 20)
+    #mesh = init_grid(imgf0)
 
     # optflow specific stuff
     Vg = FeSpace(mesh, P1(), (1,))
@@ -685,47 +704,46 @@ function optflow(ctx)
     fw = FeFunction(Vg, name="fw")
     nablafw = nabla(fw)
 
-    T(tdata, u) = tdata * u
+    T(tdata, u) = tdata * u # tdata = nablafw
     S(u, nablau) = nablau
 
-    name = "run"
-    st = L1L2TVContext(name, mesh, m; T, tdata = nablafw, S,
+    st = L1L2TVContext("run", mesh, m; T, tdata = nablafw, S,
         ctx.params.alpha1, ctx.params.alpha2, ctx.params.lambda, ctx.params.beta,
         ctx.params.gamma1, ctx.params.gamma2)
 
     function reproject!()
         project_img!(f0, imgf0)
         project_img!(f1, imgf1)
-        # FIXME: currently dual grid only
-        #interpolate!(f0, x -> imgf0[round.(Int, x)...])
-        #interpolate!(f1, x -> imgf1[round.(Int, x)...])
-    end
-    reproject!()
 
-    fw.data .= f1.data
+        # TODO: should be warping from imgfw
+        fw.data .= f1.data
 
-    g_optflow(x; u, f0, fw, nablafw) =
-	nablafw * u - (fw - f0)
-    interpolate!(st.g, g_optflow; st.u, f0, fw, nablafw)
+        g_optflow(x; u, f0, fw, nablafw) =
+            nablafw * u - (fw - f0)
+        interpolate!(st.g, g_optflow; st.u, f0, fw, nablafw)
+    end
+    reproject!()
 
-    save_optflow(i) =
+    save_step(i) =
         output(st, joinpath(ctx.outdir, "output_$(lpad(i, 5, '0')).vtu"),
 	    st.g, st.u, st.p1, st.p2, st.est, f0, f1, fw)
 
     pvd = paraview_collection(joinpath(ctx.outdir, "output.pvd"))
-    pvd[0] = save_optflow(0)
+    pvd[0] = save_step(0)
     for i in 1:10
 	step!(st)
 	estimate!(st)
-	pvd[i] = save_optflow(i)
+	pvd[i] = save_step(i)
 	println()
     end
     vtk_save(pvd)
+    display(plot(colorflow(to_img(sample(st.u)))))
 
     #CSV.write(joinpath(ctx.outdir, "energies.csv"), df)
     #saveimg(joinpath(ctx.outdir, "output_glob.png"), fetch_u(states.glob))
     #savedata(ctx, "data.tex"; lambda=λ, beta=β, tau=τ, maxiters, energymin,
     #    width=size(fo, 2), height=size(fo, 1))
+    return st
 end
 
 function experiment_optflow_middlebury(ctx)
@@ -733,6 +751,5 @@ function experiment_optflow_middlebury(ctx)
     imgf1 = loadimg(joinpath(ctx.indir, "frame11.png"))
 
     ctx = Util.Context(ctx; imgf0, imgf1)
-    optflow(ctx)
-    return ctx
+    return optflow(ctx)
 end
diff --git a/src/operator.jl b/src/operator.jl
index b15cd60313c3d258364e7e3345c1f74b7744552b..1eab3077ae5e90e721d8a84a6d562a5444d53b08 100644
--- a/src/operator.jl
+++ b/src/operator.jl
@@ -127,7 +127,8 @@ function project_img!(u::FeFunction, img)
     opparams = (; f)
 
     nrdims = prod(space.size)
-    nldofs = ndofs(space.element) # number of element dofs (i.e. local dofs not counting range dimensions)
+    # number of element dofs (i.e. local dofs not counting range dimensions)
+    nldofs = ndofs(space.element)
 
     a(xloc, u, du, v, dv; f) = dot(u, v)
     l(xloc, v, dv; f) = dot(f, v)