diff --git a/DualTVDD/repl.jl b/DualTVDD/repl.jl
new file mode 100644
index 0000000000000000000000000000000000000000..aa11454f8b717a6d069ecfb693b8f7c16ac687d9
--- /dev/null
+++ b/DualTVDD/repl.jl
@@ -0,0 +1,7 @@
+using Pkg
+Pkg.activate(".")
+
+using BenchmarkTools
+using Revise
+
+using DualTVDD
diff --git a/DualTVDD/src/DualTVDD.jl b/DualTVDD/src/DualTVDD.jl
index 6a5fb5509311d3fdd1994c5ab44674f90b19b3a0..1150f713ede6390f8d28d4960b8f30b260682972 100644
--- a/DualTVDD/src/DualTVDD.jl
+++ b/DualTVDD/src/DualTVDD.jl
@@ -22,20 +22,24 @@ export dditer
 Create (global) partition of unity grid function θ.
 """
 function partition_function(metagrid, idx...)
-  function h(r, rp, i)
-    i <= first(r) + metagrid.overlap && return 1.0
-    i >= last(r) - metagrid.overlap && return 1.0
-    (min(i - first(rp), last(rp) - i, metagrid.overlap) + 1) / (metagrid.overlap + 1)
-  end
-
-  vidx = FD.indices(metagrid, idx...)
-  ctheta = map(x -> [h(x[1], x[2], i) for i in x[2]], zip(axes(metagrid), vidx))
-
-  dst = zeros(size(metagrid))
-  for (I,J) in zip(CartesianIndices(vidx), CartesianIndices(length.(vidx)))
-    dst[I] = prod(getindex.(ctheta, Tuple(J)))
-  end
-  FD.GridFunction(metagrid, dst)
+    function h(r, rp, i)
+        if i <= first(r) + metagrid.overlap
+            return 1.0
+        elseif i >= last(r) - metagrid.overlap
+            return 1.0
+        else
+            return (min(i - first(rp), last(rp) - i, metagrid.overlap) + 1) / (metagrid.overlap + 1)
+        end
+    end
+
+    vidx = FD.indices(metagrid, idx...)
+    ctheta = map(x -> [h(x[1], x[2], i) for i in x[2]], zip(axes(metagrid), vidx))
+
+    dst = zeros(size(metagrid))
+    for (I,J) in zip(CartesianIndices(vidx), CartesianIndices(length.(vidx)))
+        dst[I] = prod(getindex.(ctheta, Tuple(J)))
+    end
+    FD.GridFunction(metagrid, dst)
 end
 
 
@@ -45,7 +49,7 @@ end
 Compute primal solution from dual solution according to the optimality conditions.
 """
 function p2u(grid, p, A, f; α, β)
-  FD.GridFunction(grid, (A'*A + β*I) \ (FD.divergence_z(p).data[:] .+ A'*f.data[:]))
+    FD.GridFunction(grid, (A'*A + β*I) \ (FD.divergence_z(p).data[:] .+ A'*f.data[:]))
 end
 
 
@@ -56,67 +60,67 @@ Fixed point iteration (Chambolle).
 """
 # TODO: investigate matrix multiplication performance with GridFunctions
 function fpiter(grid, p, wg, A, α, β, τ)
-  x = FD.GridFunction(FD.parent(grid), (A'*A + β*I) \ (FD.divergence_z(FD.zero_continuation(p)) + wg).data[:])
-  q = FD.GridFunction(grid, view(FD.gradient(x).data, grid.indices..., :))
-  ## this misses out on the right boundary
-  #q = FD.gradient(FD.GridFunction(grid, view(x.data, grid.indices..., :)))
+    x = FD.GridFunction(FD.parent(grid), (A'*A + β*I) \ (FD.divergence_z(FD.zero_continuation(p)) + wg).data[:])
+    q = FD.GridFunction(grid, view(FD.gradient(x).data, grid.indices..., :))
+    ## this misses out on the right boundary
+    #q = FD.gradient(FD.GridFunction(grid, view(x.data, grid.indices..., :)))
 
-  p_new = FD.GridFunction(grid, FD.pwmul(FD.GridFunction(grid, α ./ (α .+ τ .* FD.pw_norm(q))),
-                                         FD.GridFunction(grid, p .+ τ .* q)))
+    p_new = FD.GridFunction(grid, FD.pwmul(FD.GridFunction(grid, α ./ (α .+ τ .* FD.pw_norm(q))),
+                                           FD.GridFunction(grid, p .+ τ .* q)))
 
-  reserr = maximum(sum((p_new .- p).^2, dims=ndims(p)))
-  (p_new, reserr)
+    reserr = maximum(sum((p_new .- p).^2, dims=ndims(p)))
+    (p_new, reserr)
 end
 
 """
-    fpalg(grid, w, A, p = 0; α, β, τ = 1/8, ϵ = 1e-4)
+fpalg(grid, w, A, p = 0; α, β, τ = 1/8, ϵ = 1e-4)
 
 Fixed point algorithm (Chambolle), calling `fpiter`.
 """
 function fpalg(grid, w, A, p = FD.GridFunction(x -> 0, ndims(grid), grid);
                α, β, τ = 1/8, ϵ = 1e-4)
-  reserr = 1
-  k = 0
-  while reserr > ϵ && k < 10e1
-    k += 1
-    (p, reserr) = fpiter(grid, p, w, A, α, β, τ)
-  end
-  return p
+    reserr = 1
+    k = 0
+    while reserr > ϵ && k < 10e1
+        k += 1
+        (p, reserr) = fpiter(grid, p, w, A, α, β, τ)
+    end
+    return p
 end
 
 
 """
-    dditer(grid, p, A, f; α, β, τ, ϵ, ρ)
+dditer(grid, p, A, f; α, β, τ, ϵ, ρ)
 
 Domain decomposition algorithm, calling `fpalg` for each part.
 """
 function dditer(grid, p, A, f; α, β, τ, ϵ, ρ)
-  p̂ = (1-ρ) * p
+    p̂ = (1-ρ) * p
 
-  i = 0
-  for part in FD.parts(grid)
-    gridv = view(grid, Tuple(part)...)
+    i = 0
+    for part in FD.parts(grid)
+        gridv = view(grid, Tuple(part)...)
 
-    fv = view(f, Tuple(part)...)
+        fv = view(f, Tuple(part)...)
 
-    p̂v = view(p̂, Tuple(part)...)
-    pv = view(p, Tuple(part)...)
+        p̂v = view(p̂, Tuple(part)...)
+        pv = view(p, Tuple(part)...)
 
-    θ = partition_function(grid, Tuple(part)...)
-    θv = view(θ, Tuple(part)...)
+        θ = partition_function(grid, Tuple(part)...)
+        θv = view(θ, Tuple(part)...)
 
-    w = FD.GridFunction(grid, A' * f.data[:]) + FD.divergence(FD.pwmul(FD.GridFunction(grid, 1 .- θ), p))
-    wv = view(w, Tuple(part)...)
+        w = FD.GridFunction(grid, A' * f.data[:]) + FD.divergence(FD.pwmul(FD.GridFunction(grid, 1 .- θ), p))
+        wv = view(w, Tuple(part)...)
 
-    p̂v .+= ρ .* fpalg(gridv, w, A, α=α*θv, β=β, τ=τ, ϵ=ϵ)
+        p̂v .+= ρ .* fpalg(gridv, w, A, α=α*θv, β=β, τ=τ, ϵ=ϵ)
 
-    i += 1
-  end
-  p .= p̂
+        i += 1
+    end
+    p .= p̂
 
-  u = p2u(grid, p, A, f, α=α, β=β)
-  FD.my_plot(u)
-  #FD.my_plot(FD.pw_norm(p))
+    u = p2u(grid, p, A, f, α=α, β=β)
+    FD.my_plot(u)
+    #FD.my_plot(FD.pw_norm(p))
 end
 
 
diff --git a/DualTVDD/src/fd.jl b/DualTVDD/src/fd.jl
index 5f76c3bf594c749e3b5bca55fe7ee45a6da33305..1d6cda9599dcb59c439f03452328fd44dc9fada9 100644
--- a/DualTVDD/src/fd.jl
+++ b/DualTVDD/src/fd.jl
@@ -1,6 +1,6 @@
 module FD
 
-using StaticArrays
+using StaticArrays: SVector
 using OffsetArrays
 import Colors: Gray
 import Plots: plot
@@ -10,6 +10,18 @@ export Grid, MetaGrid, GridView, GridFunction
 export grid
 
 
+"""
+    AbstractGrid{d}
+
+Abstract rectangular grid type, allowing positional indexing of coordinates.
+"""
+abstract type AbstractGrid{d} <: AbstractArray{SVector{d,Float64}, d} end
+
+Base.IndexStyle(::Type{<:AbstractGrid}) = IndexCartesian()
+
+Base.parent(grid::AbstractGrid) = grid
+
+
 "Finite Differences"
 module FDiff
 
@@ -18,12 +30,6 @@ using OffsetArrays
 const forward = OffsetVector([-1, 1], -1)
 const backward = OffsetVector([-1, 1], -2)
 const central = OffsetVector([-1, 0, 1], -2)
-function dirfd(fd, ndims, dir)
-  dir <= ndims || throw(ArgumentError("need dir <= ndims!"))
-  dfd = OffsetArray(zeros(ntuple(i -> i == dir ? length(fd) : 1, ndims)), ntuple(i -> i == dir ? fd.offsets[1] : -1, ndims))
-  # Workaround: LinearIndices(::OffsetVector) are not 1-based, so copyto! chockes
-  copyto!(dfd, fd.parent)
-end
 
 end
 
@@ -38,31 +44,25 @@ Create from `fd` a finite difference kernel usable in `ndims` dimension
 oriented in dimension `dir`.
 """
 # TODO: make it typestable on ndims
-function _fdkern(fd::OffsetArray{T,1,Array{T,1}}, ndims::Integer, dir::Integer) where T
-  1 <= dir <= ndims || throw(ArgumentError("dimesion $(dir) out of range 1:$(ndims)"))
-  dfd = OffsetArray{T}(undef, Tuple(i == dir ? axes(fd, 1) : (0:0) for i in 1:ndims))
-  # copyto! from OffsetArray fails, broadcasting for differing axes fails too
-  # workaround: copyto! from parent
-  copyto!(dfd, fd.parent)
+function _fdkern(fd::OffsetArray{T,1}, ndims::Integer, dir::Integer) where T
+    1 <= dir <= ndims || throw(ArgumentError("dimesion $(dir) out of range 1:$(ndims)"))
+
+    offsets = ntuple(i -> i == dir ? axes(fd, 1) : (0:0), ndims)
+    out = OffsetArray{T}(undef, offsets)
+    # copyto! from OffsetArray fails, broadcasting for differing axes fails too
+    # workaround: copyto! from parent
+    copyto!(out, fd.parent)
 end
 
 
 # Grid
 
 
-abstract type AbstractGrid{d} <: AbstractArray{Float64, d} end
-
-Base.ndims(grid::AbstractGrid{d}) where d = d
-Base.axes(grid::AbstractGrid) = Base.OneTo.(size(grid))
-Base.length(grid::AbstractGrid) = prod(size(grid))
-Base.IndexStyle(::Type{<:AbstractGrid}) = IndexCartesian()
-Base.parent(grid::AbstractGrid) = grid
-
 # aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
 const FDStepRange = typeof(0:0.1:1)
 
 struct Grid{d} <: AbstractGrid{d}
-  domain::NTuple{d, FDStepRange}
+    domain::NTuple{d, FDStepRange}
 end
 Grid(domain::FDStepRange...) = Grid(domain)
 Base.show(io::IO, grid::Grid) =
@@ -81,8 +81,8 @@ Base.getindex(grid::Grid, I::CartesianIndex) = SVector(getindex.(grid.domain, Tu
 # GridView
 
 struct GridView{d} <: AbstractGrid{d}
-  grid::AbstractGrid{d}
-  indices::NTuple{d, UnitRange{Int}}
+    grid::AbstractGrid{d}
+    indices::NTuple{d, UnitRange{Int}}
 end
 GridView(grid, indices::UnitRange{Int}...) = GridView(grid, indices)
 
@@ -101,9 +101,9 @@ Base.parent(grid::GridView) = grid.grid
 
 
 struct MetaGrid{d} <: AbstractGrid{d}
-  grid::Grid{d}
-  indices::Array{NTuple{d, UnitRange{Int}}, d}
-  overlap::Int
+    grid::Grid{d}
+    indices::Array{NTuple{d, UnitRange{Int}}, d}
+    overlap::Int
 end
 
 """
@@ -113,20 +113,20 @@ Create a grid on `domain`, partitioned along dimensions according to `pnum`
 respecting `overlap`.
 """
 function MetaGrid(domain::NTuple{d, FDStepRange}, pnum::NTuple{d, Int}, overlap::Int) where d
-  grid = Grid(domain)
-  tsize = size(grid) .+ (pnum .- 1) .* overlap
+    grid = Grid(domain)
+    tsize = size(grid) .+ (pnum .- 1) .* overlap
 
-  psize = tsize .÷ pnum
-  osize = tsize .- pnum .* psize
+    psize = tsize .÷ pnum
+    osize = tsize .- pnum .* psize
 
-  overhang(I, j) = I[j] == pnum[j] ? osize[j] : 0
+    overhang(I, j) = I[j] == pnum[j] ? osize[j] : 0
 
-  indices = Array{NTuple{d, UnitRange{Int}}, d}(undef, pnum)
-  for I in CartesianIndices(pnum)
-    indices[I] = ntuple(j -> ((I[j] - 1) * psize[j] - (I[j] - 1) * overlap + 1) :
-                             (      I[j] * psize[j] - (I[j] - 1) * overlap + overhang(I, j)), d)
-  end
-  MetaGrid{d}(grid, indices, overlap)
+    indices = Array{NTuple{d, UnitRange{Int}}, d}(undef, pnum)
+    for I in CartesianIndices(pnum)
+        indices[I] = ntuple(j -> ((I[j] - 1) * psize[j] - (I[j] - 1) * overlap + 1) :
+                            (      I[j] * psize[j] - (I[j] - 1) * overlap + overhang(I, j)), d)
+    end
+    MetaGrid{d}(grid, indices, overlap)
 end
 
 Base.size(grid::MetaGrid) = size(grid.grid)
@@ -163,40 +163,40 @@ Base.parent(f::AbstractGridFunction) = f
 `D == d + 1`
 """
 struct GridFunction{G<:AbstractGrid,T,d,n,A,D} <: AbstractGridFunction{G,T,d,n}
-  grid::G
-  data::A
-  function GridFunction{G,T,d,n,A,D}(grid::G, data::A) where {G,T,d,n,D,A<:AbstractArray{T,D}}
-    D == d + 1 || throw(ArgumentError)
-    d == ndims(grid) || throw(ArgumentError)
-    (size(grid)..., n) == size(data) || throw(ArgumentError("data does not match given size"))
-    new(grid, data)
-  end
+    grid::G
+    data::A
+    function GridFunction{G,T,d,n,A,D}(grid::G, data::A) where {G,T,d,n,D,A<:AbstractArray{T,D}}
+        D == d + 1 || throw(ArgumentError)
+        d == ndims(grid) || throw(ArgumentError)
+        (size(grid)..., n) == size(data) || throw(ArgumentError("data does not match given size"))
+        new(grid, data)
+    end
 end
 # TODO: check size(data)
 #GridFunction(grid::G, data::AbstractArray{T,D}) where {d,G<:AbstractGrid{d},T,D} =
 #    GridFunction{G,T,d,size(data, d+1),typeof(data),d+1}(grid, data)
 function GridFunction(grid::G, data::AbstractArray, ::Val{n}) where {d,G<:AbstractGrid{d}, n}
-  if length(data) == length(grid) && n > 1
-    data = repeat(data, inner=ntuple(i -> i == d+1 ? n : 1, d+1))
-  elseif size(data) != (size(grid)..., n)
-    data = reshape(data, size(grid)..., n)
-  end
-  length(data) ÷ length(grid) == n || throw(ArgumentError("dimensions don't match"))
-  GridFunction{G,eltype(data),d,n,typeof(data),d+1}(grid, data)
+    if length(data) == length(grid) && n > 1
+        data = repeat(data, inner=ntuple(i -> i == d+1 ? n : 1, d+1))
+    elseif size(data) != (size(grid)..., n)
+        data = reshape(data, size(grid)..., n)
+    end
+    length(data) ÷ length(grid) == n || throw(ArgumentError("dimensions don't match"))
+    GridFunction{G,eltype(data),d,n,typeof(data),d+1}(grid, data)
 end
 function GridFunction(grid::G, data::AbstractArray) where {d,G<:AbstractGrid{d}}
-  GridFunction(grid, data, Val(length(data) ÷ length(grid)))
+    GridFunction(grid, data, Val(length(data) ÷ length(grid)))
 end
 function GridFunction(f::Function, n::Int, grid::G) where {G<:AbstractGrid}
-  data = Array{Float64,ndims(grid)+1}(undef, size(grid)..., n)
-  for I in CartesianIndices(grid)
-    data[Tuple(I)..., :] .= f(grid[I])
-  end
-  GridFunction(grid, data)
+    data = Array{Float64,ndims(grid)+1}(undef, size(grid)..., n)
+    for I in CartesianIndices(grid)
+        data[Tuple(I)..., :] .= f(grid[I])
+    end
+    GridFunction(grid, data)
 end
 function GridFunction(grid::AbstractGrid, ::UndefInitializer, ::Val{n} = Val(1)) where n
-  data = Array{Float64}(undef, size(grid)..., 1)
-  GridFunction{typeof(grid), eltype(data), ndims(grid), n, typeof(data), ndims(grid) + 1}(grid, data)
+    data = Array{Float64}(undef, size(grid)..., 1)
+    GridFunction{typeof(grid), eltype(data), ndims(grid), n, typeof(data), ndims(grid) + 1}(grid, data)
 end
 
 #Base.show(io::IO, grid::GridFunction) =
@@ -218,95 +218,95 @@ grid(f::GridFunction) = f.grid
 
 # TODO: call is different, so maybe make thin not part of Base
 Base.view(f::AbstractGridFunction{G}, idx...) where G<:MetaGrid =
-    GridFunction(GridView(f.grid, indices(f.grid, idx...)), view(f.data, indices(f.grid, idx...)..., :))
+GridFunction(GridView(f.grid, indices(f.grid, idx...)), view(f.data, indices(f.grid, idx...)..., :))
 
 function zero_continuation(f::GridFunction{<:GridView})
-  dst = GridFunction(parent(grid(f)), zeros(size(parent(grid(f)))..., rangedim(f)), Val(rangedim(f)))
-  dst.data[grid(f).indices..., :] .= f.data
-  dst
+    dst = GridFunction(parent(grid(f)), zeros(size(parent(grid(f)))..., rangedim(f)), Val(rangedim(f)))
+    dst.data[grid(f).indices..., :] .= f.data
+    dst
 end
 
 Base.parent(f::GridFunction{<:GridView,<:Any,<:Any,<:Any,<:SubArray}) =
-    GridFunction(parent(grid(f)), parent(f.data))
+GridFunction(parent(grid(f)), parent(f.data))
 
 
 
 
 
 function _check_shape(a::GridFunction, b::GridFunction)
-  #TODO: why does fv.grid == gv.grid fail?
-  a.grid === b.grid || throw(ArgumentError("grids need to match"))
-  rangedim(a) == rangedim(b) || throw(ArgumentError("range dimensions need to match"))
+    #TODO: why does fv.grid == gv.grid fail?
+    a.grid === b.grid || throw(ArgumentError("grids need to match"))
+    rangedim(a) == rangedim(b) || throw(ArgumentError("range dimensions need to match"))
 end
 
 Base.:*(a::Number, b::GridFunction) = GridFunction(b.grid, a * b.data, Val(rangedim(b)))
 Base.:-(a::GridFunction) = GridFunction(a.grid, -a.data, Val(rangedim(a)))
 function Base.:+(a::GridFunction, b::GridFunction)
-  _check_shape(a, b)
-  GridFunction(a.grid, a.data .+ b.data, Val(rangedim(a)))
+    _check_shape(a, b)
+    GridFunction(a.grid, a.data .+ b.data, Val(rangedim(a)))
 end
 function Base.:-(a::GridFunction, b::GridFunction)
-  _check_shape(a, b)
-  GridFunction(a.grid, a.data .- b.data, Val(rangedim(a)))
+    _check_shape(a, b)
+    GridFunction(a.grid, a.data .- b.data, Val(rangedim(a)))
 end
 
 
 function Base.:+(a::GridFunction, b::GridFunction{<:GridView})
-  a.grid == parent(b.grid) || throw(ArgumentError("grids need to match"))
-  data = copy(a.data)
-  datav = view(data, b.grid.indices..., :)
-  datav .+= b.data
-  GridFunction(a.grid, data, Val(rangedim(a)))
+    a.grid == parent(b.grid) || throw(ArgumentError("grids need to match"))
+    data = copy(a.data)
+    datav = view(data, b.grid.indices..., :)
+    datav .+= b.data
+    GridFunction(a.grid, data, Val(rangedim(a)))
 end
 Base.:+(a::GridFunction{<:GridView}, b::GridFunction) = b + a
 
 # TODO: cleanup! use metaprogramming
 function Base.:+(a::GridFunction{<:GridView}, b::GridFunction{<:GridView})
-  _check_shape(a, b)
-  GridFunction(a.grid, a.data .+ b.data, Val(rangedim(a)))
+    _check_shape(a, b)
+    GridFunction(a.grid, a.data .+ b.data, Val(rangedim(a)))
 end
 # TODO: cleanup! use metaprogramming
 function Base.:-(a::GridFunction{<:GridView}, b::GridFunction{<:GridView})
-  _check_shape(a, b)
-  GridFunction(a.grid, a.data .- b.data, Val(rangedim(a)))
+    _check_shape(a, b)
+    GridFunction(a.grid, a.data .- b.data, Val(rangedim(a)))
 end
 
 function Base.:-(a::GridFunction, b::GridFunction{<:GridView})
-  a.grid == parent(b.grid) || throw(ArgumentError("grids need to match"))
-  data = copy(a.data)
-  datav = view(data, b.grid.indices..., :)
-  datav .-= b.data
-  GridFunction(a.grid, data, Val(rangedim(a)))
+    a.grid == parent(b.grid) || throw(ArgumentError("grids need to match"))
+    data = copy(a.data)
+    datav = view(data, b.grid.indices..., :)
+    datav .-= b.data
+    GridFunction(a.grid, data, Val(rangedim(a)))
 end
 Base.:-(a::GridFunction{<:GridView}, b::GridFunction) = b - a
 
 
 
 function my_plot(img::FD.GridFunction{G,T,d,1}) where {G,T,d}
-  print("Plotting ... ")
+    print("Plotting ... ")
 
-  data = reshape(copy(img.data), size(img.grid))
+    data = reshape(copy(img.data), size(img.grid))
 
-  min = minimum(data)
-  max = maximum(data)
-  @show min, max
+    min = minimum(data)
+    max = maximum(data)
+    @show min, max
 
-  #@. data = (data - min) / (max - min)
-  @. data = clamp(data, 0, 1)
+    #@. data = (data - min) / (max - min)
+    @. data = clamp(data, 0, 1)
 
-  plot(Gray.(data), show=true)
+    plot(Gray.(data), show=true)
 end
 
 
 ## Operations
 
 function fdfilter(f::GridFunction{G,T,d,n}, kern) where {G,T,d,n}
-  A = zeros(axes(f.data))
-  for k = 1:n
-    vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
-    imfilter!(view(A, vidx...), f.data[vidx...], kern)
-  end
-  GridFunction(f.grid, A)
+    A = zeros(axes(f.data))
+    for k = 1:n
+        vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
+        imfilter!(view(A, vidx...), f.data[vidx...], kern)
+    end
+    GridFunction(f.grid, A)
 end
 
 derivate(f::GridFunction, dir::Integer; fd = FDiff.forward) = fdfilter(f, _fdkern(fd, ndims(f.grid), dir))
@@ -316,73 +316,73 @@ derivate(f::GridFunction, dir::Integer; fd = FDiff.forward) = fdfilter(f, _fdker
 # chambolles algorithm), other choices may be reasonable
 
 function gradient(f::GridFunction{G,T,d,1}; fd=FDiff.forward) where {G,T,d}
-  #A = Array{T,d+1}(undef, (size(f.grid)..., d))
-  A = zeros(size(f.grid)..., d)
-  for k = 1:d
-    vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
-    vidx2 = ntuple(i -> i <= d ? Colon() : 1, d + 1)
-    imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
-    # FIXME: hackish, only for forward fd
-    selectdim(selectdim(A, d+1, k), k, size(f.grid, k)) .= 0
-  end
-  GridFunction(f.grid, A)
+    #A = Array{T,d+1}(undef, (size(f.grid)..., d))
+    A = zeros(size(f.grid)..., d)
+    for k = 1:d
+        vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
+        vidx2 = ntuple(i -> i <= d ? Colon() : 1, d + 1)
+        imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
+        # FIXME: hackish, only for forward fd
+        selectdim(selectdim(A, d+1, k), k, size(f.grid, k)) .= 0
+    end
+    GridFunction(f.grid, A)
 end
 
 function divergence(f::GridFunction{G,T,d}; fd=FDiff.backward) where {d,G<:AbstractGrid{d},T}
-  A = zeros(size(f.grid)..., 1)
-  for k = 1:d
-    vidx = ntuple(i -> i <= d ? Colon() : 1, d + 1)
-    vidx2 = ntuple(i -> i <= d ? Colon() : k, d + 1)
-    imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
-  end
-  GridFunction(f.grid, A)
+    A = zeros(size(f.grid)..., 1)
+    for k = 1:d
+        vidx = ntuple(i -> i <= d ? Colon() : 1, d + 1)
+        vidx2 = ntuple(i -> i <= d ? Colon() : k, d + 1)
+        imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
+    end
+    GridFunction(f.grid, A)
 end
 
 function _kernel_zero_right_bnd!(kernel, bidx, k)
-  if bidx[k] == 1
-    kernel[ntuple(i -> 0, ndims(kernel))...] = 0
-  end
-  kernel
+    if bidx[k] == 1
+        kernel[ntuple(i -> 0, ndims(kernel))...] = 0
+    end
+    kernel
 end
 
 function gradient_c(f::GridFunction{G,T,d,1}; fd=FDiff.forward) where {G,T,d}
-  #A = Array{T,d+1}(undef, (size(f.grid)..., d))
-  A = zeros(size(f.grid)..., d)
-  for k = 1:d
-    vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
-    vidx2 = ntuple(i -> i <= d ? Colon() : 1, d + 1)
-    imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
-  end
-  GridFunction(f.grid, A)
+    #A = Array{T,d+1}(undef, (size(f.grid)..., d))
+    A = zeros(size(f.grid)..., d)
+    for k = 1:d
+        vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
+        vidx2 = ntuple(i -> i <= d ? Colon() : 1, d + 1)
+        imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
+    end
+    GridFunction(f.grid, A)
 end
 
 function divergence_z(f::GridFunction{G,T,d}; fd=FDiff.backward) where {d,G<:AbstractGrid{d},T}
-  A = zeros(size(f.grid)..., 1)
-  for k = 1:d
-    vidx = ntuple(i -> i <= d ? Colon() : 1, d + 1)
-    vidx2 = ntuple(i -> i <= d ? Colon() : k, d + 1)
-    imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> _kernel_zero_right_bnd!(trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx), bidx, k))
-  end
-  GridFunction(f.grid, A)
+    A = zeros(size(f.grid)..., 1)
+    for k = 1:d
+        vidx = ntuple(i -> i <= d ? Colon() : 1, d + 1)
+        vidx2 = ntuple(i -> i <= d ? Colon() : k, d + 1)
+        imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> _kernel_zero_right_bnd!(trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx), bidx, k))
+    end
+    GridFunction(f.grid, A)
 end
 
 function pw_norm(f::GridFunction)
-  d = domaindim(f)
-  GridFunction(f.grid, sqrt.(sum(f.data.^2, dims=d+1)), Val(1))
+    d = domaindim(f)
+    GridFunction(f.grid, sqrt.(sum(f.data.^2, dims=d+1)), Val(1))
 end
 
 function my_norm(f::GridFunction)
-  d = domaindim(f)
-  GridFunction(f.grid, sqrt.(sum(f.data.^2, dims=d+1)), Val(rangedim(f)))
+    d = domaindim(f)
+    GridFunction(f.grid, sqrt.(sum(f.data.^2, dims=d+1)), Val(rangedim(f)))
 end
 
 function pwmul(f::GridFunction{G,T,d,1}, g::GridFunction{G,T,d}) where {G,T,d}
-  # TODO: maybe comprehension?
-  data = similar(g.data)
-  for I in CartesianIndices(f.grid)
-    data[Tuple(I)..., :] = f.data[Tuple(I)..., 1] * g.data[Tuple(I)..., :]
-  end
-  GridFunction(f.grid, data, Val(rangedim(g)))
+    # TODO: maybe comprehension?
+    data = similar(g.data)
+    for I in CartesianIndices(f.grid)
+        data[Tuple(I)..., :] = f.data[Tuple(I)..., 1] * g.data[Tuple(I)..., :]
+    end
+    GridFunction(f.grid, data, Val(rangedim(g)))
 end
 
 
@@ -397,66 +397,41 @@ Ghost nodes are eliminated through central finite differences. Currently only
 the basic 5-star Laplace is supported.
 """
 function neumann_kernel(kernel, bidx)
-  out = copy(kernel)
-  for (i, bi) in enumerate(bidx)
-    colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
-    out[colrange...] .+= bi * FDiff.forward
-  end
-  out /= 2 ^ count(!iszero, bidx)
-  trim_kernel(out, bidx)
-end
-
-function divergence(grid::Grid{d}, field) where d
-  field = reshape(field, (size(grid)..., ndims(grid)))
-  size(field, d+1) == d || throw(ArgumentError("need a d-vectorfield on a d-grid"))
-  dst = zeros(size(field))
-  for k in 1:d
-    kern = FDiff.dirfd(FDiff.backward, d, k)
-    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), bidx -> trim_kernel(kern, bidx))
-  end
-  out = dropdims(sum(dst, dims=d+1), dims=d+1)
-  out[:]
-end
-
-function gradient(grid::Grid, img::AbstractArray)
-  img = reshape(img, size(grid))
-  d = ndims(img)
-  dst = zeros(size(img)..., d)
-  for k in 1:d
-    kern = FDiff.dirfd(FDiff.forward, d, k)
-    imfilter!(selectdim(dst, d+1, k), img, bidx -> trim_kernel(kern, bidx))
-    # FIXME: hack
-    selectdim(selectdim(dst, d+1, k), k, size(grid, k)) .= 0
-  end
-  reshape(dst, length(grid) * ndims(grid))
+    out = copy(kernel)
+    for (i, bi) in enumerate(bidx)
+        colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
+        out[colrange...] .+= bi * FDiff.forward
+    end
+    out /= 2 ^ count(!iszero, bidx)
+    trim_kernel(out, bidx)
 end
 
 function my_norm(grid::Grid, img::AbstractArray)
-  img = reshape(img, (size(grid)..., ndims(grid)))
-  d = ndims(grid)
+    img = reshape(img, (size(grid)..., ndims(grid)))
+    d = ndims(grid)
 
-  out = sqrt.(sum(img.^2, dims=d+1))
-  out = repeat(out, inner=ntuple(i -> i == d+1 ? size(img, d+1) : 1, d+1))
+    out = sqrt.(sum(img.^2, dims=d+1))
+    out = repeat(out, inner=ntuple(i -> i == d+1 ? size(img, d+1) : 1, d+1))
 
-  out[:]
+    out[:]
 end
 
 
 function apply(kern, flatimg, size)
-  img = reshape(flatimg, size)
-  out = imfilter(kern, img)
-  reshape(out, prod(size))
+    img = reshape(flatimg, size)
+    out = imfilter(kern, img)
+    reshape(out, prod(size))
 end
 
 function solve(pde, shape::NTuple{d, Integer}) where d
-  ndims(pde.stencil) == d || throw("dimension mismatch")
-  length(pde.f) == prod(shape) || throw("bounds mismatch")
+    ndims(pde.stencil) == d || throw("dimension mismatch")
+    length(pde.f) == prod(shape) || throw("bounds mismatch")
 
-  A = LinearMap{Float64}(u -> apply(pde.stencil, u, shape), prod(shape), issymmetric=true, isposdef=true)
+    A = LinearMap{Float64}(u -> apply(pde.stencil, u, shape), prod(shape), issymmetric=true, isposdef=true)
 
-  u = zeros(prod(shape))
-  cg!(u, A, pde.f)
-  return u
+    u = zeros(prod(shape))
+    cg!(u, A, pde.f)
+    return u
 end
 
 end
diff --git a/DualTVDD/src/filtering.jl b/DualTVDD/src/filtering.jl
index 21645ca3aa13824ab962f226dd234a1b7d8bf56a..e9adaeb586ffd37f71cd88150de2e4b7f4ea4c87 100644
--- a/DualTVDD/src/filtering.jl
+++ b/DualTVDD/src/filtering.jl
@@ -1,9 +1,13 @@
 module Filtering
 
+import OffsetArrays
+
 export imfilter, imfilter!, trim_kernel
 
+const Kernel{T, N} = OffsetArrays.OffsetArray{T, N}
+
 @inline function _shift_range(range, shift::Integer)
-  Base.UnitRange(first(range)+shift : last(range)+shift)
+    Base.UnitRange(first(range)+shift : last(range)+shift)
 end
 
 ## TODO: allow border to be tuple
@@ -18,42 +22,42 @@ end
 #end
 
 @inline function _bidx_to_range(range::AbstractUnitRange, bidx::Integer)
-  if bidx > 0
-    return last(range):last(range)
-  elseif bidx < 0
-    return first(range):first(range)
-  else
-    return first(range)+1:last(range)-1
-  end
+    if bidx > 0
+        return last(range):last(range)
+    elseif bidx < 0
+        return first(range):first(range)
+    else
+        return first(range)+1:last(range)-1
+    end
 end
 
 
-# we return Base.Slice since that is what `OffsetArray` uses for `axes()`
 function _trim_kernel_axes(kaxes, bidx)
-  ntuple(Val(length(kaxes))) do i
-    if bidx[i] < 0
-      return 0:last(kaxes[i])
-    elseif bidx[i] > 0
-      return first(kaxes[i]):0
-    elseif kaxes[i] isa Integer # FIXME: dirty!
-      return UnitRange(kaxes[i], kaxes[i])
-    else
-      return UnitRange(kaxes[i])
+    ntuple(Val(length(kaxes))) do i
+        if bidx[i] < 0
+            return 0:last(kaxes[i])
+        elseif bidx[i] > 0
+            return first(kaxes[i]):0
+        elseif kaxes[i] isa Integer # FIXME: dirty!
+            return UnitRange(kaxes[i], kaxes[i])
+        else
+            return UnitRange(kaxes[i])
+        end
     end
-  end
 end
 
 """
     trim_kernel(kernel, bidx)
 
-Trim `kernel` so that the resulting kernel is usable on the boundary
-indicated by `bidx`
+Trim `kernel` so that the resulting kernel is usable on the boundary indicated
+by `bidx`, i.e. replace evaluations that would occur outside the boundary with
+zero.
 """
 function trim_kernel(kernel, bidx)
-  dst_axes = _trim_kernel_axes(axes(kernel), bidx)
-  out = similar(kernel, dst_axes)
-  range = CartesianIndices(out)
-  copyto!(out, range, kernel, range)
+    dst_axes = _trim_kernel_axes(axes(kernel), bidx)
+    out = similar(kernel, dst_axes)
+    range = CartesianIndices(out)
+    copyto!(out, range, kernel, range)
 end
 
 
@@ -62,31 +66,36 @@ end
 
 Filters `img` applying `kern` and write to `dst`. `dst` is added to, so you
 should initialize with zeros.
+Only `dst[slice]` is written to.
 """
 # TODO: weirdly Base.Slice instead of Base.UnitRange leads to axes that are not
 # compatible when broadcasting, figure out why!
 function imfilter!(dst, img, kern, slice=_bidx_to_range.(axes(img), ntuple(i->0, ndims(img))))
-  ndims(dst) == ndims(img) == ndims(kern) == length(slice) ||
-      throw(ArgumentError("dst, image, kernel or slice dimensions disagree ($(ndims(dst)) vs $(ndims(img)) vs $(ndims(kern)) vs $(length(slice)))"))
-  axes(dst) == axes(img) || throw(ArgumentError("output and input image axes disagree"))
-  all(size(kern) .<= size(img)) || throw(ArgumentError("kernel bigger than image"))
-  last.(slice) .+ last.(axes(kern)) <= last.(axes(img)) || first.(slice) .+ first.(axes(kern)) >= first.(axes(img)) ||
-      throw(ArgumentError("kern and/or slice out of bounds for image"))
-
-  dstslice = view(dst, UnitRange.(slice)...)
-  for i in CartesianIndices(kern)
-    iszero(kern[i]) && continue
-    dstslice .+= kern[i] .* view(img, (_shift_range.(slice, Tuple(i)))...)
-  end
-  dst
+    ndims(dst) == ndims(img) == ndims(kern) == length(slice) ||
+        throw(ArgumentError("dst, image, kernel or slice dimensions disagree " *
+                            "($(ndims(dst)) vs $(ndims(img)) vs $(ndims(kern)) vs $(length(slice)))"))
+    axes(dst) == axes(img) ||
+        throw(ArgumentError("output and input image axes disagree"))
+    all(size(kern) .<= size(img)) ||
+        throw(ArgumentError("kernel bigger than image"))
+    last.(slice) .+ last.(axes(kern)) <= last.(axes(img)) ||
+    first.(slice) .+ first.(axes(kern)) >= first.(axes(img)) ||
+        throw(ArgumentError("kernel and/or slice out of bounds for image"))
+
+    dstslice = view(dst, UnitRange.(slice)...)
+    for i in CartesianIndices(kern)
+        iszero(kern[i]) && continue
+        dstslice .+= kern[i] .* view(img, (_shift_range.(slice, Tuple(i)))...)
+    end
+    dst
 end
 
 function imfilter!(dst, img, kernelf::Function)
-  for bidx in Iterators.product(ntuple(i -> -1:1, ndims(img))...)
-    range = _bidx_to_range.(axes(img), bidx)
-    imfilter!(dst, img, kernelf(bidx), range)
-  end
-  dst
+    for bidx in Iterators.product(ntuple(i -> -1:1, ndims(img))...)
+        range = _bidx_to_range.(axes(img), bidx)
+        imfilter!(dst, img, kernelf(bidx), range)
+    end
+    dst
 end
 
 """
diff --git a/DualTVDD/test/runtests.jl b/DualTVDD/test/runtests.jl
index 882d689668b3ac40ce4012d320f1ab3955cb4950..74a4996bfc7e41e8054938b19076bd1c5efdf6b1 100644
--- a/DualTVDD/test/runtests.jl
+++ b/DualTVDD/test/runtests.jl
@@ -12,10 +12,15 @@ A = I
 f = GridFunction(x -> float(norm(x .- [0.5, 0.5]) < 0.4), 1, grid)
 g = GridFunction(x -> norm(x), 2, grid)
 
+"TV parameter"
 α=100000
+"L2 parameter"
 β=0
+"???"
 τ=1/8
+"residual error tolerance for local problem"
 ϵ=1e-9
+"???"
 ρ=0.5
 
 p = GridFunction(x -> 0, 2, grid)