diff --git a/chambollefp.jl b/chambollefp.jl
index d50c1a6f9cb170e55b85ae2f9c5c754ed5661e50..34679407c43747268f6ecedb3bd337bd310a9650 100644
--- a/chambollefp.jl
+++ b/chambollefp.jl
@@ -1,129 +1,4 @@
-module FDM
-
-using StaticArrays
-using SparseArrays
-using OffsetArrays
-using LinearMaps
-using IterativeSolvers
-
-include("filtering.jl")
-
-"Finite Differences"
-module FD
-
-using OffsetArrays
-
-const forward = OffsetVector([-1, 1], -1)
-const backward = OffsetVector([-1, 1], -2)
-const central = OffsetVector([-1, 0, 1], -2)
-function dirfd(fd, ndims, dir)
-  dir <= ndims || throw(ArgumentError("need dir <= ndims!"))
-  dfd = OffsetArray(zeros(ntuple(i -> i == dir ? length(fd) : 1, ndims)), ntuple(i -> i == dir ? fd.offsets[1] : -1, ndims))
-  # Workaround: LinearIndices(::OffsetVector) are not 1-based, so copyto! chockes
-  copyto!(dfd, fd.parent)
-end
-
-end
-
-using .Filtering
-using .FD
-
-
-
-# TODO: Refactor to GridArray
-
-# aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
-const FDStepRange = typeof(0:0.1:1)
-
-struct Grid{d} <: AbstractArray{Float64, d}
-  domain::NTuple{d, FDStepRange}
-end
-Grid(domain::FDStepRange...) = Grid(domain)
-
-Base.ndims(grid::Grid{d}) where d = d
-Base.size(grid::Grid) = ntuple(i -> grid.domain[i].len, ndims(grid))
-Base.axes(grid::Grid) = Base.OneTo.(size(grid))
-Base.length(grid::Grid) = prod(size(grid))
-
-Base.iterate(grid::Grid) = iterate(Iterators.product(grid.domain...))
-Base.iterate(grid::Grid, state) = iterate(Iterators.product(grid.domain...), state)
-
-Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
-Base.IndexStyle(::Type{<:Grid}) = IndexCartesian()
-
-
-"""
-    neuman_kernel(kernel, bidx)
-
-Modify `kernel` to be usable on a boundary `bixd` in a neumannn boundary setting.
-
-Ghost nodes are eliminated through central finite differences. Currently only
-the basic 5-star Laplace is supported.
-"""
-function neumann_kernel(kernel, bidx)
-  out = copy(kernel)
-  for (i, bi) in enumerate(bidx)
-    colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
-    out[colrange...] .+= bi * FD.forward
-  end
-  out /= 2 ^ count(!iszero, bidx)
-  trim_kernel(out, bidx)
-end
-
-function divergence(grid::Grid{d}, field) where d
-  field = reshape(field, (size(grid)..., ndims(grid)))
-  size(field, d+1) == d || throw(ArgumentError("need a d-vectorfield on a d-grid"))
-  dst = zeros(size(field))
-  for k in 1:d
-    kern = FD.dirfd(FD.backward, d, k)
-    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), bidx -> trim_kernel(kern, bidx))
-  end
-  out = dropdims(sum(dst, dims=d+1), dims=d+1)
-  out[:]
-end
-
-function gradient(grid::Grid, img::AbstractArray)
-  img = reshape(img, size(grid))
-  d = ndims(img)
-  dst = zeros(size(img)..., d)
-  for k in 1:d
-    kern = FD.dirfd(FD.forward, d, k)
-    imfilter!(selectdim(dst, d+1, k), img, bidx -> trim_kernel(kern, bidx))
-    # FIXME: hack
-    selectdim(selectdim(dst, d+1, k), k, size(grid, k)) .= 0
-  end
-  reshape(dst, length(grid) * ndims(grid))
-end
-
-function my_norm(grid::Grid, img::AbstractArray)
-  img = reshape(img, (size(grid)..., ndims(grid)))
-  d = ndims(grid)
-
-  out = sqrt.(sum(img.^2, dims=d+1))
-  out = repeat(out, inner=ntuple(i -> i == d+1 ? size(img, d+1) : 1, d+1))
-
-  out[:]
-end
-
-
-function apply(kern, flatimg, size)
-  img = reshape(flatimg, size)
-  out = imfilter(kern, img)
-  reshape(out, prod(size))
-end
-
-function solve(pde, shape::NTuple{d, Integer}) where d
-  ndims(pde.stencil) == d || throw("dimension mismatch")
-  length(pde.f) == prod(shape) || throw("bounds mismatch")
-
-  A = LinearMap{Float64}(u -> apply(pde.stencil, u, shape), prod(shape), issymmetric=true, isposdef=true)
-
-  u = zeros(prod(shape))
-  cg!(u, A, pde.f)
-  return u
-end
-
-end
+include("fd.jl")
 
 using StaticArrays
 using SparseArrays
@@ -139,17 +14,8 @@ imshow(grid, img) = plot(Gray.((reshape(float.(img), size(grid)) .- minimum(floa
 
 function _fpiter(grid, p, f, A, α, β, τ)
   x = (A'*A + β*I) \ (FDM.divergence(grid, p) .+ A'*f ./ α)
-  #x = (FDM.divergence(grid, p) .+ f ./ α)
   q = FDM.gradient(grid, x)
 
-  #normp = FDM.my_norm(grid, q)
-  #normp = q
-
-  #normp = abs.(p)
-  #normp = reshape(normp, (size(grid)..., ndims(grid)))[:,:,1]
-  #normp = normp[:]
-  #imshow(grid, normp)
-
   p_new = (p .+ τ .* q) ./ (1 .+ τ .* FDM.my_norm(grid, q))
 
   reserr = maximum(sum((p_new .- p).^2, dims=ndims(p)))
@@ -167,7 +33,6 @@ function fpalg(grid, f, A; α, β, τ = 1/8, ϵ = 1e-4)
     (p, reserr) = _fpiter(grid, p, f, A, α, β, τ)
 
     uimg = (A'*A + β*I) \ (α * FDM.divergence(grid, p) .+ A'*f)
-    #uimg = α * FDM.divergence(grid, p) .+ A'*f
 
     imshow(grid, uimg)
     println("[$(lpad(k, 4))] res = $(round(reserr, digits=5))")
@@ -197,7 +62,7 @@ f = A * f
 #imshow(grid, f)
 #sleep(5)
 
-img = fpalg(grid, f, A, α=1, β=0, τ=1/100, ϵ=1e-7)
+img = fpalg(grid, f, A, α=1, β=1, τ=1/100, ϵ=1e-7)
 
 
 #A = Array{Float64}(undef, (length(grid), length(grid)))
diff --git a/chambollefp_dd.jl b/chambollefp_dd.jl
new file mode 100644
index 0000000000000000000000000000000000000000..99be72eb5b0499df2df57092ad51f8826cbd112d
--- /dev/null
+++ b/chambollefp_dd.jl
@@ -0,0 +1,16 @@
+include("fd.jl")
+
+using LinearAlgebra
+import .FDM
+
+grid = FDM.MetaGrid((0:0.1:1, 0:0.1:1), (3, 3), 2)
+
+f = FDM.GridFunction(x -> norm(x), 1, grid)
+g = FDM.GridFunction(x -> norm(x), 2, grid)
+
+#FDM.plot(f)
+
+
+
+
+
diff --git a/fd.jl b/fd.jl
index 25428c92131bbee900fabc9629fb86093c09fb81..776d4fed0a88921c05dc504bd39822f689ad30c7 100644
--- a/fd.jl
+++ b/fd.jl
@@ -1,20 +1,31 @@
-module FiniteDifferences
+module FDM
 
+using StaticArrays
+using SparseArrays
 using OffsetArrays
-include("filtering.jl")
+using LinearMaps
+using IterativeSolvers
 
+include("filtering.jl")
 
-"FD-Kernels"
+"Finite Differences"
 module FD
 
 using OffsetArrays
 
-const forward = OffsetVector([-1, 1], 0:1)
-const backward = OffsetVector([-1, 1], -1:0)
-const central = OffsetVector([-1, 0, 1], -1:1)
+const forward = OffsetVector([-1, 1], -1)
+const backward = OffsetVector([-1, 1], -2)
+const central = OffsetVector([-1, 0, 1], -2)
+function dirfd(fd, ndims, dir)
+  dir <= ndims || throw(ArgumentError("need dir <= ndims!"))
+  dfd = OffsetArray(zeros(ntuple(i -> i == dir ? length(fd) : 1, ndims)), ntuple(i -> i == dir ? fd.offsets[1] : -1, ndims))
+  # Workaround: LinearIndices(::OffsetVector) are not 1-based, so copyto! chockes
+  copyto!(dfd, fd.parent)
+end
 
 end
 
+
 using .Filtering
 using .FD
 
@@ -34,67 +45,177 @@ function _fdkern(fd::OffsetArray{T,1,Array{T,1}}, ndims::Integer, dir::Integer)
 end
 
 
-## Implementation
+# Grid
 
 
-## Grid
+abstract type AbstractGrid{d} <: AbstractArray{Float64, d} end
+
+Base.ndims(grid::AbstractGrid{d}) where d = d
+Base.axes(grid::AbstractGrid) = Base.OneTo.(size(grid))
+Base.length(grid::AbstractGrid) = prod(size(grid))
+Base.IndexStyle(::Type{<:AbstractGrid}) = IndexCartesian()
 
 # aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
 const FDStepRange = typeof(0:0.1:1)
 
-struct Grid{d}
+struct Grid{d} <: AbstractGrid{d}
   domain::NTuple{d, FDStepRange}
 end
-
 Grid(domain::FDStepRange...) = Grid(domain)
-Grid(ndims::Integer, range::FDStepRange) = Grid(ntuple(i -> range, ndims))
-
 Base.show(io::IO, grid::Grid) =
-    print("FDGrid from $(first.(grid.domain)) to $(last.(grid.domain)) ",
+    print("Grid from $(first.(grid.domain)) to $(last.(grid.domain)) ",
           "with spacing $(step.(grid.domain))")
 
-# this is not type-stable?
-#Base.size(grid::Grid) = getproperty.(grid.domain, :len)
 Base.size(grid::Grid) = ntuple(i -> grid.domain[i].len, ndims(grid))
-Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
 
-Base.ndims(grid::Grid{d}) where d = d
-Base.axes(grid::Grid) = Base.OneTo.(size(grid))
-Base.length(grid::Grid) = prod(size(grid))
-
-# TODO: there might be a better way to wrap that product iterator
 Base.iterate(grid::Grid) = iterate(Iterators.product(grid.domain...))
 Base.iterate(grid::Grid, state) = iterate(Iterators.product(grid.domain...), state)
 
+Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
+Base.getindex(grid::Grid, I::CartesianIndex) = SVector(getindex.(grid.domain, Tuple(I)))
+
+
+# MetaGrid
 
-## FDFunction
 
-struct FDFunction{T,d} <: AbstractArray{T,1}
+struct MetaGrid{d} <: AbstractGrid{d}
   grid::Grid{d}
-  data::Array{T,1}
+  indices::Array{NTuple{d, UnitRange{Int}}, d}
+end
+
+function MetaGrid(domain::NTuple{d, FDStepRange}, pnum::NTuple{d, Int}, overlap::Int) where d
+  grid = Grid(domain)
+  tsize = size(grid) .+ (pnum .- 1) .* overlap
+
+  psize = tsize .÷ pnum
+  osize = tsize .- pnum .* psize
+
+  overhang(I, j) = I[j] == pnum[j] ? osize[j] : 0
+
+  indices = Array{NTuple{d, UnitRange{Int}}, d}(undef, pnum)
+  for I in CartesianIndices(pnum)
+    indices[I] = ntuple(j -> ((I[j] - 1) * psize[j] - (I[j] - 1) * overlap + 1) :
+                             (      I[j] * psize[j] - (I[j] - 1) * overlap + overhang(I, j)), d)
+  end
+  MetaGrid{d}(grid, indices)
+end
+
+Base.size(grid::MetaGrid) = size(grid.grid)
+
+Base.iterate(grid::MetaGrid) = iterate(grid.grid)
+Base.iterate(grid::MetaGrid, state) = iterate(grid.grid, state)
+
+Base.getindex(grid::MetaGrid, idx...) = getindex(grid.grid, idx...)
+
+indices(grid::MetaGrid, idx...) = getindex(grid.indices, idx...)
+
+
+## GridFunction
+
+"""
+    GridFunction{G<:AbstractGrid,T,d,n,D}
+
+`n`-dimensional function values of type `T` on a Grid `G` of dimension `d`.
+
+`D == d + 1`
+"""
+struct GridFunction{G<:AbstractGrid,T,d,n,D} <: AbstractArray{T,1}
+  grid::G
+  data::Array{T,D}
+  function GridFunction{G,T,d,n,D}(grid::G, data::AbstractArray{T,D}) where {G,T,d,n,D}
+    D == d + 1 || throw(ArgumentError)
+    d == ndims(grid) || throw(ArgumentError)
+    (size(grid)..., n) == size(data) || throw(ArgumentError)
+    new(grid, data)
+  end
+end
+GridFunction(grid::G, data::AbstractArray{T,D}) where {d,G<:AbstractGrid{d},T,D} =
+    GridFunction{G,T,d,size(data, d+1),d+1}(grid, data)
+function GridFunction(grid::G, data::AbstractArray{T,1}) where {G,T}
+  d = ndims(grid)
+  n = length(data) ÷ d
+  data = reshape(data, size(grid)..., n)
+  GridFunction{G,T,d,n,d+1}(grid, data)
 end
+function GridFunction(f::Function, n::Int, grid::G) where {G<:AbstractGrid}
+  data = Array{Float64,ndims(grid)+1}(undef, size(grid)..., n)
+  for I in CartesianIndices(grid)
+    data[Tuple(I)..., :] .= f(grid[I])
+  end
+  GridFunction(grid, data)
+end
+
+#Base.show(io::IO, grid::GridFunction) =
+#    print("GridFunction on $(length(grid)) grid points")
+
+Base.size(f::GridFunction) = (length(f.data),)
+#Base.axes(f::GridFunction) = (Base.OneTo(length(f.data)),)
+Base.getindex(f::GridFunction, idx::Int) = getindex(f.data, idx)
+Base.setindex!(f::GridFunction, v, idx::Int) = setindex!(f.data, v, idx)
+
+## TODO: using this prints #undef for display(f)
+#Base.getindex(f::GridFunction, idx...) = getindex(f.data, idx...)
+#Base.setindex!(f::GridFunction, v, idx...) = setindex!(f.data, v, idx...)
+
+Base.IndexStyle(::Type{<:GridFunction}) = IndexLinear()
+#Base.similar(f::GridFunction{G,d,n}, ::Type{S}, size::Integer...) where {G,d,n,S} =
+#    GridFunction{G,d,n}(f.grid, Array{S}(undef, size))
+#
+rangedim(f::GridFunction{G,T,d,n}) where {G,T,d,n} = n
+domaindim(f::GridFunction{G,T,d}) where {G,T,d} = d
+
+#component(f::GridFunction, k::Integer) = view(reshape(f.data,
+
+
 
-Base.show(io::IO, grid::FDFunction) =
-    print("FDFunction on $(length(grid)) grid points")
 
-Base.size(f::FDFunction) = size(f.data)
-Base.getindex(f::FDFunction, idx...) = getindex(f.data, idx...)
-Base.setindex!(f::FDFunction, v, idx...) = setindex!(f.data, v, idx...)
 
-Base.IndexStyle(::Type{<:FDFunction}) = IndexLinear()
-Base.similar(f::FDFunction, ::Type{S}, size::Int...) where S =
-    FDFunction(f.grid, Array{S}(undef, size))
+
+import Colors: Gray
+import Plots: plot
+
+plot(img::GridFunction{G,T,d,1}) where {G,T,d} =
+    plot(reshape(Gray.((img .- minimum(img)) ./ (maximum(img) - minimum(img))), size(img.grid)), show=true)
 
 ## Operations
 
-function fdfilter(f::FDFunction, kern)
-  A = imfilter(reshape(f.data, size(f.grid)), kern)
-  FDFunction(f.grid, A[:])
+function fdfilter(f::GridFunction{G,T,d,n}, kern) where {G,T,d,n}
+  A = zeros(axes(f.data))
+  for k = 1:n
+    vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
+    imfilter!(view(A, vidx...), f.data[vidx...], kern)
+  end
+  GridFunction(f.grid, A)
 end
 
-derivate(f::FDFunction, dir::Integer; fd = FD.forward) = fdfilter(f, _fdkern(fd, ndims(f.grid), dir))
+derivate(f::GridFunction, dir::Integer; fd = FD.forward) = fdfilter(f, _fdkern(fd, ndims(f.grid), dir))
+
+
+# currently gradient and divergence are specific implementations (as needed for
+# chambolles algorithm), other choices may be reasonable
+
+function gradient(f::GridFunction{G,T,d,1}; fd=FD.forward) where {G,T,d}
+  A = Array{T,d+1}(undef, (size(f.grid)..., d))
+  for k = 1:d
+    vidx = ntuple(i -> i <= d ? Colon() : k, d + 1)
+    vidx2 = ntuple(i -> i <= d ? Colon() : 1, d + 1)
+    imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
+    # FIXME: hackish, only for forward fd
+    selectdim(selectdim(A, d+1, k), k, size(f.grid, k)) .= 0
+  end
+  GridFunction(f.grid, A)
+end
+
+function divergence(f::GridFunction{G,T,d}; fd=FD.backward) where {d,G<:AbstractGrid{d},T}
+  A = zeros(size(f.grid)..., 1)
+  for k = 1:d
+    vidx = ntuple(i -> i <= d ? Colon() : 1, d + 1)
+    vidx2 = ntuple(i -> i <= d ? Colon() : k, d + 1)
+    imfilter!(view(A, vidx...), f.data[vidx2...], bidx -> trim_kernel(_fdkern(fd, ndims(f.grid), k), bidx))
+  end
+  GridFunction(f.grid, A)
+end
 
-#gradient(f::FDFunction) = FDFunction(f.grid,
 
 
 """
@@ -121,10 +242,10 @@ function divergence(grid::Grid{d}, field) where d
   dst = zeros(size(field))
   for k in 1:d
     kern = FD.dirfd(FD.backward, d, k)
-    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), kern)
+    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), bidx -> trim_kernel(kern, bidx))
   end
   out = dropdims(sum(dst, dims=d+1), dims=d+1)
-  reshape(out, length(grid))
+  out[:]
 end
 
 function gradient(grid::Grid, img::AbstractArray)
@@ -133,9 +254,168 @@ function gradient(grid::Grid, img::AbstractArray)
   dst = zeros(size(img)..., d)
   for k in 1:d
     kern = FD.dirfd(FD.forward, d, k)
-    imfilter!(selectdim(dst, d+1, k), img, kern)
+    imfilter!(selectdim(dst, d+1, k), img, bidx -> trim_kernel(kern, bidx))
+    # FIXME: hack
+    selectdim(selectdim(dst, d+1, k), k, size(grid, k)) .= 0
   end
   reshape(dst, length(grid) * ndims(grid))
 end
 
+function my_norm(grid::Grid, img::AbstractArray)
+  img = reshape(img, (size(grid)..., ndims(grid)))
+  d = ndims(grid)
+
+  out = sqrt.(sum(img.^2, dims=d+1))
+  out = repeat(out, inner=ntuple(i -> i == d+1 ? size(img, d+1) : 1, d+1))
+
+  out[:]
+end
+
+
+function apply(kern, flatimg, size)
+  img = reshape(flatimg, size)
+  out = imfilter(kern, img)
+  reshape(out, prod(size))
+end
+
+function solve(pde, shape::NTuple{d, Integer}) where d
+  ndims(pde.stencil) == d || throw("dimension mismatch")
+  length(pde.f) == prod(shape) || throw("bounds mismatch")
+
+  A = LinearMap{Float64}(u -> apply(pde.stencil, u, shape), prod(shape), issymmetric=true, isposdef=true)
+
+  u = zeros(prod(shape))
+  cg!(u, A, pde.f)
+  return u
+end
+
 end
+
+
+#module FD
+#
+#using OffsetArrays
+#include("filtering.jl")
+#
+#
+#"FD-Kernels"
+#module Kernels
+#
+#using OffsetArrays
+#
+#const forward = OffsetVector([-1, 1], 0:1)
+#const backward = OffsetVector([-1, 1], -1:0)
+#const central = OffsetVector([-1, 0, 1], -1:1)
+#
+#end
+#
+#using .Filtering
+#using .Kernels
+#
+#
+#
+### Implementation
+#
+#
+### Grid
+#
+## aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
+#const FDStepRange = typeof(0:0.1:1)
+#
+#struct Grid{d}
+#  domain::NTuple{d, FDStepRange}
+#end
+#
+#Grid(domain::FDStepRange...) = Grid(domain)
+#Grid(ndims::Integer, range::FDStepRange) = Grid(ntuple(i -> range, ndims))
+#
+#Base.show(io::IO, grid::Grid) =
+#    print("FDGrid from $(first.(grid.domain)) to $(last.(grid.domain)) ",
+#          "with spacing $(step.(grid.domain))")
+#
+## this is not type-stable?
+##Base.size(grid::Grid) = getproperty.(grid.domain, :len)
+#Base.size(grid::Grid) = ntuple(i -> grid.domain[i].len, ndims(grid))
+#Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
+#
+#Base.ndims(grid::Grid{d}) where d = d
+#Base.axes(grid::Grid) = Base.OneTo.(size(grid))
+#Base.length(grid::Grid) = prod(size(grid))
+#
+## TODO: there might be a better way to wrap that product iterator
+#Base.iterate(grid::Grid) = iterate(Iterators.product(grid.domain...))
+#Base.iterate(grid::Grid, state) = iterate(Iterators.product(grid.domain...), state)
+#
+#
+### GridFunction
+#
+#struct GridFunction{T,d} <: AbstractArray{T,1}
+#  grid::Grid{d}
+#  data::Array{T,1}
+#end
+#
+#Base.show(io::IO, grid::GridFunction) =
+#    print("GridFunction on $(length(grid)) grid points")
+#
+#Base.size(f::GridFunction) = size(f.data)
+#Base.getindex(f::GridFunction, idx...) = getindex(f.data, idx...)
+#Base.setindex!(f::GridFunction, v, idx...) = setindex!(f.data, v, idx...)
+#
+#Base.IndexStyle(::Type{<:GridFunction}) = IndexLinear()
+#Base.similar(f::GridFunction, ::Type{S}, size::Int...) where S =
+#    GridFunction(f.grid, Array{S}(undef, size))
+#
+### Operations
+#
+#function fdfilter(f::GridFunction, kern)
+#  A = imfilter(reshape(f.data, size(f.grid)), kern)
+#  GridFunction(f.grid, A[:])
+#end
+#
+#derivate(f::GridFunction, dir::Integer; fd = FD.forward) = fdfilter(f, _fdkern(fd, ndims(f.grid), dir))
+#
+##gradient(f::GridFunction) = GridFunction(f.grid,
+#
+#
+#"""
+#    neuman_kernel(kernel, bidx)
+#
+#Modify `kernel` to be usable on a boundary `bixd` in a neumannn boundary setting.
+#
+#Ghost nodes are eliminated through central finite differences. Currently only
+#the basic 5-star Laplace is supported.
+#"""
+#function neumann_kernel(kernel, bidx)
+#  out = copy(kernel)
+#  for (i, bi) in enumerate(bidx)
+#    colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
+#    out[colrange...] .+= bi * FD.forward
+#  end
+#  out /= 2 ^ count(!iszero, bidx)
+#  trim_kernel(out, bidx)
+#end
+#
+#function divergence(grid::Grid{d}, field) where d
+#  field = reshape(field, (size(grid)..., ndims(grid)))
+#  size(field, d+1) == d || throw(ArgumentError("need a d-vectorfield on a d-grid"))
+#  dst = zeros(size(field))
+#  for k in 1:d
+#    kern = FD.dirfd(FD.backward, d, k)
+#    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), kern)
+#  end
+#  out = dropdims(sum(dst, dims=d+1), dims=d+1)
+#  reshape(out, length(grid))
+#end
+#
+#function gradient(grid::Grid, img::AbstractArray)
+#  img = reshape(img, size(grid))
+#  d = ndims(img)
+#  dst = zeros(size(img)..., d)
+#  for k in 1:d
+#    kern = FD.dirfd(FD.forward, d, k)
+#    imfilter!(selectdim(dst, d+1, k), img, kern)
+#  end
+#  reshape(dst, length(grid) * ndims(grid))
+#end
+#
+#end
diff --git a/filtering.jl b/filtering.jl
index bf7710bc2ff757ee5a50ecbdc74e89afd0b902fc..215e591c740658338dd1e52f06892ca1bd98b757 100644
--- a/filtering.jl
+++ b/filtering.jl
@@ -88,7 +88,7 @@ end
 Filter `img` using a kernel function `kernelf(bidx) = kernel` specifying a
 kernel for each type of node.
 """
-imfilter(img, kernelf::Function) = imfilter(zeros(axes(img)), img, kernelf)
+imfilter(img, kernelf::Function) = imfilter!(similar(img), img, kernelf)
 
 
 """
@@ -97,8 +97,8 @@ imfilter(img, kernelf::Function) = imfilter(zeros(axes(img)), img, kernelf)
 Filter `img` by `kern` using correlation.
 Boundary is zeroed out.
 """
-imfilter(img, kern) = imfilter(bidx -> trim_kernel(kern, bidx), img)
+imfilter(img, kern) = imfilter(img, bidx -> trim_kernel(kern, bidx))
 # TODO: use keyword arguments
-imfilter_inner(img, kern) = imfilter(bidx -> all(bidx .== 0) ? kern : zeros(ntuple(i -> 0, ndims(img))), img)
+imfilter_inner(img, kern) = imfilter(img, bidx -> all(bidx .== 0) ? kern : zeros(ntuple(i -> 0, ndims(img))))
 
 end