diff --git a/chambollefp.jl b/chambollefp.jl
new file mode 100644
index 0000000000000000000000000000000000000000..d50c1a6f9cb170e55b85ae2f9c5c754ed5661e50
--- /dev/null
+++ b/chambollefp.jl
@@ -0,0 +1,234 @@
+module FDM
+
+using StaticArrays
+using SparseArrays
+using OffsetArrays
+using LinearMaps
+using IterativeSolvers
+
+include("filtering.jl")
+
+"Finite Differences"
+module FD
+
+using OffsetArrays
+
+const forward = OffsetVector([-1, 1], -1)
+const backward = OffsetVector([-1, 1], -2)
+const central = OffsetVector([-1, 0, 1], -2)
+function dirfd(fd, ndims, dir)
+  dir <= ndims || throw(ArgumentError("need dir <= ndims!"))
+  dfd = OffsetArray(zeros(ntuple(i -> i == dir ? length(fd) : 1, ndims)), ntuple(i -> i == dir ? fd.offsets[1] : -1, ndims))
+  # Workaround: LinearIndices(::OffsetVector) are not 1-based, so copyto! chockes
+  copyto!(dfd, fd.parent)
+end
+
+end
+
+using .Filtering
+using .FD
+
+
+
+# TODO: Refactor to GridArray
+
+# aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
+const FDStepRange = typeof(0:0.1:1)
+
+struct Grid{d} <: AbstractArray{Float64, d}
+  domain::NTuple{d, FDStepRange}
+end
+Grid(domain::FDStepRange...) = Grid(domain)
+
+Base.ndims(grid::Grid{d}) where d = d
+Base.size(grid::Grid) = ntuple(i -> grid.domain[i].len, ndims(grid))
+Base.axes(grid::Grid) = Base.OneTo.(size(grid))
+Base.length(grid::Grid) = prod(size(grid))
+
+Base.iterate(grid::Grid) = iterate(Iterators.product(grid.domain...))
+Base.iterate(grid::Grid, state) = iterate(Iterators.product(grid.domain...), state)
+
+Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
+Base.IndexStyle(::Type{<:Grid}) = IndexCartesian()
+
+
+"""
+    neuman_kernel(kernel, bidx)
+
+Modify `kernel` to be usable on a boundary `bixd` in a neumannn boundary setting.
+
+Ghost nodes are eliminated through central finite differences. Currently only
+the basic 5-star Laplace is supported.
+"""
+function neumann_kernel(kernel, bidx)
+  out = copy(kernel)
+  for (i, bi) in enumerate(bidx)
+    colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
+    out[colrange...] .+= bi * FD.forward
+  end
+  out /= 2 ^ count(!iszero, bidx)
+  trim_kernel(out, bidx)
+end
+
+function divergence(grid::Grid{d}, field) where d
+  field = reshape(field, (size(grid)..., ndims(grid)))
+  size(field, d+1) == d || throw(ArgumentError("need a d-vectorfield on a d-grid"))
+  dst = zeros(size(field))
+  for k in 1:d
+    kern = FD.dirfd(FD.backward, d, k)
+    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), bidx -> trim_kernel(kern, bidx))
+  end
+  out = dropdims(sum(dst, dims=d+1), dims=d+1)
+  out[:]
+end
+
+function gradient(grid::Grid, img::AbstractArray)
+  img = reshape(img, size(grid))
+  d = ndims(img)
+  dst = zeros(size(img)..., d)
+  for k in 1:d
+    kern = FD.dirfd(FD.forward, d, k)
+    imfilter!(selectdim(dst, d+1, k), img, bidx -> trim_kernel(kern, bidx))
+    # FIXME: hack
+    selectdim(selectdim(dst, d+1, k), k, size(grid, k)) .= 0
+  end
+  reshape(dst, length(grid) * ndims(grid))
+end
+
+function my_norm(grid::Grid, img::AbstractArray)
+  img = reshape(img, (size(grid)..., ndims(grid)))
+  d = ndims(grid)
+
+  out = sqrt.(sum(img.^2, dims=d+1))
+  out = repeat(out, inner=ntuple(i -> i == d+1 ? size(img, d+1) : 1, d+1))
+
+  out[:]
+end
+
+
+function apply(kern, flatimg, size)
+  img = reshape(flatimg, size)
+  out = imfilter(kern, img)
+  reshape(out, prod(size))
+end
+
+function solve(pde, shape::NTuple{d, Integer}) where d
+  ndims(pde.stencil) == d || throw("dimension mismatch")
+  length(pde.f) == prod(shape) || throw("bounds mismatch")
+
+  A = LinearMap{Float64}(u -> apply(pde.stencil, u, shape), prod(shape), issymmetric=true, isposdef=true)
+
+  u = zeros(prod(shape))
+  cg!(u, A, pde.f)
+  return u
+end
+
+end
+
+using StaticArrays
+using SparseArrays
+using OffsetArrays
+using LinearAlgebra
+using Colors
+using Plots
+
+
+imf(f, grid) = [f(x) for x in grid]
+
+imshow(grid, img) = plot(Gray.((reshape(float.(img), size(grid)) .- minimum(float.(img))) / (maximum(float.(img)) - minimum(float.(img))) ), show=true)
+
+function _fpiter(grid, p, f, A, α, β, τ)
+  x = (A'*A + β*I) \ (FDM.divergence(grid, p) .+ A'*f ./ α)
+  #x = (FDM.divergence(grid, p) .+ f ./ α)
+  q = FDM.gradient(grid, x)
+
+  #normp = FDM.my_norm(grid, q)
+  #normp = q
+
+  #normp = abs.(p)
+  #normp = reshape(normp, (size(grid)..., ndims(grid)))[:,:,1]
+  #normp = normp[:]
+  #imshow(grid, normp)
+
+  p_new = (p .+ τ .* q) ./ (1 .+ τ .* FDM.my_norm(grid, q))
+
+  reserr = maximum(sum((p_new .- p).^2, dims=ndims(p)))
+  (p_new, reserr)
+end
+
+function fpalg(grid, f, A; α, β, τ = 1/8, ϵ = 1e-4)
+  p = zeros(length(grid) * ndims(grid))
+
+  reserr = 1
+  k = 0
+  while reserr > ϵ && k < 3e2
+    k += 1
+    #sleep(0.1 / k)
+    (p, reserr) = _fpiter(grid, p, f, A, α, β, τ)
+
+    uimg = (A'*A + β*I) \ (α * FDM.divergence(grid, p) .+ A'*f)
+    #uimg = α * FDM.divergence(grid, p) .+ A'*f
+
+    imshow(grid, uimg)
+    println("[$(lpad(k, 4))] res = $(round(reserr, digits=5))")
+    println("min/max: $(minimum(uimg)) / $(maximum(uimg))")
+    println("min/max: $(minimum(p)) / $(maximum(p))")
+  end
+end
+
+#st = OffsetArray([0 -1 0; -1 4 -1; 0 -1 0], (-2, -2))
+
+grid = FDM.Grid(0:0.01:1, 0:0.01:1)
+#grid = FDM.Grid(0:0.5:1, 0:0.5:1)
+
+n = 1
+#A = spdiagm(collect(k => fill(1/(2n+1), length(grid)-abs(k)) for k in -n:n)...)
+
+# circle, scalar
+f = reshape([float(norm(x .- [0.5, 0.5]) < 0.4) for x in grid], length(grid))
+#f += clamp.(0.2 * rand(eltype(f), size(f)), 0, 1)
+
+
+#A = I
+A = spdiagm(0 => ones(length(grid)), 1 => ones(length(grid)-1) )
+
+f = A * f
+
+#imshow(grid, f)
+#sleep(5)
+
+img = fpalg(grid, f, A, α=1, β=0, τ=1/100, ϵ=1e-7)
+
+
+#A = Array{Float64}(undef, (length(grid), length(grid)))
+#for i in 1:length(grid)
+#  ei = float.(1:length(grid) .== i)
+#  #A[:, i] = FDM.divergence(grid, FDM.gradient(grid, ei))
+#  #A[:, i] = FDM.divergence(grid, append!(ei, zeros(length(grid))))
+#  A[:, i] = reshape(reshape(FDM.gradient(grid, ei), size(grid)..., 2)[:,:,2], length(grid))
+#end
+#A
+
+#display(A)
+
+
+#display(f2)
+#imshow(grid, f2)
+
+
+
+#@benchmark solve((stencil=st, f=zeros(10000)), (100, 100))
+
+#u = zeros(100, 100)
+
+#A = LinearMap{Float64}(u -> apply(st, u, size(uinner)), length(uinner), issymmetric=true, isposdef=true)
+
+#cg!(uflat, A, zeros(length(uinner)))
+
+
+#A = sparse(A)
+
+#@benchmark cg($A, zeros(10000))
+
+# TODO: A* as sparse matrix?
+
diff --git a/fd.jl b/fd.jl
new file mode 100644
index 0000000000000000000000000000000000000000..25428c92131bbee900fabc9629fb86093c09fb81
--- /dev/null
+++ b/fd.jl
@@ -0,0 +1,141 @@
+module FiniteDifferences
+
+using OffsetArrays
+include("filtering.jl")
+
+
+"FD-Kernels"
+module FD
+
+using OffsetArrays
+
+const forward = OffsetVector([-1, 1], 0:1)
+const backward = OffsetVector([-1, 1], -1:0)
+const central = OffsetVector([-1, 0, 1], -1:1)
+
+end
+
+using .Filtering
+using .FD
+
+"""
+    _fdkern(fd, ndims, dir)
+
+Create from `fd` a finite difference kernel usable in `ndims` dimension
+oriented in dimension `dir`.
+"""
+# TODO: make it typestable on ndims
+function _fdkern(fd::OffsetArray{T,1,Array{T,1}}, ndims::Integer, dir::Integer) where T
+  1 <= dir <= ndims || throw(ArgumentError("dimesion $(dir) out of range 1:$(ndims)"))
+  dfd = OffsetArray{T}(undef, Tuple(i == dir ? axes(fd, 1) : (0:0) for i in 1:ndims))
+  # copyto! from OffsetArray fails, broadcasting for differing axes fails too
+  # workaround: copyto! from parent
+  copyto!(dfd, fd.parent)
+end
+
+
+## Implementation
+
+
+## Grid
+
+# aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
+const FDStepRange = typeof(0:0.1:1)
+
+struct Grid{d}
+  domain::NTuple{d, FDStepRange}
+end
+
+Grid(domain::FDStepRange...) = Grid(domain)
+Grid(ndims::Integer, range::FDStepRange) = Grid(ntuple(i -> range, ndims))
+
+Base.show(io::IO, grid::Grid) =
+    print("FDGrid from $(first.(grid.domain)) to $(last.(grid.domain)) ",
+          "with spacing $(step.(grid.domain))")
+
+# this is not type-stable?
+#Base.size(grid::Grid) = getproperty.(grid.domain, :len)
+Base.size(grid::Grid) = ntuple(i -> grid.domain[i].len, ndims(grid))
+Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
+
+Base.ndims(grid::Grid{d}) where d = d
+Base.axes(grid::Grid) = Base.OneTo.(size(grid))
+Base.length(grid::Grid) = prod(size(grid))
+
+# TODO: there might be a better way to wrap that product iterator
+Base.iterate(grid::Grid) = iterate(Iterators.product(grid.domain...))
+Base.iterate(grid::Grid, state) = iterate(Iterators.product(grid.domain...), state)
+
+
+## FDFunction
+
+struct FDFunction{T,d} <: AbstractArray{T,1}
+  grid::Grid{d}
+  data::Array{T,1}
+end
+
+Base.show(io::IO, grid::FDFunction) =
+    print("FDFunction on $(length(grid)) grid points")
+
+Base.size(f::FDFunction) = size(f.data)
+Base.getindex(f::FDFunction, idx...) = getindex(f.data, idx...)
+Base.setindex!(f::FDFunction, v, idx...) = setindex!(f.data, v, idx...)
+
+Base.IndexStyle(::Type{<:FDFunction}) = IndexLinear()
+Base.similar(f::FDFunction, ::Type{S}, size::Int...) where S =
+    FDFunction(f.grid, Array{S}(undef, size))
+
+## Operations
+
+function fdfilter(f::FDFunction, kern)
+  A = imfilter(reshape(f.data, size(f.grid)), kern)
+  FDFunction(f.grid, A[:])
+end
+
+derivate(f::FDFunction, dir::Integer; fd = FD.forward) = fdfilter(f, _fdkern(fd, ndims(f.grid), dir))
+
+#gradient(f::FDFunction) = FDFunction(f.grid,
+
+
+"""
+    neuman_kernel(kernel, bidx)
+
+Modify `kernel` to be usable on a boundary `bixd` in a neumannn boundary setting.
+
+Ghost nodes are eliminated through central finite differences. Currently only
+the basic 5-star Laplace is supported.
+"""
+function neumann_kernel(kernel, bidx)
+  out = copy(kernel)
+  for (i, bi) in enumerate(bidx)
+    colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
+    out[colrange...] .+= bi * FD.forward
+  end
+  out /= 2 ^ count(!iszero, bidx)
+  trim_kernel(out, bidx)
+end
+
+function divergence(grid::Grid{d}, field) where d
+  field = reshape(field, (size(grid)..., ndims(grid)))
+  size(field, d+1) == d || throw(ArgumentError("need a d-vectorfield on a d-grid"))
+  dst = zeros(size(field))
+  for k in 1:d
+    kern = FD.dirfd(FD.backward, d, k)
+    imfilter!(selectdim(dst, d+1, k), selectdim(field, d+1, k), kern)
+  end
+  out = dropdims(sum(dst, dims=d+1), dims=d+1)
+  reshape(out, length(grid))
+end
+
+function gradient(grid::Grid, img::AbstractArray)
+  img = reshape(img, size(grid))
+  d = ndims(img)
+  dst = zeros(size(img)..., d)
+  for k in 1:d
+    kern = FD.dirfd(FD.forward, d, k)
+    imfilter!(selectdim(dst, d+1, k), img, kern)
+  end
+  reshape(dst, length(grid) * ndims(grid))
+end
+
+end
diff --git a/filtering.jl b/filtering.jl
index e34a3b0618713f13ae4ac2b8b1a2fbc4bbed20ff..bf7710bc2ff757ee5a50ecbdc74e89afd0b902fc 100644
--- a/filtering.jl
+++ b/filtering.jl
@@ -74,20 +74,21 @@ function imfilter!(dst, img, kern, slice=_bidx_to_range.(axes(img), ntuple(i->0,
   dst
 end
 
+function imfilter!(dst, img, kernelf::Function)
+  for bidx in Iterators.product(ntuple(i -> -1:1, ndims(img))...)
+    range = _bidx_to_range.(axes(img), bidx)
+    imfilter!(dst, img, kernelf(bidx), range)
+  end
+  dst
+end
+
 """
-    filter(kernelf, img)
+    imfilter(img, kernelf)
 
 Filter `img` using a kernel function `kernelf(bidx) = kernel` specifying a
 kernel for each type of node.
 """
-function imfilter(kernelf::Function, img)
-  out = zeros(axes(img))
-  for bidx in Iterators.product(ntuple(i -> -1:1, ndims(img))...)
-    range = _bidx_to_range.(axes(img), bidx)
-    imfilter!(out, img, kernelf(bidx), range)
-  end
-  out
-end
+imfilter(img, kernelf::Function) = imfilter(zeros(axes(img)), img, kernelf)
 
 
 """
diff --git a/test.jl b/test.jl
deleted file mode 100644
index 49ccdc73db278b0c2ddafecd5573f8eed06b6df7..0000000000000000000000000000000000000000
--- a/test.jl
+++ /dev/null
@@ -1,140 +0,0 @@
-module FDM
-
-using StaticArrays
-using SparseArrays
-using OffsetArrays
-using LinearMaps
-using IterativeSolvers
-
-include("filtering.jl")
-
-"Finite Differences"
-module FD
-
-using OffsetArrays
-
-const forward = OffsetVector([-1, 0, 1], -2)
-const central = OffsetVector([-1, 0, 1], -2)
-function dirfd(fd, ndims, dir)
-  dir <= ndims || throw(ArgumentError("need dir <= ndims!"))
-  dfd = OffsetArray(zeros(ntuple(i -> i == dir ? length(fd) : 1, ndims)), ntuple(i -> i == dir ? fd.offsets[1] : -1, ndims))
-  # Workaround: LinearIndices(::OffsetVector) are not 1-based, so copyto! chockes
-  copyto!(dfd, fd.parent)
-end
-
-end
-
-using .Filtering
-using .FD
-
-
-
-
-# aka `StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}`
-const FDStepRange = typeof(0:0.1:1)
-
-struct Grid{d}
-  domain::NTuple{d, FDStepRange}
-end
-
-Base.getindex(grid::Grid, idx...) = SVector(getindex.(grid.domain, idx))
-
-
-"""
-    neuman_kernel(kernel, bidx)
-
-Modify `kernel` to be usable on a boundary `bixd` in a neumannn boundary setting.
-
-Ghost nodes are eliminated through central finite differences. Currently only
-the basic 5-star Laplace is supported.
-"""
-function neumann_kernel(kernel, bidx)
-  out = copy(kernel)
-  for (i, bi) in enumerate(bidx)
-    colrange = (i == j ? Colon() : 0 for j in 1:ndims(kernel))
-    out[colrange...] .+= bi * FD.forward
-  end
-  out /= 2 ^ count(!iszero, bidx)
-  trim_kernel(out, bidx)
-end
-
-function divergence(field::AbstractArray{T, d}) where {T, d}
-  size(field, d) == d - 1 || throw(ArgumentError("need a d-vectorfield on a d-grid"))
-  dst = zeros(size(field))
-  for k in 1:d-1
-    kern = FD.dirfd(FD.central, d-1, k)
-    imfilter!(selectdim(dst, d, k), selectdim(field, d, k), kern)
-  end
-  dropdims(sum(dst, dims=d), dims=d)
-end
-
-function gradient(img::AbstractArray)
-  d = ndims(img)
-  dst = zeros(size(img)..., d)
-  for k in 1:d
-    kern = FD.dirfd(FD.central, d, k)
-    imfilter!(selectdim(dst, d+1, k), img, kern)
-  end
-  dst
-end
-
-
-function apply(kern, flatimg, size)
-  img = reshape(flatimg, size)
-  out = imfilter(kern, img)
-  reshape(out, prod(size))
-end
-
-function solve(pde, shape::NTuple{d, Integer}) where d
-  ndims(pde.stencil) == d || throw("dimension mismatch")
-  length(pde.f) == prod(shape) || throw("bounds mismatch")
-
-  A = LinearMap{Float64}(u -> apply(pde.stencil, u, shape), prod(shape), issymmetric=true, isposdef=true)
-
-  u = zeros(prod(shape))
-  cg!(u, A, pde.f)
-  return u
-end
-
-end
-
-using StaticArrays
-using OffsetArrays
-using LinearAlgebra
-
-st = MMatrix{3, 3}([0 -1 0; -1 4 -1; 0 -1 0])
-st = OffsetArray(st, (-2, -2))
-
-img = rand(1000, 1000)
-
-n = 100
-d = 2
-
-β = 1e-3
-A = I
-τ = 0.5
-α = 1
-
-f = rand(n, n)
-p = rand(n, n, d)
-
-x = (A'*A + β*I) \ - FDM.divergence(p) - A'*f
-w = FDM.gradient(x)
-
-p = (p - τ*w) ./ (1 .+ τ/α * abs.(w))
-
-
-#@benchmark solve((stencil=st, f=zeros(10000)), (100, 100))
-
-#u = zeros(100, 100)
-
-#A = LinearMap{Float64}(u -> apply(st, u, size(uinner)), length(uinner), issymmetric=true, isposdef=true)
-
-#cg!(uflat, A, zeros(length(uinner)))
-
-
-#A = sparse(A)
-
-#@benchmark cg($A, zeros(10000))
-
-