diff --git a/src/DualTVDD.jl b/src/DualTVDD.jl
index aaa447ecdc662be95f79f64f485c63787c92b320..97475b975aa7b2bbdb6abd3290f3178d39a13476 100644
--- a/src/DualTVDD.jl
+++ b/src/DualTVDD.jl
@@ -2,7 +2,8 @@ module DualTVDD
 
 
 
-include("types.jl")
+include("common.jl")
+include("models.jl")
 include("chambolle.jl")
 include("dualtvdd.jl")
 include("projgrad.jl")
@@ -24,7 +25,7 @@ function run()
 
     display(norm(B))
 
-    md = DualTVDD.OpROFModel(g, B, α)
+    md = DualTVDD.DualTVDDModel(g, B, α)
     ctx = DualTVDD.init(md, DualTVDD.ChambolleAlgorithm())
     ctx2 = DualTVDD.init(md, DualTVDD.ProjGradAlgorithm(λ=1/sqrt(8)/norm(B)))
 
@@ -81,10 +82,10 @@ function rundd()
     α = 1/4
 
     md = DualTVDD.DualTVDDModel(f, A, α, 0., 0.)
-    alg = DualTVDD.DualTVDDAlgorithm(M=(2,2), overlap=(2,2), σ=1)
+    alg = DualTVDD.DualTVDDAlgorithm(M=(2,2), overlap=(2,2), σ=0.25)
     ctx = DualTVDD.init(md, alg)
 
-    md2 = DualTVDD.OpROFModel(g, B, α)
+    md2 = DualTVDD.DualTVDDModel(g, B, α)
     alg2 = DualTVDD.ChambolleAlgorithm()
     ctx2 = DualTVDD.init(md2, alg2)
 
@@ -141,7 +142,7 @@ function run3()
     α = 0.1
 
     # Chambolle
-    md = DualTVDD.OpROFModel(g, B, α)
+    md = DualTVDD.DualTVDDModel(g, B, α)
     alg = DualTVDD.ChambolleAlgorithm()
     ctx = DualTVDD.init(md, alg)
 
diff --git a/src/chambolle.jl b/src/chambolle.jl
index 62d838fa9bbef57a7c39c3c6ea3084d3c8acfc93..bd165c5a488c0b2efc585fdd8b591ce4d46a3628 100644
--- a/src/chambolle.jl
+++ b/src/chambolle.jl
@@ -4,8 +4,8 @@
 # https://doi.org/10.1023/B:JMIV.0000011325.36760.1e
 #
 # Implementation Notes:
-# - λ is not a scalar but a scalar field
-# - pointwise feasibility constraint |p|<=λ instead of |p|<=1
+# - the parameter λ from the paper is called α here and is a scalar field rather than a scalar
+# - pointwise feasibility constraint |p|<=α instead of |p|<=1
 # - B is introduced, the original Chambolle algorithm has B=Id
 
 using Base: @_inline_meta
@@ -21,35 +21,32 @@ struct ChambolleAlgorithm <: Algorithm
     end
 end
 
-struct ChambolleContext{M,A,G,Λ,T,R,S,Sv,K1,K2} <: Context
+struct ChambolleContext{M,A,T,R,S,Sv,K1,K2} <: Context
     "model data"
     model::M
     "algorithm data"
     algorithm::A
     "matrix view on model.f"
-    g::G
-    "matrix view on model.λ"
-    λ::Λ
-    "matrix view on pv"
     p::T
+
     "matrix view on rv"
     r::R
     "matrix view on sv"
     s::S
+
     "dual variable as vector"
     rv::Sv
     "scalar temporary variable"
     sv::Sv
+
     "div(p) + g kernel"
     k1::K1
     "(p + (τ/normB)*grad(q))/(1 + (τ/normB)/λ|grad(q)|) kernel"
     k2::K2
 end
 
-function init(md::OpROFModel, alg::ChambolleAlgorithm)
+function init(md::DualTVDDModel, alg::ChambolleAlgorithm)
     d = ndims(md.g)
-    g = extend(md.g, StaticKernels.ExtensionNothing())
-    λ = extend(md.λ, StaticKernels.ExtensionNothing())
     pv = zeros(d * length(md.g))
     rv = zeros(length(md.g))
     sv = zero(rv)
@@ -57,69 +54,38 @@ function init(md::OpROFModel, alg::ChambolleAlgorithm)
     r = reshape(reinterpret(Float64, rv), size(md.g))
     s = extend(reshape(reinterpret(Float64, sv), size(md.g)), StaticKernels.ExtensionReplicate())
 
-    z = zero(CartesianIndex{d})
-    @inline kf1(pw, gw) = @inbounds divergence(pw) + gw[z]
+    @inline kf1(pw) = @inbounds divergence(pw) + md.g[pw.position]
     k1 = Kernel{ntuple(_->-1:1, d)}(kf1)
 
-    normB = norm(md.B)
+    my_norm(B::UniformScaling{Bool}) = B.λ * sqrt(length(md.g))
+    my_norm(B) = norm(B)
 
-    @inline function kf2(pw, sw)
-        @inbounds iszero(λ[pw.position]) && return zero(pw[z])
+    normB = my_norm(md.B)
+
+    @inline function kf2(sw)
+        @inbounds iszero(md.α[sw.position]) && return zero(p[sw.position])
         sgrad = (alg.τ / normB) * gradient(sw)
-        return @inbounds (pw[z] + sgrad) / (1 + norm(sgrad) / λ[pw.position])
+        return @inbounds (p[sw.position] + sgrad) / (1 + norm(sgrad) / md.α[sw.position])
     end
     k2 = Kernel{ntuple(_->0:1, d)}(kf2)
 
-    return ChambolleContext(md, alg, g, λ, p, r, s, rv, sv, k1, k2)
+    return ChambolleContext(md, alg, p, r, s, rv, sv, k1, k2)
 end
 
 function reset!(ctx::ChambolleContext)
     fill!(ctx.p, zero(eltype(ctx.p)))
 end
 
-@generated function gradient(w::StaticKernels.Window{<:Any,N}) where N
-    i0 = ntuple(_->0, N)
-    i1(k) = ntuple(i->Int(k==i), N)
-
-    wi = (:(w[$(i1(k)...)] - w[$(i0...)]) for k in 1:N)
-    return quote
-        Base.@_inline_meta
-        return @inbounds SVector($(wi...))
-    end
-end
-
-@generated function divergence(w::StaticKernels.Window{SVector{N,T},N}) where {N,T}
-    i0 = ntuple(_->0, N)
-    i1(k) = ntuple(i->Int(k==i), N)
-
-    wi = (:((isnothing(w[$(i1(k)...)]) ? zero(T) : w[$(i0...)][$k]) -
-            (isnothing(w[$((.-i1(k))...)]) ? zero(T) : w[$((.-i1(k))...)][$k])) for k in 1:N)
-    return quote
-        Base.@_inline_meta
-        return @inbounds +($(wi...))
-    end
-end
-
-#  x = (A'*A + β*I) \ (FD.divergence(grid, p) .+ A'*f)
-#  q = FD.gradient(grid, x)
-
 function step!(ctx::ChambolleContext)
     # r = div(p) + g
-    map!(ctx.k1, ctx.r, ctx.p, ctx.g)
+    map!(ctx.k1, ctx.r, ctx.p)
     # s = B * r
     mul!(ctx.sv, ctx.model.B, ctx.rv)
     # p = (p + τ*grad(s)) / (1 + τ/λ|grad(s)|)
-    map!(ctx.k2, ctx.p, ctx.p, ctx.s)
+    map!(ctx.k2, ctx.p, ctx.s)
 end
 
-function recover_u!(ctx)
-    # r = div(p) + g
-    map!(ctx.k1, ctx.r, ctx.p, ctx.g)
-    # s = B * r
-    mul!(ctx.sv, ctx.model.B, ctx.rv)
-
-    return ctx.s
-end
+recover_u(ctx::ChambolleContext) = recover_u(ctx.p, ctx.model)
 
 #
 #function solve(md::ROFModel, alg::Chambolle;
diff --git a/src/common.jl b/src/common.jl
new file mode 100644
index 0000000000000000000000000000000000000000..3befb14c5a65083b7728a93efdc51fca63ee2d71
--- /dev/null
+++ b/src/common.jl
@@ -0,0 +1,65 @@
+using StaticArrays
+using StaticKernels
+
+# Solver Interface Notes:
+#
+# - a concrete subtype `<:Model` describes one model type and each instance
+#   represents a fully specified problem, including data and model parameters
+# - a concrete subtype `<:Algorithm` describes an algorithm type, maybe
+#   applicable to different models with special implementations
+# - abstract subtypes `<:Model`, `<:Algorithm` may exist
+# - `init(::Model, ::Algorithm)::Context` initializes a runnable algorithm context
+# - `step!(::Context)::Context` performs one non-allocating step
+# - `solve(::Model, ::Algorithm)` must produce a deterministic outcome that
+#   depends only on the given arguments
+# - `run(::Context)` must continue from the given context
+# - `(<:Algorithm)(...)` constructors must accept keyword arguments only
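+#
+# A minimal usage sketch of this interface (illustrative only, using the model
+# and algorithm types defined in models.jl and chambolle.jl):
+#
+#     md  = DualTVDDModel(g, I, 0.1)   # dual ROF model for data g
+#     alg = ChambolleAlgorithm()
+#     ctx = init(md, alg)
+#     for _ in 1:100
+#         step!(ctx)                   # non-allocating iteration
+#     end
+#     u = recover_u(ctx)               # primal solution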
+
+abstract type Model end
+abstract type Algorithm end
+abstract type Context end
+
+
+"helper struct to prevent allocations"
+struct AppliedLinearKernel{K,A}
+    kernel::K
+    data::A
+end
+
+compute!(dst, op::AppliedLinearKernel) = map!(op.kernel, dst, op.data)
+
+
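+# Forward-difference gradient of a scalar window; for N = 2 the generated body
+# is SVector(w[1,0] - w[0,0], w[0,1] - w[0,0]).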
+@generated function gradient(w::StaticKernels.Window{<:Any,N}) where N
+    i0 = ntuple(_->0, N)
+    i1(k) = ntuple(i->Int(k==i), N)
+
+    wi = (:(w[$(i1(k)...)] - w[$(i0...)]) for k in 1:N)
+    return quote
+        Base.@_inline_meta
+        return @inbounds SVector($(wi...))
+    end
+end
+
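+# Discrete divergence for vector-valued windows, the counterpart of `gradient`
+# above: component k contributes w[0][k] - w[-e_k][k], and a term is dropped at
+# the boundary whenever the corresponding neighbour is `nothing` (ExtensionNothing).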
+@generated function divergence(w::StaticKernels.Window{SVector{N,T},N}) where {N,T}
+    i0 = ntuple(_->0, N)
+    i1(k) = ntuple(i->Int(k==i), N)
+
+    wi = (:((isnothing(w[$(i1(k)...)]) ? zero(T) : w[$(i0...)][$k]) -
+            (isnothing(w[$((.-i1(k))...)]) ? zero(T) : w[$((.-i1(k))...)][$k])) for k in 1:N)
+    return quote
+        Base.@_inline_meta
+        return @inbounds +($(wi...))
+    end
+end
+
+function div_op(a::AbstractArray{<:StaticVector{N},N}) where N
+    k = Kernel{ntuple(_->-1:1, ndims(a))}(w -> divergence(w))
+    ae = extend(a, StaticKernels.ExtensionNothing())
+    return AppliedLinearKernel(k, ae)
+end
+
+function grad_op(a::AbstractArray)
+    k = Kernel{ntuple(_->0:1, ndims(a))}(w -> gradient(w))
+    ae = extend(a, StaticKernels.ExtensionReplicate())
+    return AppliedLinearKernel(k, ae)
+end
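+
+# Sketch of the intended use of these lazy operators (not exercised elsewhere yet):
+# build the operator once, then apply it repeatedly without allocating. Here `p`
+# is assumed to be an array of SVectors and `r` a preallocated scalar array of the
+# same size:
+#
+#     dp = div_op(p)
+#     compute!(r, dp)    # r .= div(p)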
diff --git a/src/dualtvdd.jl b/src/dualtvdd.jl
index 7136f22b9314ab17ff4fec2e495d32cd8b2e2815..de7d1b041b93e5917f56ac03954800704f01a057 100644
--- a/src/dualtvdd.jl
+++ b/src/dualtvdd.jl
@@ -49,18 +49,20 @@ function init(md::DualTVDDModel, alg::DualTVDDAlgorithm)
     ptmp = extend(zeros(SVector{d,Float64}, size(md.f)), StaticKernels.ExtensionNothing())
 
     # precomputed global B
-    #B = inv(md.A' * md.A + md.β * I)
-    B = diagm(ones(length(md.f))) + md.β * I
+    B = inv(md.A' * md.A + md.β * I)
+    #B = diagm(ones(length(md.f))) + md.β * I
 
     # create subproblem contexts
     li = LinearIndices(size(md.f))
-    submds = [OpROFModel(subg[i], B, subα[i])
+    submds = [DualTVDDModel(subg[i], B, subα[i])
         for i in CartesianIndices(subax)]
-    subalg = ChambolleAlgorithm()
+
+    subalg = ProjGradAlgorithm(τ=1/sqrt(8)/norm(B))
+    #subalg = ChambolleAlgorithm()
     subctx = [init(submds[i], subalg) for i in CartesianIndices(subax)]
 
     # subcontext B is identity
-    B = inv(md.A' * md.A + md.β * I)
+    #B = inv(md.A' * md.A + md.β * I)
 
     return DualTVDDContext(md, alg, g, p, ptmp, B, subax, subg, subctx)
 end
@@ -80,8 +82,8 @@ function step!(ctx::DualTVDDContext)
 
         # TODO: make p computation local!
         ctx.ptmp .= (1 .- theta.(Ref(ax), Ref(sax), Ref(overlap), CartesianIndices(ctx.p))) .* ctx.p
-        map!(kΛ, sg, ctx.ptmp)
-        sg .+= ctx.g
+        #map!(kΛ, sg, ctx.ptmp)
+        #sg .+= ctx.g
 
         for j in 1:1000
             step!(ctx.subctx[i])
diff --git a/src/models.jl b/src/models.jl
new file mode 100644
index 0000000000000000000000000000000000000000..f88fb66045d9283e16d34a33757a3c0c3e0ab1d9
--- /dev/null
+++ b/src/models.jl
@@ -0,0 +1,28 @@
+"min_p 1/2 * |-div(p)-g|_B^2 + χ_{|p|_2<α}"
+struct DualROFOpModel{Tg,TB,Tα} <: Model
+    "dual data"
+    g::Tg
+    "B operator"
+    B::TB
+    "total variation parameter"	                                       
+    α::Tα
+end
+
+DualTVDDModel(g, B, α::Real) = DualTVDDModel(g, B, fill!(similar(g), α))
+
+function recover_u(p, md::DualTVDDModel)
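+    # Recover the primal variable from the dual one via u = B*(div(p) + g);
+    # for the full problem this reads u = (A'A + βI)^(-1) * (div(p) + A'f).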
+    d = ndims(md.g)
+    u = similar(md.g)
+    v = similar(md.g)
+
+    @inline kfΛ(w) = @inbounds divergence(w)
+    kΛ = Kernel{ntuple(_->-1:1, d)}(kfΛ)
+
+    # v = div(p) + A'*f
+    map!(kΛ, v, p) # extension: nothing
+    v .+= md.g
+    # u = B * v
+    mul!(vec(u), md.B, vec(v))
+    return u
+end
diff --git a/src/projgrad.jl b/src/projgrad.jl
index af0d14d92dbdf7945e90108d990a8d7d8ee06f2b..07cfa262f643aab2635807fbba7259d7a6e5cf43 100644
--- a/src/projgrad.jl
+++ b/src/projgrad.jl
@@ -1,27 +1,21 @@
 struct ProjGradAlgorithm <: Algorithm
     "gradient step size"
-    λ::Float64
-    function ProjGradAlgorithm(; λ)
-        return new(λ)
+    τ::Float64
+    function ProjGradAlgorithm(; τ)
+        return new(τ)
     end
 end
 
-struct ProjGradContext{M,A,V,W,Wv,WvWv,R,S,K1,K2}
+struct ProjGradContext{M,A,Tp,Wv,R,S,K1,K2}
     model::M
     algorithm::A
     "dual optimization variable"
-    p::V
-    "precomputed A'f"
-    g::W
-    "extended model.λ"
-    λ::W
+    p::Tp
 
     "scalar temporary 1"
     rv::Wv
     "scalar temporary 2"
     sv::Wv
-    "precomputed (A'A + βI)^(-1)"
-    B::WvWv
 
     "matrix view on rv"
     r::R
@@ -32,13 +26,12 @@ struct ProjGradContext{M,A,V,W,Wv,WvWv,R,S,K1,K2}
     k2::K2
 end
 
-function init(md::OpROFModel, alg::ProjGradAlgorithm)
+function init(md::DualTVDDModel, alg::ProjGradAlgorithm)
     d = ndims(md.g)
     ax = axes(md.g)
 
     p = extend(zeros(SVector{d,Float64}, ax), StaticKernels.ExtensionNothing())
     g = extend(md.g, StaticKernels.ExtensionNothing())
-    λ = extend(md.λ, StaticKernels.ExtensionNothing())
 
     rv = zeros(length(md.g))
     sv = zeros(length(md.g))
@@ -47,40 +40,26 @@ function init(md::OpROFModel, alg::ProjGradAlgorithm)
     s = extend(reshape(sv, ax), StaticKernels.ExtensionReplicate())
 
     z = zero(CartesianIndex{d})
-    @inline kf1(pw, gw) = @inbounds -divergence(pw) - gw[z]
+
+    @inline kf1(pw) = @inbounds -divergence(pw) - md.g[pw.position]
     k1 = Kernel{ntuple(_->-1:1, d)}(kf1)
 
     @inline function kf2(pw, sw)
-        Base.@_inline_meta
-        q = pw[z] - alg.λ * gradient(sw)
-        return q / max(norm(q) / md.λ[sw.position], 1)
+        @inbounds q = pw[z] - alg.τ * gradient(sw)
+        return @inbounds q / max(norm(q) / md.α[sw.position], 1)
     end
     k2 = Kernel{ntuple(_->0:1, d)}(kf2)
 
-    return ProjGradContext(md, alg, p, g, λ, rv, sv, md.B, r, s, k1, k2)
+    return ProjGradContext(md, alg, p, rv, sv, r, s, k1, k2)
 end
 
 function step!(ctx::ProjGradContext)
     # r = Λ*p - g
-    map!(ctx.k1, ctx.r, ctx.p, ctx.g)
+    map!(ctx.k1, ctx.r, ctx.p)
     # s = B * r
-    mul!(ctx.sv, ctx.B, ctx.rv)
+    mul!(ctx.sv, ctx.model.B, ctx.rv)
     # p = proj(p - λΛ's)
     map!(ctx.k2, ctx.p, ctx.p, ctx.s)
 end
 
-function recover_u!(ctx::ProjGradContext)
-    d = ndims(ctx.g)
-    u = similar(ctx.g)
-    v = similar(ctx.g)
-
-    @inline kfΛ(w) = @inbounds divergence(w)
-    kΛ = Kernel{ntuple(_->-1:1, d)}(kfΛ)
-
-    # v = div(p) + A'*f
-    map!(kΛ, v, ctx.p) # extension: nothing
-    v .+= ctx.g
-    # u = B * v
-    mul!(vec(u), ctx.B, vec(v))
-    return u
-end
+recover_u(ctx::ProjGradContext) = recover_u(ctx.p, ctx.model)
diff --git a/src/types.jl b/src/types.jl
deleted file mode 100644
index c5248b30f7f56731beac13b87edba74e52d26dc0..0000000000000000000000000000000000000000
--- a/src/types.jl
+++ /dev/null
@@ -1,42 +0,0 @@
-# Solver Interface Notes:
-#
-# - a concrete subtype `<:Model` describes one model type and each instance
-#   represents a partical solvable problem, including model parameters and data
-# - a concrete subtype `<:Algorithm` describes an algorithm type, maybe
-#   applicable to different models with special implementations
-# - abstract subtypes `<:Model`, `<:Algorithm` may exist
-# - `solve(::Model, ::Algorithm)` must lead to the same deterministic
-#   outcome depending only on the arguments given.
-# - `init(::Model, ::Algorithm)::Context` initializes a runnable algorithm context
-# - `run(::Context)` must continue on from the given context
-# - `(<:Algorithm)(...)` constructors must accept keyword arguments only
-
-abstract type Model end
-abstract type Algorithm end
-abstract type Context end
-
-"min_u  1/2 * |Au-f|_2^2 + α*TV(u) + β/2 |u|_2 + γ |u|_1"
-struct DualTVDDModel{U,VV} <: Model
-    "given data"
-    f::U
-    "forward operator"
-    A::VV
-    "total variation parameter"
-    α::Float64
-    "L2 regularization parameter"
-    β::Float64
-    "L1 regularization parameter"
-    γ::Float64
-end
-
-"min_p  1/2 * |-div(p) - g|_B^2 + χ_{|p|<=λ}"
-struct OpROFModel{U,VV,Λ} <: Model
-    "given data"
-    g::U
-    "B norm operator"
-    B::VV
-    "total variation parameter"
-    λ::Λ
-end
-
-OpROFModel(g, B, λ::Real) = OpROFModel(g, B, fill!(similar(g, typeof(λ)), λ))
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000000000000000000000000000000000000..bdc0181ffd97881da7425ffaf13d95dd1d3ed33f
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,16 @@
+using Test
+using LinearAlgebra
+using DualTVDD:
+    DualTVDDModel, ProjGradAlgorithm, ChambolleAlgorithm,
+    init, step!, recover_u
+
+g = Float64[0 2; 1 0]
+md = DualTVDDModel(g, I, 1e-10)
+
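+# With B = I and α ≈ 0 the feasibility constraint |p| <= α pins the dual variable
+# to (numerically) zero, so both algorithms should recover u = B*(div(p) + g) ≈ g.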
+for alg in (ProjGradAlgorithm(τ=1/8), ChambolleAlgorithm())
+    ctx = init(md, alg)
+    for i in 1:100
+        step!(ctx)
+    end
+    @test recover_u(ctx) ≈ g
+end