diff --git a/scripts/run_experiments.jl b/scripts/run_experiments.jl
index 954c1935c72e48cc823ee4e687d802d8bf6ec222..6b29bfa0fc17d4b90e2b74b7d6107dfce34b2281 100644
--- a/scripts/run_experiments.jl
+++ b/scripts/run_experiments.jl
@@ -17,7 +17,7 @@ using StaticArrays: MVector, SArray, SMatrix, SVector, SA
 
 using SemiSmoothNewton
 using SemiSmoothNewton: HMesh, ncells, refine, area, mesh_size, ndofs, diam,
-    elmap
+    elmap, assemble_lumped
 using SemiSmoothNewton: project!, project_l2_lagrange!, project_qi_lagrange!,
     project_l2_pixel!
 using SemiSmoothNewton: vtk_mesh, vtk_append!, vtk_save
@@ -257,6 +257,31 @@ function step!(st::L1L2TVState)
 	return a1 + a2 + aB
     end
 
+    function du_au(x_, du, nabladu, phi, nablaphi; g, u, nablau, p1, p2, tdata)
+	aB = alpha2 * dot(T(tdata, du), T(tdata, phi)) +
+	    beta * dot(S(du, nabladu), S(phi, nablaphi))
+
+	return aB
+    end
+
+    function du_ap(x_, du, nabladu, phi, nablaphi; g, u, nablau, p1, p2, tdata)
+	m1 = max(gamma1, norm(T(tdata, u) - g))
+	cond1 = norm(T(tdata, u) - g) > gamma1 ?
+	    dot(T(tdata, u) - g, T(tdata, du)) / norm(T(tdata, u) - g)^2 * p1 :
+	    zero(p1)
+	a1 = alpha1 / m1 * dot(T(tdata, du), T(tdata, phi)) -
+	    dot(cond1, T(tdata, phi))
+
+	m2 = max(gamma2, norm(nablau))
+	cond2 = norm(nablau) > gamma2 ?
+	    dot(nablau, nabladu) / norm(nablau)^2 * p2 :
+	    zero(p2)
+	a2 = lambda / m2 * dot(nabladu, nablaphi) -
+	    dot(cond2, nablaphi)
+
+	return a1 + a2
+    end
+
     function du_l(x_, phi, nablaphi; g, u, nablau, p1, p2, tdata)
 	aB = alpha2 * dot(T(tdata, u), T(tdata, phi)) +
 	    beta * dot(S(u, nablau), S(phi, nablaphi))
@@ -269,11 +294,32 @@ function step!(st::L1L2TVState)
 	return -aB - p1part - p2part + gpart
     end
 
+    function du_lu(x_, phi, nablaphi; g, u, nablau, p1, p2, tdata)
+	aB = alpha2 * dot(T(tdata, u), T(tdata, phi)) +
+	    beta * dot(S(u, nablau), S(phi, nablaphi))
+	gpart = alpha2 * dot(g, T(tdata, phi))
+
+	return -aB + gpart
+    end
+
+    function du_lp(x_, phi, nablaphi; g, u, nablau, p1, p2, tdata)
+	m1 = max(gamma1, norm(T(tdata, u) - g))
+	p1part = alpha1 / m1 * dot(T(tdata, u) - g, T(tdata, phi))
+	m2 = max(gamma2, norm(nablau))
+	p2part = lambda / m2 * dot(nablau, nablaphi)
+
+	return - p1part - p2part
+    end
+
     # solve du
     print("assemble ... ")
-    A, b = assemble(st.du.space, du_a, du_l;
+    Au, bu = assemble(st.du.space, du_au, du_lu;
+        st.g, st.u, nablau = nabla(st.u), st.p1, st.p2, st.tdata)
+    Ap, bp = assemble_lumped(st.du.space, du_ap, du_lp;
         st.g, st.u, nablau = nabla(st.u), st.p1, st.p2, st.tdata)
     print("solve ... ")
+    A = Au + Ap
+    b = bu + bp
     st.du.data .= A \ b
 
 
diff --git a/src/operator.jl b/src/operator.jl
index 153ea67429cdac47e251a30d3a12e7f8db7c3ce0..634b2e18e1944fc8f58e0cc9dd6b61afaeb846b9 100644
--- a/src/operator.jl
+++ b/src/operator.jl
@@ -34,7 +34,8 @@ a(op::Poisson, xloc, u, du, v, dv) = dot(du, dv)
 
 # degree 2 quadrature on triangle
 quadrature() = SA[1/6, 1/6, 1/6], SA[1/6 4/6 1/6; 1/6 1/6 4/6]
-#quadrature() = SA[1/3, 1/3, 1/3], SA[0. 0. 1.; 0. 1. 0.]
+# degree 1 quadrature on triangle using corner points
+quadrature_lumped() = SA[1/3, 1/3, 1/3], SA[0. 0. 1.; 0. 1. 0.]
 
 
 assemble(op::Operator) = assemble(op.space,
@@ -42,13 +43,19 @@ assemble(op::Operator) = assemble(op.space,
     (x...; y...) -> l(op, x...; y...);
     params(op)...)
 
-function assemble(space::FeSpace, a, l; params...)
+assemble(space::FeSpace, a, l; params...) =
+    assemble(space::FeSpace, quadrature(), a, l; params...)
+
+assemble_lumped(space::FeSpace, a, l; params...) =
+    assemble(space::FeSpace, quadrature_lumped(), a, l; params...)
+
+function assemble(space::FeSpace, quadrature, a, l; params...)
     mesh = space.mesh
     opparams = NamedTuple(params)
 
     # precompute basis at quadrature points
 
-    qw, qx = quadrature()
+    qw, qx = quadrature
 
     d = size(qx, 1) # domain dimension
     nrdims = prod(space.size)