diff --git a/scripts/run_experiments.jl b/scripts/run_experiments.jl
index a266a0c68e74aeca24f56264208e5269f06face1..1dbe7ad6d03784bd0e7f315e9ba75fe58145c7a1 100644
--- a/scripts/run_experiments.jl
+++ b/scripts/run_experiments.jl
@@ -12,7 +12,8 @@ using Plots
 
 using SemiSmoothNewton
 using SemiSmoothNewton: HMesh, ncells, refine, area, mesh_size, ndofs
-using SemiSmoothNewton: project_l2_lagrange!, project_qi_lagrange!, project!
+using SemiSmoothNewton: project!, project_l2_lagrange!, project_qi_lagrange!,
+    project_l2_pixel!
 using SemiSmoothNewton: vtk_mesh, vtk_append!, vtk_save
 
 include("util.jl")
diff --git a/src/image.jl b/src/image.jl
index 1a291fd5c06735ace13a3fafb35f43815568e699..20ca3732a7620b2e53984b739e391d275ca3814d 100644
--- a/src/image.jl
+++ b/src/image.jl
@@ -295,6 +295,55 @@ function quadrature_cell_pixels(mesh, cell; m0)
     qx = reshape!(vertices, (d, :))
 end
 
+struct PixelIterator{A, G}
+    "cell vertices"
+    A::A
+    "CartesianIndices covering the cell"
+    grid::G
+end
+
+Base.IteratorSize(::PixelIterator) = Base.SizeUnknown()
+Base.eltype(it::PixelIterator) = eltype(it.grid)
+
+"""
+    PixelIterator(mesh, cell)
+    PixelIterator(mesh, A)
+
+Return an iterator over all integer cartesian indices (corresponding to pixel
+centers) intersecting a given `cell` or geometry `A`.
+"""
+function PixelIterator(mesh::Mesh, cell::Int)
+    A = SArray{Tuple{ndims_space(mesh), nvertices_cell(mesh)}}(
+        view(mesh.vertices, :, view(mesh.cells, :, cell)))
+    return PixelIterator(mesh, A)
+end
+
+function PixelIterator(mesh::Mesh, A)
+    # rounding is probably correct since we don't miss any coordinates even
+    # when we round inwards to the element
+    I0 = round.(Int, SVector(minimum(A[1, :]), minimum(A[2, :])))
+    I1 = round.(Int, SVector(maximum(A[1, :]), maximum(A[2, :])))
+    return PixelIterator(A, CartesianIndex(I0...):CartesianIndex(I1...))
+end
+
+function _intersects(it::PixelIterator, I)
+    x = SVector(Tuple(I))
+    # TODO: move to global-to-local function or inside-triangle test
+    xloc = (it.A[:, SUnitRange(2, 3)] .- it.A[:, 1])::SArray \
+        (x .- it.A[:, 1])
+    ε = eps()
+    return all(xloc .>= -ε) && sum(xloc) <= 1 + ε
+end
+
+function Base.iterate(it::PixelIterator, state...)
+    y = iterate(it.grid, state...)
+    isnothing(y) && return nothing
+    while !_intersects(it, first(y))
+        y = iterate(it.grid, last(y))
+        isnothing(y) && return nothing
+    end
+    return y
+end
 
 function project_l2_pixel!(u::FeFunction, img)
     d = 2 # domain dimension
@@ -307,6 +356,20 @@ function project_l2_pixel!(u::FeFunction, img)
     # number of element dofs (i.e. local dofs not counting range dimensions)
     nldofs = ndofs(space.element)
 
+    # first loop to count number of triangles intersecting pixel evaluation
+    # points
+    ncells = zeros(axes(img))
+    for cell in cells(mesh)
+        pixels = PixelIterator(mesh, cell)
+        for I in pixels
+            ncells[I] += 1
+        end
+    end
+    all(!iszero, ncells) ||
+        throw(ErrorException("not all pixels are covered by a cell"))
+    pixelweights = inv.(ncells)
+
+    # typical L2 projection of f
     a(xloc, u, du, v, dv; f) = dot(u, v)
     l(xloc, v, dv; f) = dot(f, v)
 
@@ -315,7 +378,10 @@ function project_l2_pixel!(u::FeFunction, img)
     V = Float64[]
     b = zeros(ndofs(space))
 
-    # mesh cells
+    # buffer for basis value/derivative at a single point
+    qphi = zero(MArray{Tuple{nrdims, nrdims, nldofs}})
+    dqphi = zero(MArray{Tuple{nrdims, d, nrdims, nldofs}})
+
     for cell in cells(mesh)
 	foreach(f -> bind!(f, cell), opparams)
 	# cell map is assumed to be constant per cell
@@ -323,45 +389,49 @@ function project_l2_pixel!(u::FeFunction, img)
 	delmapinv = inv(delmap)
 	intel = abs(det(delmap))
 
-	p = ceil(Int, diam(mesh, cell))
-	qw, qx = quadrature_composite_lagrange_midpoint(p)
-	nqpts = length(qw) # number of quadrature points
+	#p = ceil(Int, diam(mesh, cell))
+	#qw, qx = quadrature_composite_lagrange_midpoint(p)
+	#nqpts = length(qw) # number of quadrature points
 
-	qphi = zeros(nrdims, nrdims, nldofs, nqpts)
-	dqphi = zeros(nrdims, d, nrdims, nldofs, nqpts)
-	for k in axes(qx, 2)
+        # pixels as quadrature points
+        pixels = PixelIterator(mesh, cell)
+        for P in pixels
+            x = SVector(Tuple(P))
+            # TODO: move to global-to-local function or inside-triangle test
+            xhat = (pixels.A[:, SUnitRange(2, 3)] .- pixels.A[:, 1])::SArray \
+                (x .- pixels.A[:, 1])
+	    opvalues = map(f -> evaluate(f, xhat), opparams)
+
+            # fill basis value/derivative buffer
 	    for r in 1:nrdims
-		qphi[r, r, :, k] .= evaluate_basis(space.element, SVector{d}(view(qx, :, k)))
-		dqphi[r, :, r, :, k] .= transpose(jacobian(x -> evaluate_basis(space.element, x), SVector{d}(view(qx, :, k))))
+		qphi[r, r, :] .=
+                    evaluate_basis(space.element, SVector{d}(xhat))
+                dqphi[r, :, r, :]::MArray{Tuple{d, nldofs}, Float64, 2, d * nldofs} .=
+                    transpose(jacobian(
+                        x -> evaluate_basis(space.element, x),
+                        SVector{d}(xhat)))
 	    end
-	end
-
-	# quadrature points
-	for k in axes(qx, 2)
-	    xhat = SVector{d}(view(qx, :, k))
-	    x = elmap(mesh, cell)(xhat)
-	    opvalues = map(f -> evaluate(f, xhat), opparams)
 
 	    # local test-function dofs
 	    for jdim in 1:nrdims, ldofj in 1:nldofs
 		gdofj = space.dofmap[jdim, ldofj, cell]
 
-		phij = SArray{Tuple{space.size...}}(view(qphi, :, jdim, ldofj, k))
+		phij = SArray{Tuple{space.size...}}(view(qphi, :, jdim, ldofj))
 		dphij = SArray{Tuple{space.size..., d}}(
-		    SArray{Tuple{nrdims, d}}(view(dqphi, :, :, jdim, ldofj, k)) * delmapinv)
+		    SArray{Tuple{nrdims, d}}(view(dqphi, :, :, jdim, ldofj)) * delmapinv)
 
-		lv = qw[k] * l(x, phij, dphij; opvalues...) * intel
+                lv = pixelweights[P] * l(x, phij, dphij; opvalues...) * intel
 		b[gdofj] += lv
 
 		# local trial-function dofs
 		for idim in 1:nrdims, ldofi in 1:nldofs
 		    gdofi = space.dofmap[idim, ldofi, cell]
 
-		    phii = SArray{Tuple{space.size...}}(view(qphi, :, idim, ldofi, k))
+		    phii = SArray{Tuple{space.size...}}(view(qphi, :, idim, ldofi))
 		    dphii = SArray{Tuple{space.size..., d}}(
-			SArray{Tuple{nrdims, d}}(view(dqphi, :, :, idim, ldofi, k)) * delmapinv)
+			SArray{Tuple{nrdims, d}}(view(dqphi, :, :, idim, ldofi)) * delmapinv)
 
-		    av = qw[k] * a(x, phii, dphii, phij, dphij; opvalues...) * intel
+		    av = pixelweights[P] * a(x, phii, dphii, phij, dphij; opvalues...) * intel
 		    push!(I, gdofi)
 		    push!(J, gdofj)
 		    push!(V, av)