diff --git a/examples/example.py b/examples/example.py
index 42062e526c9ee3f26ff8f965426f5423122b776e..c084275651613aa53fb326e6b7bb07999d211390 100644
--- a/examples/example.py
+++ b/examples/example.py
@@ -6,7 +6,7 @@
 import numpy as np
 from matplotlib import pyplot as plt
 
-from vkoga_pde.kernels_PDE import Gaussian_laplace
+from vkoga_pde.kernels_PDE import Gaussian_laplace, wendland32_laplace, cubicMatern_laplace
 from vkoga_pde.vkoga_PDE import VKOGA_PDE
 
 np.random.seed(1)
@@ -28,17 +28,19 @@ f = lambda x: -6 * x[:, [0]] * x[:, [1]]
 
 # Define right hand side values
 y1 = f(X1)
+y1_sol = u(X1)
 y2 = u(X2)
 
 # Initialize and run models
-kernel = Gaussian_laplace(dim=2)
+# kernel = Gaussian_laplace(dim=2)
+kernel = cubicMatern_laplace(dim=2)
 maxIter = 200
 
 model_f = VKOGA_PDE(kernel=kernel, greedy_type='f_greedy')
 model_p = VKOGA_PDE(kernel=kernel, greedy_type='p_greedy')
 
-_ = model_f.fit(X1, y1, X2, y2, maxIter=maxIter)
-_ = model_p.fit(X1, y1, X2, y2, maxIter=maxIter)
+_ = model_f.fit(X1, y1, X2, y2, maxIter=maxIter, y1_sol=y1_sol)
+_ = model_p.fit(X1, y1, X2, y2, maxIter=maxIter, y1_sol=y1_sol)
 
 # Plot some results
 plt.figure(1)
@@ -48,24 +50,34 @@ plt.plot(np.array(model_p.train_hist['f'])**.5, 'b')
 plt.yscale('log')
 plt.xscale('log')
 plt.legend(['f-greedy', 'p-greedy'])
-plt.title('residuals')
+plt.title('L(u-s_n) residuals')
 plt.draw()
 
 plt.figure(2)
 plt.clf()
+plt.plot(np.array(model_f.train_hist['f sol']), 'r')
+plt.plot(np.array(model_p.train_hist['f sol']), 'b')
+plt.yscale('log')
+plt.xscale('log')
+plt.legend(['f-greedy', 'p-greedy'])
+plt.title('(u-s_n) residuals')
+plt.draw()
+
+plt.figure(3)
+plt.clf()
 plt.plot(model_f.ctrs_[model_f.ind_ctrs1, 0], model_f.ctrs_[model_f.ind_ctrs1, 1], 'rx')
 plt.plot(model_f.ctrs_[model_f.ind_ctrs2, 0], model_f.ctrs_[model_f.ind_ctrs2, 1], 'ro')
 plt.title('Selected centers of f-greedy')
 plt.draw()
 
-plt.figure(3)
+plt.figure(4)
 plt.clf()
 plt.plot(model_p.ctrs_[model_p.ind_ctrs1, 0], model_p.ctrs_[model_p.ind_ctrs1, 1], 'bx')
 plt.plot(model_p.ctrs_[model_p.ind_ctrs2, 0], model_p.ctrs_[model_p.ind_ctrs2, 1], 'bo')
 plt.title('Selected centers of P-greedy')
 plt.draw()
 
-plt.figure(4)
+plt.figure(5)
 plt.clf()
 plt.plot(X1[:, 0], X1[:, 1], 'm.')
 plt.plot(X2[:, 0], X2[:, 1], 'r.')
diff --git a/setup.py b/setup.py
index 015eeee67e3cf594cf234edb7692c4f8bcc4f4ff..66eadb8ea275362e86a3b3dd4ce7b258ca539956 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup
 
 setup(
     name='PDE-VKOGA',
-    version='0.1.0',    
+    version='0.1.1',    
     description='Python implementation of PDE-VKOGA to approximate the solution of linear PDEs.',
     url='https://gitlab.mathematik.uni-stuttgart.de/pub/ians-anm/pde-vkoga',
     author='Tizian Wenzel',
diff --git a/vkoga_pde/kernels_PDE.py b/vkoga_pde/kernels_PDE.py
index 5caac657bbc141b1f45ff04f796fae119c2f4b3b..b17da096c56332edfab6c15b6d56c5eeaa41a401 100644
--- a/vkoga_pde/kernels_PDE.py
+++ b/vkoga_pde/kernels_PDE.py
@@ -89,34 +89,130 @@ class L_RBF(Kernel):
         # Since the LaPlace is radial, the formula is the same as d2_eval
         return self.d2_eval(x, y)
 
-    def mixed_eval_L(self, x, ctrs, list_ind_interior):
-        # Kernel evaluation L^x k(x, y), whereby y1 := y[ind_interior, :] are centers in the interior,
-        # y2 := y[~ind_interior, :] are centers on the boundary.
-        # Therefore ddeval needs to be used with respect to y1, but deval with respect to y2.
+    def mixed_L_eval(self, x, ctrs, n_ctrs, list_ind_L, list_ind_k, list_ind_n):
+        # L evaluations in x of Riesz representers which are located in ctrs.
+        # list_ind_L, list_ind_k and list_ind_n indicate to which set of functionals the Riesz
+        # representer belong to:
+        # y1 := y[list_ind_L, :] corresponds to the interior, y2 := y[list_ind_k, :] to Dirichlet
+        # boundary centers and y3 := y[list_ind_n, :] to Neumann boundary centers.
+        # Different eval-methods need to be used with respect to y1, y2, y3.
 
-        list_not_ind_interior = [idx for idx in range(x.shape[0]) if idx not in list_ind_interior]
+        x = np.atleast_2d(x)
+        ctrs = np.atleast_2d(ctrs)
 
         array_result = np.zeros((x.shape[0], ctrs.shape[0]))
 
-        array_result[list_ind_interior, :] = self.dd_eval(x[list_ind_interior, :], ctrs)
-        array_result[list_not_ind_interior, :] = self.d1_eval(x[list_not_ind_interior, :], ctrs)
+        array_result[:, list_ind_L] = self.dd_eval(x, ctrs[list_ind_L])
+        array_result[:, list_ind_k] = self.d1_eval(x, ctrs[list_ind_k])
+        array_result[:, list_ind_n] = self.mixed_L_n(x, ctrs[list_ind_n], n_ctrs[list_ind_n])
 
         return array_result
 
-    def mixed_eval(self, x, ctrs, list_ind_interior):
-        # Kernel evaluation k(x, y), whereby y1 := y[ind_interior, :] are centers in the interior,
-        # y2 := y[~ind_interior, :] are centers on the boundary.
-        # Therefore deval needs to be used with respect to y1, but eval with respect to y2.
+    def mixed_k_eval(self, x, ctrs, n_ctrs, list_ind_L, list_ind_k, list_ind_n):
+        # Point evaluations in x of Riesz representers which are located in ctrs.
+        # list_ind_L, list_ind_k and list_ind_n indicate to which set of functionals the Riesz
+        # representer belong to:
+        # y1 := y[list_ind_L, :] corresponds to the interior, y2 := y[list_ind_k, :] to Dirichlet
+        # boundary centers and y3 := y[list_ind_n, :] to Neumann boundary centers.
+        # Different eval-methods need to be used with respect to y1, y2, y3.
 
-        list_not_ind_interior = [idx for idx in range(x.shape[0]) if idx not in list_ind_interior]
+        x = np.atleast_2d(x)
+        ctrs = np.atleast_2d(ctrs)
 
         array_result = np.zeros((x.shape[0], ctrs.shape[0]))
 
-        array_result[list_ind_interior, :] = self.d2_eval(x[list_ind_interior, :], ctrs)
-        array_result[list_not_ind_interior, :] = self.eval(x[list_not_ind_interior, :], ctrs)
+        array_result[:, list_ind_L] = self.d2_eval(x, ctrs[list_ind_L])
+        array_result[:, list_ind_k] = self.eval(x, ctrs[list_ind_k])
+        array_result[:, list_ind_n] = self.mixed_k_n(x, ctrs[list_ind_n], n_ctrs[list_ind_n])
 
         return array_result
 
+    def mixed_n_eval(self, x, n_x, ctrs, n_ctrs, list_ind_L, list_ind_k, list_ind_n):
+        # Neumann derivative evaluations in x with normal vector n_x of Riesz
+        # representers which are located in ctrs. n_ctrs contains normal vectors
+        # to those centers listed in list_ind_n - if this list is empty, then the
+        # array n_ctrs should be a zero array.
+        # list_ind_L, list_ind_k and list_ind_n indicate to which set of functionals the Riesz
+        # representer belong to:
+        # y1 := y[list_ind_L, :] corresponds to the interior, y2 := y[list_ind_k, :] to Dirichlet
+        # boundary centers and y3 := y[list_ind_n, :] to Neumann boundary centers.
+        # Different eval-methods need to be used with respect to y1, y2, y3.
+
+        x = np.atleast_2d(x)
+        ctrs = np.atleast_2d(ctrs)
+
+        array_result = np.zeros((x.shape[0], ctrs.shape[0]))
+
+        array_result[:, list_ind_L] = self.mixed_L_n(ctrs[list_ind_L], x, n_x).T
+        array_result[:, list_ind_k] = self.mixed_k_n(ctrs[list_ind_k], x, n_x).T
+        array_result[:, list_ind_n] = self.mixed_n_n(
+            x, ctrs[list_ind_n], n_x, n_ctrs[list_ind_n])
+
+        return array_result
+
+    # Now implement some functions related to Neumann boundary conditions
+    def nn_diagonal(self, X):
+        # Evaluation of < d/dn k(*, x), d/dn k(*, y) > for x = y,
+        # i.e. dot product between two Neumann boundary functionals.
+        # Value is independent of direction on n, therefore it is not provided/required.
+
+        X = np.atleast_2d(X)
+
+        return - np.ones(X.shape[0]) * self.rbf1_divided_by_r(self.ep, 0)
+
+    def mixed_L_n(self, x, y, n_y):
+        # Evaluation of < L^{2}k(*, x), d/dn k(*, y) >,  i.e. dot product of L evaluation and Neumann boundary
+        # See 14.04.22\C, especially \C4.
+        # n_y is the normal vectors for y.
+
+        x = np.atleast_2d(x)
+        y = np.atleast_2d(y)
+        n_y = np.atleast_2d(n_y)
+
+        array_dists = distance_matrix(x, y)
+
+        array_result1 = x @ n_y.T - np.sum(y * n_y, axis=1, keepdims=True).T
+        array_result2 = (self.dim - 1) * self.rbf_util_neuman(self.ep, array_dists) \
+                        + self.rbf3_divided_by_r(self.ep, array_dists)
+
+        return array_result1 * array_result2
+
+    def mixed_k_n(self, x, y, n_y):
+        # Evaluation of < k(*, x), d/dn k(*, y) >,  i.e. dot product of k evaluation and Neumann boundary
+        # See 14.04.22\C, especially \C2.
+        # n_y is the normal vectors for y.
+
+        x = np.atleast_2d(x)
+        y = np.atleast_2d(y)
+        n_y = np.atleast_2d(n_y)
+
+        array_dists = distance_matrix(x, y)
+
+        array_result1 = np.sum(y * n_y, axis=1, keepdims=True).T - x @ n_y.T
+        array_result2 = self.rbf1_divided_by_r(self.ep, array_dists)
+
+        return array_result1 * array_result2
+
+    def mixed_n_n(self, x, y, n_x, n_y):
+        # Evaluation of < d/dn k(*, x), d/dn k(*, y) >,  i.e. dot product between two Neumann boundary functionals
+        # See 14.04.22\C, especially \C2.
+        # n_y is the normal vectors for y, same for n_x.
+
+        x = np.atleast_2d(x)
+        y = np.atleast_2d(y)
+        n_x = np.atleast_2d(n_x)
+        n_y = np.atleast_2d(n_y)
+
+        array_dists = distance_matrix(x, y)
+
+        array_1 = - self.rbf1_divided_by_r(self.ep, array_dists) * (n_x @ n_y.T)
+
+        array_2 = - self.rbf_util_neuman(self.ep, array_dists) * \
+                  (x @ n_y.T - np.sum(y * n_y, axis=1, keepdims=True).T) \
+                  * (np.sum(x * n_x, axis=1, keepdims=True) - n_x @ y.T)
+
+        return array_1 + array_2
+
 
 # Implementation of concrete RBFs
 class Gaussian_laplace(L_RBF):
@@ -139,10 +235,15 @@ class Gaussian_laplace(L_RBF):
 
         self.rbf2 = lambda ep, r: (4 * r ** 2 - 2) * np.exp(-r**2)
 
+        self.rbf3_divided_by_r = lambda ep, r: -4 * (2 * r**2 - 3) * np.exp(-r**2)
+
         # Utility RBF for dd_eval: Double LaPlacian
         self.rbf_utility = lambda ep, r: \
                 np.exp(-r ** 2) * (16 * r ** 4 - (16 * self.dim + 32) * r ** 2 + (4 * self.dim ** 2 + 8 * self.dim))
 
+        # Utility RBF for neumann: See 14.04.22\C4, this is Phi'' / r^2 - Phi' / r^3
+        self.rbf_util_neuman = lambda ep, r: 4 * np.exp(-r ** 2)
+
 
 class wendland32_laplace(L_RBF):
     # Wendland kernel 3, 2: phi(r) = (1-r)_+^6 * (35r**2 + 18r + 3)
@@ -174,6 +275,10 @@ class wendland32_laplace(L_RBF):
                    + r**2*(10080*self.dim**2 + 60480*self.dim + 80640)
                    + r*(-6720*self.dim**2 - 26880*self.dim - 20160)) * (r < 1)
 
+        # Utility RBF for neumann: See 14.04.22\C4, this is Phi'' / r^2 - Phi' / r^3
+        self.rbf3_divided_by_r = lambda ep, r: np.zeros_like(r)     # ToDo: Not (yet) implemented
+        self.rbf_util_neuman = lambda ep, r: np.zeros_like(r)       # ToDo: Not (yet) implemented
+
 
 class wendland33_laplace(L_RBF):
     # Wendland kernel 3, 3: phi(r) = (1-r)_+^8 * (32r**3+ 25r^2 + 8r + 1)
@@ -197,6 +302,8 @@ class wendland33_laplace(L_RBF):
 
         self.rbf2 = lambda ep, r: np.maximum(1 - r, 0) ** 6 * 22 * (160 * r ** 3 + 15 * r ** 2 - 6 * r - 1)
 
+        self.rbf3_divided_by_r = lambda ep, r: -np.maximum(1-r, 0) ** 5 * 1584 * (20 * r ** 2 - 5 * r - 1)
+
         # Utility RBF for dd_eval: Double LaPlacian
         self.rbf_utility = lambda ep, r: \
             (528 * self.dim ** 2 + 1056 * self.dim + r ** 7 * (3168 * self.dim ** 2 + 50688 * self.dim + 199584)
@@ -206,6 +313,8 @@ class wendland33_laplace(L_RBF):
              + r ** 3 * (36960 * self.dim ** 2 + 295680 * self.dim + 554400)
              + r ** 2 * (-11088 * self.dim ** 2 - 66528 * self.dim - 88704)) * (r < 1)
 
+        self.rbf_util_neuman = lambda ep, r: 528 * np.maximum(1 - r, 0) ** 6 * (6*r + 1)
+
 
 
 
@@ -232,6 +341,221 @@ class quadrMatern_laplace(L_RBF):
         self.rbf_utility = lambda ep, r: \
             np.exp(-r) * (r**2 - (2 * self.dim + 3) * r + self.dim ** 2 + 2 * self.dim)
 
+        # Utility RBF for neumann: See 14.04.22\C4, this is Phi'' / r^2 - Phi' / r^3
+        self.rbf3_divided_by_r = lambda ep, r: np.zeros_like(r)  # ToDo: Not (yet) implemented
+        self.rbf_util_neuman = lambda ep, r: np.zeros_like(r)       # ToDo: Not (yet) implemented
+
+
+class cubicMatern_laplace(L_RBF):
+    # Quadratic matern kernel phi(r) = (3 + 3r + r^2)*exp(-r),
+    # used in conjunction with L = -LaPlace
+
+    def __init__(self, dim, shift=0, ep=1):
+        super().__init__()
+        self.dim = dim
+        self.shift = shift
+        self.ep = 1  # ep ToDo: Implement general case!
+        self.name = 'cubic Matern'
+
+        # Implement not only the rbf, but also its derivatives
+        self.rbf = lambda ep, r: np.exp(-r) * (15 + 15 * r + 6 * r ** 2 + r ** 3)
+
+        self.rbf1 = lambda ep, r: np.exp(-r) * (-r) * (r ** 2 + 3 * r + 3)
+        self.rbf1_divided_by_r = lambda ep, r: np.exp(-r) * (-1) * (r ** 2 + 3 * r + 3)
+
+        self.rbf2 = lambda ep, r: np.exp(-r) * (r ** 3 - 3 * r - 3)
+
+        # Utility RBF for dd_eval: Double LaPlacian
+        self.rbf_utility = lambda ep, r: \
+            np.exp(-r) * ((self.dim ** 2 + 2 * self.dim)
+                          + (self.dim ** 2 + 2 * self.dim) * r
+                          - (4 + 2 * self.dim) * r ** 2 + r ** 3)
+
+
+        # Utility RBF for neumann: See 14.04.22\C4, this is Phi'' / r^2 - Phi' / r^3
+        self.rbf3_divided_by_r = lambda ep, r: np.zeros_like(r)  # ToDo: Not (yet) implemented
+        self.rbf_util_neuman = lambda ep, r: np.zeros_like(r)       # ToDo: Not (yet) implemented
+
+
+
+
+class L_RBF_derivative(Kernel):
+    # L = d/dx, i.e. only first derivative
+
+    @abstractmethod
+    def __init__(self):
+        super(L_RBF_derivative, self).__init__()
+
+    def eval(self, x, y):
+        return self.rbf(self.ep, distance_matrix(np.atleast_2d(x), np.atleast_2d(y)))
+
+    def diagonal(self, X):
+        return np.ones(X.shape[0]) * self.rbf(self.ep, 0)
+
+    def __str__(self):
+        return self.name + ' [gamma = %2.2e]' % self.ep
+
+    def set_params(self, par):
+        self.ep = par
+
+    def dd_diagonal(self, X):
+        # Evaluation of (L^x - shift) (L^y - shift) k(x,y) for x=y: Likely some constant
+
+        return np.ones(X.shape[0]) * self.dd_eval(np.zeros((1, 1)), np.zeros((1, 1))).item()
+
+    def dd_eval(self, x, y):
+        # Evaluation of (L^x - shift) (L^y - shift) k(x,y)
+
+        array_dists = distance_matrix(np.atleast_2d(x), np.atleast_2d(y))
+
+        return - self.rbf2(self.ep, array_dists)
+
+    def d2_eval(self, x, y):
+        # Evaluation of (L^z - shift * Id) k(*,z)|_{x,y}
+        # Required exactly like this for "compute the nth basis", see also 14.12.21\A1
+
+        array_dists = distance_matrix(np.atleast_2d(x), np.atleast_2d(y))
+        array_x_minus_y = np.atleast_2d(x) - np.atleast_2d(y).transpose()
+
+        # Introduce minus since derivative wrt y instead of x
+        array_result = - np.sign(array_x_minus_y) * self.rbf1(self.ep, array_dists)
+
+        return array_result
+
+    def d1_eval(self, x, y):
+        # Evaluation of (L - shift * Id) k(*,y)|_{x}
+        # Required exactly like this for "compute the nth basis", see also 14.12.21\A1
+
+        # Need a minus here since first derivative depends (in sign) on whether x or y
+        return - self.d2_eval(x, y)
+
+    def mixed_L_eval(self, x, ctrs, n_ctrs, list_ind_L, list_ind_k, list_ind_n):
+        # L evaluations in x of Riesz representers which are located in ctrs.
+        # list_ind_L, list_ind_k and list_ind_n indicate to which set of functionals the Riesz
+        # representer belong to:
+        # y1 := y[list_ind_L, :] corresponds to the interior, y2 := y[list_ind_k, :] to Dirichlet
+        # boundary centers and y3 := y[list_ind_n, :] to Neumann boundary centers.
+        # Different eval-methods need to be used with respect to y1, y2, y3.
+
+        x = np.atleast_2d(x)
+        ctrs = np.atleast_2d(ctrs)
+
+        array_result = np.zeros((x.shape[0], ctrs.shape[0]))
+
+        array_result[:, list_ind_L] = self.dd_eval(x, ctrs[list_ind_L])
+        array_result[:, list_ind_k] = self.d1_eval(x, ctrs[list_ind_k])
+        array_result[:, list_ind_n] = self.mixed_L_n(x, ctrs[list_ind_n], n_ctrs[list_ind_n])
+
+        return array_result
+
+    def mixed_k_eval(self, x, ctrs, n_ctrs, list_ind_L, list_ind_k, list_ind_n):
+        # Point evaluations in x of Riesz representers which are located in ctrs.
+        # list_ind_L, list_ind_k and list_ind_n indicate to which set of functionals the Riesz
+        # representer belong to:
+        # y1 := y[list_ind_L, :] corresponds to the interior, y2 := y[list_ind_k, :] to Dirichlet
+        # boundary centers and y3 := y[list_ind_n, :] to Neumann boundary centers.
+        # Different eval-methods need to be used with respect to y1, y2, y3.
+
+        x = np.atleast_2d(x)
+        ctrs = np.atleast_2d(ctrs)
+
+        array_result = np.zeros((x.shape[0], ctrs.shape[0]))
+
+        array_result[:, list_ind_L] = self.d2_eval(x, ctrs[list_ind_L])
+        array_result[:, list_ind_k] = self.eval(x, ctrs[list_ind_k])
+        array_result[:, list_ind_n] = self.mixed_k_n(x, ctrs[list_ind_n], n_ctrs[list_ind_n])
+
+        return array_result
+
+    def mixed_n_eval(self, x, n_x, ctrs, n_ctrs, list_ind_L, list_ind_k, list_ind_n):
+        # not required therefore just return zeros
+
+        return np.zeros((x.shape[0], ctrs.shape[0]))
+
+    def nn_diagonal(self, X):
+        # not required therefore just return zeros
+
+        return np.zeros(X.shape[0])
+
+    def mixed_L_n(self, x, y, n_y):
+        # not required therefore just return zeros
+
+        return np.zeros((x.shape[0], y.shape[0]))
+
+    def mixed_k_n(self, x, y, n_y):
+        # not required, therefore just return zeros
+
+        return np.zeros((x.shape[0], y.shape[0]))
+
+    def mixed_n_n(self, x, y, n_x, n_y):
+        # not required, therefore just return zeros
+
+        return np.zeros((x.shape[0], y.shape[0]))
+
+
+class Gaussian_derivative(L_RBF_derivative):
+    # # Gaussian kernel phi(r) = exp(-r^2),
+    # used in conjunction with L = d/dx in 1D
+
+    def __init__(self, dim, shift=0, ep=1):
+        super().__init__()
+        self.dim = dim
+        self.shift = shift
+        self.ep = 1     # ToDo: Generalize this one!!!
+        self.name = 'gauss'
+
+        # Implement not only the rbf, but also its derivatives
+        self.rbf = lambda ep, r: np.exp(-r**2)
+        self.rbf1 = lambda ep, r: -2 * r * np.exp(-r**2)
+        self.rbf2 = lambda ep, r: (4 * r ** 2 - 2) * np.exp(-r**2)
+
+
+class LinearMatern_derivative(L_RBF_derivative):
+    # used in conjunction with L = d/dx in 1D
+
+    def __init__(self, dim, shift=0, ep=1):
+        super().__init__()
+        self.dim = dim
+        self.shift = shift
+        self.ep = 1     # ToDo: Generalize this one!!!
+        self.name = 'quadratic Matern'
+
+        # Implement not only the rbf, but also its derivatives
+        self.rbf = lambda ep, r:  np.exp(-r) * (1 + r)
+        self.rbf1 = lambda ep, r: - np.exp(-r) * r
+        self.rbf2 = lambda ep, r: np.exp(-r) * (r - 1)
+
+class QuadraticMatern_derivative(L_RBF_derivative):
+    # used in conjunction with L = d/dx in 1D
+
+    def __init__(self, dim, shift=0, ep=1):
+        super().__init__()
+        self.dim = dim
+        self.shift = shift
+        self.ep = 1     # ToDo: Generalize this one!!!
+        self.name = 'quadratic Matern'
+
+        # Implement not only the rbf, but also its derivatives
+        self.rbf = lambda ep, r: np.exp(-r) * (3 + 3 * r + r ** 2)
+        self.rbf1 = lambda ep, r: np.exp(-r) * (-r) * (r+1)
+        self.rbf2 = lambda ep, r: np.exp(-r) * (r ** 2 - r - 1)
+
+
+
+class CubicMatern_derivative(L_RBF_derivative):
+    # used in conjunction with L = d/dx in 1D
+
+    def __init__(self, dim, shift=0, ep=1):
+        super().__init__()
+        self.dim = dim
+        self.shift = shift
+        self.ep = 1     # ToDo: Generalize this one!!!
+        self.name = 'cubic Matern'
+
+        # Implement not only the rbf, but also its derivatives
+        self.rbf = lambda ep, r: np.exp(-r) * (15 + 15 * r + 6 * r ** 2 + r ** 3)
+        self.rbf1 = lambda ep, r: np.exp(-r) * (-r) * (r ** 2 + 3 * r + 3)
+        self.rbf2 = lambda ep, r: np.exp(-r) * (r ** 3 - 3 * r - 3)
 
 
 
diff --git a/vkoga_pde/vkoga_PDE.py b/vkoga_pde/vkoga_PDE.py
index 534434682dacf791e1e9af8ef5611810d242dac8..9c0c341b8cdbcc40bb4342f7bbbec101db8a7e96 100644
--- a/vkoga_pde/vkoga_PDE.py
+++ b/vkoga_pde/vkoga_PDE.py
@@ -1,5 +1,6 @@
 # First approach to develope code for greedy PDE.
 # ToDo: Not sure whether vector valued output makes sense, but let's try to implement it ..
+# ToDo: Remove reg_par !! Does not really make sense for PDE approximation, the RHS is exact!
 
 from vkoga_pde.kernels_PDE import wendland32_laplace
 import numpy as np
@@ -13,7 +14,9 @@ class VKOGA_PDE(BaseEstimator):
     def __init__(self, kernel=wendland32_laplace(dim=2), kernel_par=1,
                  verbose=True, n_report=10,
                  greedy_type='p_greedy', reg_par=0, restr_par=0,
-                 tol_f=1e-10, tol_p=1e-10, beta=None):
+                 tol_f=1e-10, tol_p=1e-10, beta=None, weight_dirichlet=1):
+
+        self.weight_dirichlet = weight_dirichlet
 
         # Set the verbosity on/off
         self.verbose = verbose
@@ -34,6 +37,8 @@ class VKOGA_PDE(BaseEstimator):
                 self.beta = 1.0
             elif self.greedy_type == 'p_greedy':
                 self.beta = 0.0
+            elif self.greedy_type == 'random':
+                self.beta = 1.0         # use any value, should not matter!
         else:
             self.beta = float(beta)
             if self.beta == 1.0:
@@ -41,7 +46,7 @@ class VKOGA_PDE(BaseEstimator):
             elif self.beta == 0.0:
                 self.greedy_type = 'p_greedy'
             else:
-                self.greedy_type = 'beta'
+                self.greedy_type = '{}'.format(beta)
 
         # ToDo: Did never think about the regularization parameter ..
         assert reg_par == 0, 'reg_par > 0 might not be implemented correctly!'
@@ -55,7 +60,8 @@ class VKOGA_PDE(BaseEstimator):
         # Some further settings
         self.ctrs_ = None  # centers for the interior
         self.ind_ctrs1 = []  # self.ctrs1_ = self.ctrs_[self.indI_ctrs1], i.e. interior
-        self.ind_ctrs2 = []  # self.ctrs2_ = self.ctrs_[self.indI_ctrs2], i.e. boundary
+        self.ind_ctrs2 = []  # self.ctrs2_ = self.ctrs_[self.indI_ctrs2], i.e. boundary Dirichlet
+        self.ind_ctrs3 = []  # self.ctrs3_ = self.ctrs_[self.indI_ctrs3], i.e. boundary Neuman
 
         self.Cut_ = None
         self.c = None
@@ -63,161 +69,294 @@ class VKOGA_PDE(BaseEstimator):
         # Initialize the convergence history
         self.train_hist = {}
         self.train_hist['n'] = []
-        self.train_hist['f'] = []
-        self.train_hist['f1'] = []
-        self.train_hist['f2'] = []
+        self.train_hist['f'] = []           # max residual (squared)
+        self.train_hist['f sol'] = []       # max residual (non-squared)
+        self.train_hist['f1'] = []          # max residual inside with L
+        self.train_hist['f2'] = []          # max residual boundary Dirichlet
+        self.train_hist['f3'] = []          # max residual boundary Neumann
         self.train_hist['p'] = []
-        self.train_hist['p1'] = []
-        self.train_hist['p2'] = []
+        self.train_hist['p1'] = []          # max Power function inside with L
+        self.train_hist['p2'] = []          # max Power function boundary Dirichlet
+        self.train_hist['p3'] = []          # max Power function boundary Neumann
+        self.train_hist['p2_interior'] = []
         self.train_hist['p selected'] = []  # list of selected power vals
         self.train_hist['f val'] = []
         self.train_hist['p val'] = []
 
         # Initialize variables for the maximal kernel diagonal value in the interior and the boundary
-        # ToDo: So far only f-greedy and P-greedy is implemented in a meaningful way,
-        #  for all other \beta values the weighting of the power function (via the kernel
-        #  diagonal values) is not implemented.
         self.max_kernel_diag1 = None
         self.max_kernel_diag2 = None
+        self.max_kernel_diag3 = None
 
-    def selection_rule(self, f1, f2, p1, p2):
-        # Generalization of selection rule: We have residual values and power function values
-        # for both the interior (f1, p1) as well as for the boundary (p1, p2).
-
-        # ToDo: So far only f-greedy and P-greedy is implemented in a meaningful way,
-        #  for all other \beta values the weighting of the power function (via the kernel
-        #  diagonal values) is not implemented.
-
-        if self.restr_par > 0:
-            # ToDo: Not sure whether this will really work, restr_idx might be empty
+        self.flag_neumann = None
 
-            p_ = max([np.max(p1), np.max(p2)])
-            restr_idx1 = np.nonzero(p1 >= self.restr_par * p_)[0]
-            restr_idx2 = np.nonzero(p2 >= self.restr_par * p_)[0]
-        else:
-            restr_idx1 = np.arange(len(p1))
-            restr_idx2 = np.arange(len(p2))
+    def selection_rule(self, f1, p1, f2, p2, f3, p3, flag_select_dirichlet=False):
+        # Generalization of selection rule: We have residual values and power function values
+        # for both the interior (f1, p1) as well as for the boundary (Dirichlet + Neumann) (f2, f3, p2, p3).
+
+        # if self.restr_par > 0:
+        #     # ToDo: Not sure whether this will really work, restr_idx might be empty
+        #
+        #     p_ = max([np.max(p1), np.max(p2), np.max(p3)])
+        #     restr_idx1 = np.nonzero(p1 >= self.restr_par * p_)[0]
+        #     restr_idx2 = np.nonzero(p2 >= self.restr_par * p_)[0]
+        #     restr_idx3 = np.nonzero(p3 >= self.restr_par * p_)[0]
+        # else:
+        restr_idx1 = np.arange(len(p1))
+        restr_idx2 = np.arange(len(p2))
+        restr_idx3 = np.arange(len(p3))
+
+        # Ensure that power function does not have negative values
+        p1 = np.maximum(p1, 0, p1)
+        p2 = np.maximum(p2, 0, p2)
+        p3 = np.maximum(p3, 0, p3)
 
         f1 = np.sum(f1 ** 2, axis=1)
         f2 = np.sum(f2 ** 2, axis=1)
+        f3 = np.sum(f3 ** 2, axis=1)
+
+        if flag_select_dirichlet:
+            f1 = 0 * f1
+            f3 = 0 * f3
 
         # Case distinction required for fp_greedy
         if self.greedy_type == 'fp_greedy':
-            array_weighted1 = f1[restr_idx1] / p1[restr_idx1]
-            idx1 = np.argmax(array_weighted1)
+            # Extra implementation for f/P greedy because it's a limit case
 
+            # Points in the interior
+            if len(f1[restr_idx1]) != 0:    
+                array_weighted1 = f1[restr_idx1] / p1[restr_idx1]
+                idx1 = np.argmax(array_weighted1)
+                array_weighted1_max = array_weighted1[idx1]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted1_max = 0.0
+
+            # Dirichlet boundary
             if len(f2[restr_idx2]) != 0:  # required especially in 1D
                 array_weighted2 = f2[restr_idx2] / p2[restr_idx2]
                 idx2 = np.argmax(array_weighted2)
-
-                if array_weighted1[idx1] > array_weighted2[idx2]:
-                    one_or_two = 1
-                    idx = restr_idx1[idx1]
-                else:
-                    one_or_two = 2
-                    idx = restr_idx2[idx2]
+                array_weighted2_max = array_weighted2[idx2]
             else:
-                one_or_two = 1
-                idx = restr_idx1[idx1]
+                # Set value to zero, such that it is the smallest possible
+                array_weighted2_max = 0.0
+
+            # Neumann boundary
+            if len(f3[restr_idx3]) != 0:  # required especially in 1D
+                array_weighted3 = f3[restr_idx3] / p3[restr_idx3]
+                idx3 = np.argmax(array_weighted3)
+                array_weighted3_max = array_weighted3[idx3]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted3_max = 0.0
+
         elif self.greedy_type == 'p_greedy':
-            # extra implementation for P-greedy to address weighting of powerfunc (interior, boundary)
-            array_weighted1 = p1[restr_idx1]
-            idx1 = np.argmax(array_weighted1)
+            # Extra implementation for P-greedy to address weighting of powerfunc (interior, boundary)
 
+            # Points in the interior
+            if len(f1[restr_idx1]) != 0:    
+                array_weighted1 = p1[restr_idx1] / self.max_kernel_diag1
+                idx1 = np.argmax(array_weighted1)
+                array_weighted1_max = array_weighted1[idx1]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted1_max = 0.0
+
+            # Dirichlet boundary
             if len(f2[restr_idx2]) != 0:  # required especially in 1D
-                array_weighted2 = p2[restr_idx2]
+                array_weighted2 = p2[restr_idx2] / self.max_kernel_diag2
                 idx2 = np.argmax(array_weighted2)
+                array_weighted2_max = self.weight_dirichlet * array_weighted2[idx2]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted2_max = 0.0
+
+            # Neumann boundary
+            if len(f3[restr_idx3]) != 0:  # required especially in 1D
+                array_weighted3 = p3[restr_idx3] / self.max_kernel_diag3
+                idx3 = np.argmax(array_weighted3)
+                array_weighted3_max = array_weighted3[idx3]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted3_max = 0.0
 
-                # Pay attention at the weighting of the power functions
-                if array_weighted1[idx1] / self.max_kernel_diag1 \
-                        > array_weighted2[idx2] / self.max_kernel_diag2:
-                    one_or_two = 1
-                    idx = restr_idx1[idx1]
-                else:
-                    one_or_two = 2
-                    idx = restr_idx2[idx2]
+        elif self.greedy_type == 'random':
+            # Copied from below (beta-greedy) and argmax replaced by randint
+
+            if len(f1[restr_idx1]) != 0:   
+                array_weighted1 = f1[restr_idx1] ** self.beta * p1[restr_idx1] ** (1 - self.beta)
+                # idx1 = np.argmax(array_weighted1)
+                idx1 = np.random.randint(len(array_weighted1))
+                array_weighted1_max = array_weighted1[idx1]
             else:
-                one_or_two = 1
-                idx = restr_idx1[idx1]
+                # Set value to zero, such that it is the smallest possible
+                array_weighted1_max = 0.0
+
+            # Dirichlet boundary
+            if len(f2[restr_idx2]) != 0:  # required especially in 1D
+                array_weighted2 = f2[restr_idx2] ** self.beta * p2[restr_idx2] ** (1 - self.beta)
+                # idx2 = np.argmax(array_weighted2)
+                idx2 = np.random.randint(len(array_weighted2))
+                array_weighted2_max = self.weight_dirichlet * array_weighted2[idx2]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted2_max = 0.0
+
+            # Neumann boundary
+            if len(f3[restr_idx3]) != 0:  # required especially in 1D
+                array_weighted3 = f3[restr_idx3] ** self.beta * p3[restr_idx3] ** (1 - self.beta)
+                # idx3 = np.argmax(array_weighted3)
+                idx3 = np.random.randint(len(array_weighted3))
+                array_weighted3_max = array_weighted3[idx3]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted3_max = 0.0
 
         else:
-            array_weighted1 = f1[restr_idx1] ** self.beta * p1[restr_idx1] ** (1 - self.beta)
-            idx1 = np.argmax(array_weighted1)
+            # Points in the interior
+            if len(f1[restr_idx1]) != 0:    
+                array_weighted1 = f1[restr_idx1] ** self.beta * p1[restr_idx1] ** (1 - self.beta)
+                idx1 = np.argmax(array_weighted1)
+                array_weighted1_max = array_weighted1[idx1]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted1_max = 0.0
 
+            # Dirichlet boundary
             if len(f2[restr_idx2]) != 0:  # required especially in 1D
                 array_weighted2 = f2[restr_idx2] ** self.beta * p2[restr_idx2] ** (1 - self.beta)
                 idx2 = np.argmax(array_weighted2)
-
-                if array_weighted1[idx1] > array_weighted2[idx2]:
-                    one_or_two = 1
-                    idx = restr_idx1[idx1]
-                else:
-                    one_or_two = 2
-                    idx = restr_idx2[idx2]
+                array_weighted2_max = self.weight_dirichlet * array_weighted2[idx2]
             else:
-                one_or_two = 1
-                idx = restr_idx1[idx1]
+                # Set value to zero, such that it is the smallest possible
+                array_weighted2_max = 0.0
+
+            # Neumann boundary
+            if len(f3[restr_idx3]) != 0:  # required especially in 1D
+                array_weighted3 = f3[restr_idx3] ** self.beta * p3[restr_idx3] ** (1 - self.beta)
+                idx3 = np.argmax(array_weighted3)
+                array_weighted3_max = array_weighted3[idx3]
+            else:
+                # Set value to zero, such that it is the smallest possible
+                array_weighted3_max = 0.0
+
+        # Check where maximum is attained and set correspondingly
+        list_max = [1 * array_weighted1_max, 1 * array_weighted2_max, 1 * array_weighted3_max]
+        one_two_or_three = np.argmax(np.array(list_max)) + 1      # 1 = interior, 2 = Dirichlet, 3 = Neumann
+
+        if one_two_or_three == 1:
+            idx = restr_idx1[idx1]
+        elif one_two_or_three == 2:
+            idx = restr_idx2[idx2]
+        elif one_two_or_three == 3:
+            idx = restr_idx3[idx3]
+
+        # The following implementation is quite laborious
+        if len(f1[restr_idx1]) != 0:
+            if self.greedy_type == 'p_greedy':
 
-        # ToDo: This implementation is quite laborious ..
-        if self.greedy_type == 'p_greedy':
-            f_max1, p_max1 = np.max(f1), np.max(p1) / self.max_kernel_diag1
+                f_max1, p_max1 = np.max(f1), np.max(p1) / self.max_kernel_diag1
+            else:
+                f_max1, p_max1 = np.max(f1), np.max(p1)
         else:
-            f_max1, p_max1 = np.max(f1), np.max(p1)
+            f_max1, p_max1 = 0.0, 0.0
 
-        if len(f2[restr_idx2]) != 0:
+        # Dirichlet boundary
+        if len(f2[restr_idx2]) != 0:  # required especially in 1D
             if self.greedy_type == 'p_greedy':
                 f_max2, p_max2 = np.max(f2), np.max(p2) / self.max_kernel_diag2
             else:
                 f_max2, p_max2 = np.max(f2), np.max(p2)
         else:
-            f_max2, p_max2 = 0, 0
+            f_max2, p_max2 = 0.0, 0.0
 
-        return (one_or_two, idx), f_max1, f_max2, p_max1, p_max2
+        # Neumann boundary
+        if len(f3[restr_idx3]) != 0:  # required especially in 1D
+            if self.greedy_type == 'p_greedy':
+                f_max3, p_max3 = np.max(f3), np.max(p3) / self.max_kernel_diag3
+            else:
+                f_max3, p_max3 = np.max(f3), np.max(p3)
+        else:
+            f_max3, p_max3 = 0., 0.0
 
-    def fit(self, X1, y1, X2, y2,
-            X1_val=None, y1_val=None, X2_val=None, y2_val=None, maxIter=None):
-        # Points in X1 are considered as interior points, points in X2 as boundary points.
-        # They can coincide (partly), but for X1 we use the L-evaluation, for X2 we use the point evaluation.
+        return (one_two_or_three, idx), f_max1, f_max2, f_max3, p_max1, p_max2, p_max3
 
-        # Compute the maximal kernel diagonal value in the interior and the boundary
-        self.max_kernel_diag2 = np.max(self.kernel.diagonal(X2))
-        self.max_kernel_diag1 = np.max(self.kernel.dd_diagonal(X1))
+    def fit(self, X1, y1, X2, y2, X3=None, y3=None, n_X3=None,
+            X1_val=None, y1_val=None, X2_val=None, y2_val=None, X3_val=None, y3_val=None, n_X3_val=None,
+            maxIter=100, y1_sol=None):
+        # ToDo: Tell the user what to do in case there are either no Dirichlet or no Neumann
+        # ToDo: boundary points.
+
+        # Points in X1 are considered as interior points, points in X2 as Dirichlet boundary points
+        # and points in X3 are Neumann boundary points (whereby n3 are the normalized normal vectors).
+        # They can coincide (partly), but for X1 we use the L-evaluation, for X2 we use the point
+        # evaluation and for X3 we use evaluation of the normal derivative.
+        # y1_sol is optional and contains the solution values at the points X1 - this is usefull to
+        # track the convergence to the true solution in case it is known.
 
         list_ctrs = []  # not that beautiful, but required to store the final centers
 
         # Check the datasets
         X1, y1 = check_X_y(X1, y1, multi_output=True)
-        X2, y2 = check_X_y(X2, y2, multi_output=True)  # ToDo: Check for consistency of X1, X2, X1_val, X2_val
-
-        if len(y1.shape) == 1:
-            y1 = y1[:, None]
-        if len(y2.shape) == 1:
-            y2 = y2[:, None]
+        X2, y2 = check_X_y(X2, y2, multi_output=True)
+        # ToDo: Not sure whether this work properly if either X2 or X3 is empty
 
         # Check the validation datasets (if provided, otherwise set to None)
-        if X1_val is None or y1_val is None or X2_val is None or y2_val is None:
+        if X1_val is None or y1_val is None \
+                or X2_val is None or y2_val is None\
+                or X3_val is None or y3_val is None:
             X1_val = None
             y1_val = None
             X2_val = None
             y2_val = None
+            X3_val = None
+            y3_val = None
             self.flag_val = False
         else:
             self.flag_val = True
             X1_val, y1_val = check_X_y(X1_val, y2_val, multi_output=True)
             X2_val, y2_val = check_X_y(X2_val, y2_val, multi_output=True)
+            X3_val, y3_val = check_X_y(X3_val, y3_val, multi_output=True)
             # We will assume in the following that X_val and X are disjoint
 
             if len(y1_val.shape) == 1:
                 y1_val = y1_val[:, None]
             if len(y2_val.shape) == 1:
                 y2_val = y2_val[:, None]
+            if len(y3_val.shape) == 1:
+                y3_val = y3_val[:, None]
+
+        if X3 is None or y3 is None or n_X3 is None:
+            X3 = np.zeros((0, X1.shape[1]))
+            y3 = np.zeros((0, X1.shape[1]))
+            n_X3 = np.zeros((0, X1.shape[1]))
+            self.flag_neumann = False
+        else:
+            X3, y3 = check_X_y(X3, y3, multi_output=True)
+            self.flag_neumann = True
+
+        if y1_sol is not None:
+            y1_sol = check_array(y1_sol)
+
+        if len(y1.shape) == 1:
+            y1 = y1[:, None]
+        if len(y2.shape) == 1:
+            y2 = y2[:, None]
+        if len(y3.shape) == 1:
+            y3 = y3[:, None]
+
+        # Compute the maximal kernel diagonal value in the interior and the boundary
+        self.max_kernel_diag1 = np.max(self.kernel.dd_diagonal(X1))
+        self.max_kernel_diag2 = np.max(self.kernel.diagonal(X2))
+        if self.flag_neumann:
+            self.max_kernel_diag3 = np.max(self.kernel.nn_diagonal(X3))
 
         # Check whether already fitted
         if self.ctrs_ is None:
             self.ctrs_ = np.zeros((0, X1.shape[1]))
             self.Cut_ = np.zeros((0, 0))
             self.c = np.zeros((0, y1.shape[1]))
-
+            self.n_ctrs_ = np.zeros((maxIter, X1.shape[1]))       # stores normal vectors (only for Neumann ctrs)
             # ToDo: If self.ctrs_ is not None, does this work?
 
         # Check whether "new X1" and previously chosen centers overlap
@@ -243,28 +382,44 @@ class VKOGA_PDE(BaseEstimator):
                     list_truly_new_X2.append(idx_x)
         else:
             list_truly_new_X2 = list(range(X2.shape[0]))
-
         X2 = X2[list_truly_new_X2, :]
         y2 = y2[list_truly_new_X2, :]
 
+        # Check whether "new X3" and previously chosen centers overlap
+        if self.flag_neumann:
+            list_truly_new_X3 = []
+            if self.ctrs_.shape[0] > 0:
+                for idx_x in range(X3.shape[0]):
+                    if min(np.linalg.norm(self.ctrs_ - X3[idx_x, :], axis=1)) < 1e-10:
+                        continue
+                    else:
+                        list_truly_new_X3.append(idx_x)
+            else:
+                list_truly_new_X3 = list(range(X3.shape[0]))
+            X3 = X3[list_truly_new_X3, :]
+            y3 = y3[list_truly_new_X3, :]
+
         # Initialize the residual and update the given y values by substracting the current model
         y1 = y1 - self.predict_Ls(X1)
         y2 = y2 - self.predict_s(X2)
+        if self.flag_neumann:
+            y3 = y3 - self.predict_ns(X3, n_X3)
 
-        if self.flag_val:
-            y1_val = y1_val - self.predict_Ls(X1_val)
-            y2_val = y2_val - self.predict_s(X2_val)
+        # if self.flag_val:
+        #     y1_val = y1_val - self.predict_Ls(X1_val)
+        #     y2_val = y2_val - self.predict_s(X2_val)
+        #     y3_val = y3_val - self.predict_ns(X3_val, n_X3_val)
 
         # Get the data dimension
-        N, q = y1.shape[0] + y2.shape[0], y1.shape[1]
-        N1, N2 = y1.shape[0], y2.shape[0]
-        if self.flag_val:
-            N_val = y1_val.shape[0] + y2_val.shape[0]
-            N1_val, N2_val = y1_val.shape[0], y2_val.shape[0]
+        N, q = y1.shape[0] + y2.shape[0] + y3.shape[0], y1.shape[1]
+        N1, N2, N3 = y1.shape[0], y2.shape[0], y3.shape[0]
+        # if self.flag_val:
+        #     N_val = y1_val.shape[0] + y2_val.shape[0] + y3_val.shape[0]
+        #     N1_val, N2_val, N3_val = y1_val.shape[0], y2_val.shape[0], y3_val.shape[0]
 
         # Set maxIter_continue
-        if maxIter is None or maxIter > N:
-            self.maxIter = min([N, 100])
+        if maxIter > N:
+            self.maxIter = N
         else:
             self.maxIter = maxIter
 
@@ -275,60 +430,79 @@ class VKOGA_PDE(BaseEstimator):
             self.restr_par = 0
 
         # Initialize list for the chosen and non-chosen indices - seperately for interior and boundary
-        indI1_, indI2_ = [], []
-        notIndI1, notIndI2 = list(range(y1.shape[0])), list(range(y2.shape[0]))
+        indI1_, indI2_, indI3_ = [], [], []
+        notIndI1, notIndI2, notIndI3 = list(range(y1.shape[0])), list(range(y2.shape[0])), list(range(y3.shape[0]))
         c = np.zeros((self.maxIter, q))
 
         # Compute the Newton basis values (related to the old centers) on the new X
-        # ToDo: I think restart training etc does not work properly, since it is not debugged propely!!
-        #  Thus the following lines of codes may be irrelevant
-        Vx_new_X1_old_ctrs = self.kernel.mixed_eval(X1, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-        Vx_new_X2_old_ctrs = self.kernel.mixed_eval(X2, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-        if self.flag_val:
-            Vx_val_new_X1_old_ctrs = self.kernel.mixed_eval(
-                X1_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-            Vx_val_new_X2_old_ctrs = self.kernel.mixed_eval(
-                X2_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
+        Vx_new_X1_old_ctrs = self.kernel.mixed_k_eval(
+            X1, self.ctrs_, self.n_ctrs_, self.ind_ctrs1, self.ind_ctrs2, self.ind_ctrs3) @ self.Cut_.transpose()
+        Vx_new_X2_old_ctrs = self.kernel.mixed_k_eval(
+            X2, self.ctrs_, self.n_ctrs_, self.ind_ctrs1, self.ind_ctrs2, self.ind_ctrs3) @ self.Cut_.transpose()
+        # if self.flag_val:
+        #     Vx_val_new_X1_old_ctrs = self.kernel.mixed_k_eval(
+        #         X1_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
+        #     Vx_val_new_X2_old_ctrs = self.kernel.mixed_k_eval(
+        #         X2_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
+
 
         # Compute the application of L to the Newton basis values (related to the old centers) on the new X
-        LVx_new_X1_old_ctrs = self.kernel.mixed_eval_L(X1, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-        LVx_new_X2_old_ctrs = self.kernel.mixed_eval_L(X2, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-        if self.flag_val:
-            LVx_val_new_X1_old_ctrs = self.kernel.mixed_eval_L(
-                X1_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-            LVx_val_new_X2_old_ctrs = self.kernel.mixed_eval_L(
-                X2_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
-
-        # Initialize arrays for the (L-evaluation of the) Newton basis values ..
+        LVx_new_X1_old_ctrs = self.kernel.mixed_L_eval(
+            X1, self.ctrs_, self.n_ctrs_, self.ind_ctrs1, self.ind_ctrs2, self.ind_ctrs3) @ self.Cut_.transpose()
+        # if self.flag_val:
+        #     LVx_val_new_X1_old_ctrs = self.kernel.mixed_L_eval(
+        #         X1_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
+        #     LVx_val_new_X2_old_ctrs = self.kernel.mixed_L_eval(
+        #         X2_val, self.ctrs_, self.ind_ctrs1) @ self.Cut_.transpose()
+
+        # Compute the Neumann derivatives of the Newton basis values (related to the old centers) on the new X
+        if self.flag_neumann:
+            nVx_new_X3_old_ctrs = self.kernel.mixed_n_eval(
+                X3, n_X3, self.ctrs_, self.n_ctrs_, self.ind_ctrs1, self.ind_ctrs2, self.ind_ctrs3) @ self.Cut_.transpose()
+        else:
+            nVx_new_X3_old_ctrs = np.zeros((0, self.ctrs_.shape[0]))
+
+
+        # Initialize arrays for the (L-, n-evaluation of the) Newton basis values ..
         # .. (related to the new centers) on the new X1, X2
         Vx1 = np.zeros((N1, self.maxIter))
         Vx2 = np.zeros((N2, self.maxIter))
+
         LVx1 = np.zeros((N1, self.maxIter))
-        LVx2 = np.zeros((N2, self.maxIter))
+        nVx3 = np.zeros((N3, self.maxIter))
 
-        if self.flag_val:
-            Vx1_val = np.zeros((N1_val, self.maxIter))
-            Vx2_val = np.zeros((N2_val, self.maxIter))
-            LVx1_val = np.zeros((N1_val, self.maxIter))
-            LVx2_val = np.zeros((N2_val, self.maxIter))
+        # if self.flag_val:
+        #     Vx1_val = np.zeros((N1_val, self.maxIter))
+        #     Vx2_val = np.zeros((N2_val, self.maxIter))
+        #     LVx1_val = np.zeros((N1_val, self.maxIter))
+        #     LVx2_val = np.zeros((N2_val, self.maxIter))
 
         # Compute the powervals on X1, X2 and X1_val, X2_val
+        # ToDo: Vielleicht quadrieren?
         p1 = self.kernel.dd_diagonal(X1) + self.reg_par
         p1 = p1 - np.sum(LVx_new_X1_old_ctrs ** 2, axis=1)
 
         p2 = self.kernel.diagonal(X2) + self.reg_par
         p2 = p2 - np.sum(Vx_new_X2_old_ctrs ** 2, axis=1)
 
-        if self.flag_val:
-            p1_val = self.kernel.dd_diagonal(X1_val) + self.reg_par
-            p1_val = p1_val - np.sum(LVx_val_new_X1_old_ctrs ** 2, axis=1)
+        p3 = self.kernel.nn_diagonal(X3) + self.reg_par
+        p3 = p3 - np.sum(nVx_new_X3_old_ctrs ** 2, axis=1)
+
 
-            p2_val = self.kernel.diagonal(X2_val) + self.reg_par
-            p2_val = p2_val - np.sum(Vx_val_new_X2_old_ctrs ** 2, axis=1)
+        # The following is a newly implemented quantity which is interesting for 1D
+        p2_interior = self.kernel.diagonal(X1) + self.reg_par
+        p2_interior = p2_interior - np.sum(Vx_new_X1_old_ctrs ** 2, axis=1)
+
+        # if self.flag_val:
+        #     p1_val = self.kernel.dd_diagonal(X1_val) + self.reg_par
+        #     p1_val = p1_val - np.sum(LVx_val_new_X1_old_ctrs ** 2, axis=1)
+        #
+        #     p2_val = self.kernel.diagonal(X2_val) + self.reg_par
+        #     p2_val = p2_val - np.sum(Vx_val_new_X2_old_ctrs ** 2, axis=1)
 
         # Extend Cut_ matrix, i.e. continue to build on old self.Cut_ matrix
-        N_ctrs_so_far, N_ctrs_so_far1, N_ctrs_so_far2 = \
-            self.Cut_.shape[0], len(self.ind_ctrs1), len(self.ind_ctrs2)
+        N_ctrs_so_far, N_ctrs_so_far1, N_ctrs_so_far2, N_ctrs_so_far3 = \
+            self.Cut_.shape[0], len(self.ind_ctrs1), len(self.ind_ctrs2), len(self.ind_ctrs3)
 
         Cut_ = np.zeros((N_ctrs_so_far + self.maxIter, N_ctrs_so_far + self.maxIter))
         Cut_[:N_ctrs_so_far, :N_ctrs_so_far] = self.Cut_
@@ -336,161 +510,239 @@ class VKOGA_PDE(BaseEstimator):
         # Iterative selection of new points
         self.print_message('begin')
 
-        n1, n2 = 0, 0  # number of centers in interior or boundary
+        n1, n2, n3 = 0, 0, 0  # number of centers in interior or boundary (Dirichlet, Neumann)
         for n in range(self.maxIter):
             # prepare
             self.train_hist['n'].append(self.ctrs_.shape[0] + n + 1)
-            self.train_hist['f'].append([])
+
+            self.train_hist['f sol'].append([])     # tracking of approx of solution (if solution values are provided)
+            self.train_hist['f'].append([])         # max of f1, f2, f3 below
             self.train_hist['f1'].append([])
             self.train_hist['f2'].append([])
-            self.train_hist['p'].append([])
-            self.train_hist['p1'].append([])
-            self.train_hist['p2'].append([])
-            self.train_hist['p selected'].append([])
-            if self.flag_val:
-                self.train_hist['p val'].append([])
-                self.train_hist['f val'].append([])
+            self.train_hist['f3'].append([])
+
+            self.train_hist['p'].append([])         # max of p1, p2, p3 below
+            self.train_hist['p1'].append([])        # power function of L evaluation in interior
+            self.train_hist['p2'].append([])        # power function of Dirichlet evaluation on boundary
+            self.train_hist['p3'].append([])        # power function of Neumann evaluation on boundary
+            self.train_hist['p2_interior'].append(np.max(p2_interior))      # novel tracking, interestind in 1D
+            self.train_hist['p selected'].append([])        # selected power val from either p1, p2, p3
+            # if self.flag_val:
+            #     self.train_hist['p val'].append([])
+            #     self.train_hist['f val'].append([])
 
             # select the current index
-            (one_or_two, idx), self.train_hist['f1'][-1], self.train_hist['f2'][-1], \
-            self.train_hist['p1'][-1], self.train_hist['p2'][-1] \
-                = self.selection_rule(y1[notIndI1], y2[notIndI2], p1[notIndI1], p2[notIndI2])
-            self.train_hist['f'][-1] = max(self.train_hist['f1'][-1], self.train_hist['f2'][-1])
-            self.train_hist['p'][-1] = max(self.train_hist['p1'][-1], self.train_hist['p2'][-1])
+            flag_dirichlet = False      # quickly played around a little bit
+            # if n < 40:
+            #     flag_dirichlet = True
+            # else:
+            #     flag_dirichlet = False
+
+            (one_two_or_three, idx), \
+                self.train_hist['f1'][-1], self.train_hist['f2'][-1], self.train_hist['f3'][-1], \
+                self.train_hist['p1'][-1], self.train_hist['p2'][-1], self.train_hist['p3'][-1] \
+                = self.selection_rule(y1[notIndI1], p1[notIndI1], y2[notIndI2], p2[notIndI2], y3[notIndI3], p3[notIndI3],
+                                      flag_select_dirichlet=flag_dirichlet)
+            self.train_hist['f'][-1] = max(self.train_hist['f1'][-1], self.train_hist['f2'][-1], self.train_hist['f3'][-1])
+            self.train_hist['p'][-1] = max(self.train_hist['p1'][-1], self.train_hist['p2'][-1], self.train_hist['p3'][-1])
+
+            # Append residual value (compared to true solution which was provided optionally in y1_sol)
+            if y1_sol is not None:
+                if n == 0:          # zero prediction for empty model
+                    y1_sol_pred = np.zeros_like(y1_sol)
+                else:               # make use of Newton basis elements
+                    y1_sol_pred = Vx1[:, :n] @ c[:n, :]
+
+                self.train_hist['f sol'][-1] = np.max(np.abs(y1_sol - y1_sol_pred))
+
 
             # add the current index and store selected p value and store the selected center
-            if one_or_two == 1:
+            if one_two_or_three == 1:
                 self.train_hist['p selected'][-1] = p1[notIndI1[idx]]
                 indI1_.append(notIndI1[idx])
 
                 self.ind_ctrs1.append(N_ctrs_so_far + n)
                 list_ctrs.append(X1[[notIndI1[idx]], :])
-
-            elif one_or_two == 2:
+            elif one_two_or_three == 2:
                 self.train_hist['p selected'][-1] = p2[notIndI2[idx]]
                 indI2_.append(notIndI2[idx])
 
                 self.ind_ctrs2.append(N_ctrs_so_far + n)
                 list_ctrs.append(X2[[notIndI2[idx]], :])
+            elif one_two_or_three == 3:
+                self.train_hist['p selected'][-1] = p3[notIndI3[idx]]
+                indI3_.append(notIndI3[idx])
 
-            if self.flag_val:
-                self.train_hist['p val'][-1] = max([np.max(p1_val), np.max(p2_val)])
-                self.train_hist['f val'][-1] = max(
-                    [np.max(np.sum(y1_val ** 2, axis=1)), np.max(np.sum(y2_val ** 2, axis=1))])
+                self.ind_ctrs3.append(N_ctrs_so_far + n)
+                list_ctrs.append(X3[[notIndI3[idx]], :])
+
+                # Add normal vector
+                # ToDo: This does not work correctly if there already exist chosen centers
+                self.n_ctrs_[n, :] = n_X3[notIndI3[idx]]
+
+            # if self.flag_val:
+            #     self.train_hist['p val'][-1] = max([np.max(p1_val), np.max(p2_val)])
+            #     self.train_hist['f val'][-1] = max(
+            #         [np.max(np.sum(y1_val ** 2, axis=1)), np.max(np.sum(y2_val ** 2, axis=1))])
 
             # check if the tolerances are reacheded
             if self.train_hist['f'][n] <= self.tol_f or self.train_hist['p'][n] <= self.tol_p:
                 n = n - 1
-                if one_or_two == 1:
+                if one_two_or_three == 1:
                     n1 = n1 - 1
-                elif one_or_two == 2:
+                elif one_two_or_three == 2:
                     n2 = n2 - 1
+                elif one_two_or_three == 3:
+                    n3 = n3 - 1
                 self.print_message('end')
                 break
 
             # compute the nth basis (including normalization), see also 14.12.21\A1
-            if one_or_two == 1:
+            if one_two_or_three == 1:
                 # Computations (this is simply expansion in Newton basis) ..
-                Vx1[notIndI1, n] = self.kernel.d2_eval(X1[notIndI1, :], X1[indI1_[n1], :])[:, 0] \
-                                   - Vx_new_X1_old_ctrs[notIndI1, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
-                                   - Vx1[notIndI1, :] @ LVx1[indI1_[n1], :].transpose()
-                Vx2[notIndI2, n] = self.kernel.d2_eval(X2[notIndI2, :], X1[indI1_[n1], :])[:, 0] \
-                                   - Vx_new_X2_old_ctrs[notIndI2, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
-                                   - Vx2[notIndI2, :] @ LVx1[indI1_[n1], :].transpose()
-
-                LVx1[notIndI1, n] = self.kernel.dd_eval(X1[notIndI1, :], X1[indI1_[n1], :])[:, 0] \
-                                    - LVx_new_X1_old_ctrs[notIndI1, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
-                                    - LVx1[notIndI1, :] @ LVx1[indI1_[n1], :].transpose()
-                LVx2[notIndI2, n] = self.kernel.dd_eval(X2[notIndI2, :], X1[indI1_[n1], :])[:, 0] \
-                                    - LVx_new_X2_old_ctrs[notIndI2, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
-                                    - LVx2[notIndI2, :] @ LVx1[indI1_[n1], :].transpose()
-
-                Vx1[indI1_[n1], n] += self.reg_par  # ToDo: This is likely wrong/incomplete/..
+                Vx1[:, n] = self.kernel.d2_eval(X1[:, :], X1[indI1_[n1], :])[:, 0] \
+                            - Vx_new_X1_old_ctrs[:, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
+                            - Vx1[:, :] @ LVx1[indI1_[n1], :].transpose()
+                Vx2[:, n] = self.kernel.d2_eval(X2[:, :], X1[indI1_[n1], :])[:, 0] \
+                            - Vx_new_X2_old_ctrs[:, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
+                            - Vx2[:, :] @ LVx1[indI1_[n1], :].transpose()
+
+                LVx1[:, n] = self.kernel.dd_eval(X1[:, :], X1[indI1_[n1], :])[:, 0] \
+                             - LVx_new_X1_old_ctrs[:, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
+                             - LVx1[:, :] @ LVx1[indI1_[n1], :].transpose()
+
+                nVx3[:, n] = self.kernel.mixed_L_n(X1[indI1_[n1], :], X3, n_X3)[0, :].transpose() \
+                            - nVx_new_X3_old_ctrs[:, :] @ LVx_new_X1_old_ctrs[indI1_[n1], :].transpose() \
+                            - nVx3[:, :] @ LVx1[indI1_[n1], :].transpose()
+
+                # Normalizations ..
+                Vx1[:, n] = Vx1[:, n] / np.sqrt(p1[indI1_[n1]])
+                Vx2[:, n] = Vx2[:, n] / np.sqrt(p1[indI1_[n1]])
+                LVx1[:, n] = LVx1[:, n] / np.sqrt(p1[indI1_[n1]])
+                nVx3[:, n] = nVx3[:, n] / np.sqrt(p1[indI1_[n1]])
+
+                # Calculations for validation sets
+
+            elif one_two_or_three == 2:
+                # Computations (this is simply expansion in Newton basis) ..
+                Vx1[:, n] = self.kernel.eval(X1[:, :], X2[indI2_[n2], :])[:, 0] \
+                            - Vx_new_X1_old_ctrs[:, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
+                            - Vx1[:, :] @ Vx2[indI2_[n2], :].transpose()
+                Vx2[:, n] = self.kernel.eval(X2[:, :], X2[indI2_[n2], :])[:, 0] \
+                            - Vx_new_X2_old_ctrs[:, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
+                            - Vx2[:, :] @ Vx2[indI2_[n2], :].transpose()
+
+                LVx1[:, n] = self.kernel.d1_eval(X1[:, :], X2[indI2_[n2], :])[:, 0] \
+                             - LVx_new_X1_old_ctrs[:, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
+                             - LVx1[:, :] @ Vx2[indI2_[n2], :].transpose()
+
+                nVx3[:, n] = self.kernel.mixed_k_n(X2[indI2_[n2], :], X3, n_X3)[0, :].transpose() \
+                            - nVx_new_X3_old_ctrs[:, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
+                            - nVx3[:, :] @ Vx2[indI2_[n2], :].transpose()
 
                 # Normalizations ..
-                Vx1[notIndI1, n] = Vx1[notIndI1, n] / np.sqrt(p1[indI1_[n1]])
-                Vx2[notIndI2, n] = Vx2[notIndI2, n] / np.sqrt(p1[indI1_[n1]])
-                LVx1[notIndI1, n] = LVx1[notIndI1, n] / np.sqrt(p1[indI1_[n1]])
-                LVx2[notIndI2, n] = LVx2[notIndI2, n] / np.sqrt(p1[indI1_[n1]])
-
-                # ToDo: Take care of the validation sets ..
-                # if self.flag_val:
-                #     Vx_val[:, n] = self.kernel.eval(X_val, X[indI_[n], :])[:, 0]\
-                #         - Vx_val_new_X_old_ctrs[:, :] @ Vx_new_X_old_ctrs[indI_[n], :].transpose()\
-                #         - Vx_val[:, :n+1] @ Vx[indI_[n], 0:n+1].transpose()
-                #     Vx_val[:, n] = Vx_val[:, n] / np.sqrt(p[indI_[n]])
-
-            elif one_or_two == 2:
+                if p2[indI2_[n2]] < 0:
+                    asdf = 1+2
+                    print('hallo')
+                Vx1[:, n] = Vx1[:, n] / np.sqrt(p2[indI2_[n2]])
+                Vx2[:, n] = Vx2[:, n] / np.sqrt(p2[indI2_[n2]])
+                LVx1[:, n] = LVx1[:, n] / np.sqrt(p2[indI2_[n2]])
+                nVx3[:, n] = nVx3[:, n] / np.sqrt(p2[indI2_[n2]])
+
+                # Calculations for validation sets
+
+            elif one_two_or_three == 3:
                 # Computations (this is simply expansion in Newton basis) ..
-                Vx1[notIndI1, n] = self.kernel.eval(X1[notIndI1, :], X2[indI2_[n2], :])[:, 0] \
-                                   - Vx_new_X1_old_ctrs[notIndI1, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
-                                   - Vx1[notIndI1, :] @ Vx2[indI2_[n2], :].transpose()
-                Vx2[notIndI2, n] = self.kernel.eval(X2[notIndI2, :], X2[indI2_[n2], :])[:, 0] \
-                                   - Vx_new_X2_old_ctrs[notIndI2, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
-                                   - Vx2[notIndI2, :] @ Vx2[indI2_[n2], :].transpose()
-
-                LVx1[notIndI1, n] = self.kernel.d1_eval(X1[notIndI1, :], X2[indI2_[n2], :])[:, 0] \
-                                    - LVx_new_X1_old_ctrs[notIndI1, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
-                                    - LVx1[notIndI1, :] @ Vx2[indI2_[n2], :].transpose()
-                LVx2[notIndI2, n] = self.kernel.d1_eval(X2[notIndI2, :], X2[indI2_[n2], :])[:, 0] \
-                                    - LVx_new_X2_old_ctrs[notIndI2, :] @ Vx_new_X2_old_ctrs[indI2_[n2], :].transpose() \
-                                    - LVx2[notIndI2, :] @ Vx2[indI2_[n2], :].transpose()
-
-                Vx2[indI2_[n2], n] += self.reg_par  # ToDo: This is likely wrong/incomplete/..
+                Vx1[:, n] = self.kernel.mixed_k_n(X1[:, :], X3[indI3_[n3], :], self.n_ctrs_[[n], :])[:, 0] \
+                            - Vx1[:, :] @ nVx3[indI3_[n3], :].transpose()
+                            # - Vx_new_X1_old_ctrs[:, :] @ nVx_new_X3_old_ctrs[indI3_[n3], :].transpose() \
+                Vx2[:, n] = self.kernel.mixed_k_n(X2[:, :], X3[indI3_[n3], :], self.n_ctrs_[[n], :])[:, 0] \
+                            - Vx2[:, :] @ nVx3[indI3_[n3], :].transpose()
+                            # - Vx_new_X2_old_ctrs[:, :] @ nVx_new_X3_old_ctrs[indI3_[n3], :].transpose() \
+
+                LVx1[:, n] = self.kernel.mixed_L_n(X1[:, :], X3[indI3_[n3], :], self.n_ctrs_[[n], :])[:, 0] \
+                            - LVx1[:, :] @ nVx3[indI3_[n3], :].transpose()
+                            # - LVx_new_X1_old_ctrs[:, :] @ nVx_new_X3_old_ctrs[indI3_[n3], :].transpose() \
+
+                nVx3[:, n] = self.kernel.mixed_n_n(X3[[indI3_[n3]], :], X3, self.n_ctrs_[[n], :], n_X3)[0, :].transpose() \
+                            - nVx3[:, :] @ nVx3[indI3_[n3], :].transpose()
+                            # - nVx_new_X3_old_ctrs[:, :] @ nVx_new_X3_old_ctrs[indI3_[n3], :].transpose() \
 
                 # Normalizations ..
-                Vx1[notIndI1, n] = Vx1[notIndI1, n] / np.sqrt(p2[indI2_[n2]])
-                Vx2[notIndI2, n] = Vx2[notIndI2, n] / np.sqrt(p2[indI2_[n2]])
-                LVx1[notIndI1, n] = LVx1[notIndI1, n] / np.sqrt(p2[indI2_[n2]])
-                LVx2[notIndI2, n] = LVx2[notIndI2, n] / np.sqrt(p2[indI2_[n2]])
+                Vx1[:, n] = Vx1[:, n] / np.sqrt(p3[indI3_[n3]])
+                Vx2[:, n] = Vx2[:, n] / np.sqrt(p3[indI3_[n3]])
+                LVx1[:, n] = LVx1[:, n] / np.sqrt(p3[indI3_[n3]])
+                nVx3[:, n] = nVx3[:, n] / np.sqrt(p3[indI3_[n3]])
 
-                # ToDo: Take care of the validation sets
+                # Calculations for validation sets
 
             # update the change of basis (see 14.12.21\B)
             Cut_new_row = np.ones(N_ctrs_so_far + n + 1)  # required only for the final one
 
-            if one_or_two == 1:
+            if one_two_or_three == 1:
                 Cut_new_row[:N_ctrs_so_far + n] = \
                     -np.concatenate((LVx_new_X1_old_ctrs[indI1_[n1], :], LVx1[indI1_[n1], :n])) \
                     @ Cut_[:N_ctrs_so_far + n, :N_ctrs_so_far + n]
 
                 Cut_[N_ctrs_so_far + n, :N_ctrs_so_far + n + 1] = Cut_new_row / LVx1[indI1_[n1], n]
-            elif one_or_two == 2:
+            elif one_two_or_three == 2:
                 Cut_new_row[:N_ctrs_so_far + n] = \
                     -np.concatenate((Vx_new_X2_old_ctrs[indI2_[n2], :], Vx2[indI2_[n2], :n])) \
                     @ Cut_[:N_ctrs_so_far + n, :N_ctrs_so_far + n]
 
                 Cut_[N_ctrs_so_far + n, :N_ctrs_so_far + n + 1] = Cut_new_row / Vx2[indI2_[n2], n]
 
+            elif one_two_or_three == 3:
+                Cut_new_row[:N_ctrs_so_far + n] = \
+                    -np.concatenate((nVx_new_X3_old_ctrs[indI3_[n3], :], nVx3[indI3_[n3], :n])) \
+                    @ Cut_[:N_ctrs_so_far + n, :N_ctrs_so_far + n]
+
+                Cut_[N_ctrs_so_far + n, :N_ctrs_so_far + n + 1] = Cut_new_row / nVx3[indI3_[n3], n]
+
             # compute the nth coefficient
-            if one_or_two == 1:
+            if one_two_or_three == 1:
                 c[n] = y1[indI1_[n1]] / np.sqrt(p1[indI1_[n1]])
-            elif one_or_two == 2:
+            elif one_two_or_three == 2:
                 c[n] = y2[indI2_[n2]] / np.sqrt(p2[indI2_[n2]])
+            elif one_two_or_three == 3:
+                c[n] = y3[indI3_[n3]] / np.sqrt(p3[indI3_[n3]])
 
             # update the power functions: ToDo: For Gaussian in 1D we obtain negative power vals :(
             p1[notIndI1] = p1[notIndI1] - LVx1[notIndI1, n] ** 2
             p2[notIndI2] = p2[notIndI2] - Vx2[notIndI2, n] ** 2
 
-            if self.flag_val:
-                p1_val = p1_val - LVx1_val[:, n] ** 2
-                p2_val = p2_val - Vx2_val[:, n] ** 2
+            if np.min(p2) < -0.0005:
+                aasdfff = 1 + 2
+                print('debug: min(p2), median(p2) = {:.3e}, {:.3f}'.format(np.min(p2), np.median(p2)))
+
+            p3[notIndI3] = p3[notIndI3] - nVx3[notIndI3, n] ** 2
+            p2_interior = p2_interior - Vx1[:, n] ** 2
+
+            # Calculations for validation sets
+            # if self.flag_val:
+            #     p1_val = p1_val - LVx1_val[:, n] ** 2
+            #     p2_val = p2_val - Vx2_val[:, n] ** 2
 
             # update the residual
             y1[notIndI1] = y1[notIndI1] - LVx1[notIndI1, n][:, None] * c[n]
             y2[notIndI2] = y2[notIndI2] - Vx2[notIndI2, n][:, None] * c[n]
+            y3[notIndI3] = y3[notIndI3] - nVx3[notIndI3, n][:, None] * c[n]
 
-            if self.flag_val:
-                y1_val = y1_val - LVx1_val[:, n][:, None] * c[n]
-                y2_val = y2_val - Vx2_val[:, n][:, None] * c[n]
+            # Calculations for validation sets
+            # if self.flag_val:
+            #     y1_val = y1_val - LVx1_val[:, n][:, None] * c[n]
+            #     y2_val = y2_val - Vx2_val[:, n][:, None] * c[n]
 
             # remove the nth index from the dictionary
-            if one_or_two == 1:
+            if one_two_or_three == 1:
                 notIndI1.pop(idx)
                 n1 += 1
-            elif one_or_two == 2:
+            elif one_two_or_three == 2:
                 notIndI2.pop(idx)
                 n2 += 1
+            elif one_two_or_three == 3:
+                notIndI3.pop(idx)
+                n3 += 1
 
             # Report some data every now and then
             if n % self.n_report == 0:
@@ -505,12 +757,18 @@ class VKOGA_PDE(BaseEstimator):
         self.Cut_ = Cut_[:N_ctrs_so_far + n + 1, :N_ctrs_so_far + n + 1]
         self.ind_ctrs1 = self.ind_ctrs1[:N_ctrs_so_far1 + n1 + 1]  # ToDo: Check whether this is correct!!!!
         self.ind_ctrs2 = self.ind_ctrs2[:N_ctrs_so_far2 + n2 + 1]
+        self.ind_ctrs3 = self.ind_ctrs3[:N_ctrs_so_far3 + n3 + 1]
         self.coef_ = self.Cut_.transpose() @ self.c
 
         self.ctrs_ = np.concatenate((self.ctrs_,
                                      np.concatenate(list_ctrs[:n + 1], axis=0)),
                                     axis=0)
 
+        self.Vx1 = Vx1
+        self.Vx2 = Vx2
+        self.LVx1 = LVx1
+        self.nVx3 = nVx3
+
         return self
 
     def predict_s(self, X):
@@ -521,7 +779,8 @@ class VKOGA_PDE(BaseEstimator):
         # Compute prediction
         if self.ctrs_.shape[0] > 0:
             prediction = self.kernel.d2_eval(X, self.ctrs_[self.ind_ctrs1]) @ self.coef_[self.ind_ctrs1] \
-                         + self.kernel.eval(X, self.ctrs_[self.ind_ctrs2]) @ self.coef_[self.ind_ctrs2]
+                         + self.kernel.eval(X, self.ctrs_[self.ind_ctrs2]) @ self.coef_[self.ind_ctrs2] \
+                         + self.kernel.mixed_k_n(X, self.ctrs_[self.ind_ctrs3], self.n_ctrs_[self.ind_ctrs3]) @ self.coef_[self.ind_ctrs3]
         else:
             prediction = np.zeros((X.shape[0], 1))
 
@@ -533,10 +792,27 @@ class VKOGA_PDE(BaseEstimator):
         # Validate the input
         X = check_array(X)
 
-        # Compute prediction
+        # Compute predictionf
         if self.ctrs_.shape[0] > 0:
             prediction = self.kernel.dd_eval(X, self.ctrs_[self.ind_ctrs1]) @ self.coef_[self.ind_ctrs1] \
-                         + self.kernel.d1_eval(X, self.ctrs_[self.ind_ctrs2]) @ self.coef_[self.ind_ctrs2]
+                         + self.kernel.d1_eval(X, self.ctrs_[self.ind_ctrs2]) @ self.coef_[self.ind_ctrs2] \
+                         + self.kernel.mixed_L_n(X, self.ctrs_[self.ind_ctrs3], self.n_ctrs_[self.ind_ctrs3]) @ self.coef_[self.ind_ctrs3]
+        else:
+            prediction = np.zeros((X.shape[0], 1))
+
+        return prediction
+
+    def predict_ns(self, X, n_x):
+        # Prediction of ns, i.e. evaluation of normal derivative on given points
+
+        # Validate the input
+        X = check_array(X)
+
+        # Compute prediction
+        if self.ctrs_.shape[0] > 0:
+            prediction = self.kernel.mixed_L_n(self.ctrs_[self.ind_ctrs1], X, n_x) @ self.coef_[self.ind_ctrs1] \
+                         + self.kernel.mixed_k_n(self.ctrs_[self.ind_ctrs2], X, n_x) @ self.coef_[self.ind_ctrs2] \
+                         + self.kernel.mixed_n_n(X, n_x, self.ctrs_[self.ind_ctrs3], self.n_ctrs_[self.ind_ctrs3]) @ self.coef_[self.ind_ctrs3]
         else:
             prediction = np.zeros((X.shape[0], 1))
 
@@ -545,7 +821,7 @@ class VKOGA_PDE(BaseEstimator):
     def print_message(self, when):
         if self.verbose and when == 'begin':
             print('')
-            print('*' * 30 + ' [VKOGA] ' + '*' * 30)
+            print('*' * 30 + ' [PDE-VKOGA] ' + '*' * 30)
             print('Training model with')
             print('       |_ kernel              : %s' % self.kernel)
             print('       |_ regularization par. : %2.2e' % self.reg_par)