diff --git a/src/action.hpp b/src/action.hpp
index ff6264a8d3e071b1b8723a6f03a1fc307affdead..6597a7ee5542bb5bf771f199290cdc471e8f1633 100644
--- a/src/action.hpp
+++ b/src/action.hpp
@@ -43,49 +43,48 @@ get_Q_matrix(const array::ArrayView<ConstScalar, array::Shape<2,2,2> >& Q)
     return Qm;
 }
 
+template <typename LRType, typename XType>
+Eigen::Matrix<typename XType::value_type,4,4> nambu_diag_mul(const LRType& L, const XType& X, const LRType& R)
+{
+    /* Multiply L*X*R with Nambu-diagonal L, R */
+    Eigen::Matrix<typename XType::value_type,4,4> Y;
+    block_00(Y) = block_00(L) * block_00(X) * block_00(R);
+    block_01(Y) = block_00(L) * block_01(X) * block_11(R);
+    block_10(Y) = block_11(L) * block_10(X) * block_00(R);
+    block_11(Y) = block_11(L) * block_11(X) * block_11(R);
+    return Y;
+}
+
 template <typename ConstScalar, typename Shape, typename Shape2, typename Scalar = typename std::remove_const<ConstScalar>::type>
 inline Scalar S_2(const array::ArrayView<ConstScalar, Shape>& Qmat, const array::ArrayView<const Complex, Shape2>& U, const Complex& h)
 {
-    const int N_i[2] = {0, 1};
-    const int N_j[2] = {1, 0};
-    const size_t invk[4] = {3, 2, 1, 0};
+    const int N_i[2] = {1, 0};
+    const int N_j[2] = {0, 1};
+
+    /* Directions cycle (left->right, down->up, right->left, up->down) */
+    const size_t invdir[4] = {2, 3, 1, 0};
     Scalar S{0.0};
 
-    size_t nx = Qmat.dim(0);
-    size_t ny = Qmat.dim(1);
+    int nx = Qmat.dim(0);
+    int ny = Qmat.dim(1);
 
-    for (size_t i = 0; i < nx; ++i) {
-        for (size_t j = 0; j < ny; ++j) {
+    for (int i = 0; i < nx; ++i) {
+        for (int j = 0; j < ny; ++j) {
             auto Q1 = Qmat.part(i,j).matrix();
 
             /* Sum over neighbors */
             for (size_t k = 0; k < 2; ++k) {
-                int di = N_i[k];
-                int dj = N_j[k];
+                int i2 = i + N_i[k];
+                int j2 = j + N_j[k];
                 
-                if (i == 0 && di < 0)
+                if (i2 < 0 || i2 >= nx || j2 < 0 || j2 >= ny)
                     continue;
-                if (i == nx-1 && di > 0)
-                    continue;
-                if (j == 0 && dj < 0)
-                    continue;
-                if (j == ny-1 && dj > 0)
-                    continue;
-
-                size_t i2 = i + di;
-                size_t j2 = j + dj;
 
                 auto Q2 = Qmat.part(i2,j2).matrix();
                 auto U12 = U.part(i,j,k).matrix();
-                auto U21 = U.part(i2,j2,invk[k]).matrix();
+                auto U21 = U.part(i2,j2,invdir[k]).matrix();
 
-                Eigen::Matrix<Scalar,4,4> M;
-
-                /* M = Q1 - U12 * Q * U21; enforce Nambu block diagonal U */
-                block_00(M) = block_00(U12) * block_00(Q2) * block_00(U21) - block_00(Q1);
-                block_01(M) = block_00(U12) * block_01(Q2) * block_11(U21) - block_01(Q1);
-                block_10(M) = block_11(U12) * block_10(Q2) * block_00(U21) - block_10(Q1);
-                block_11(M) = block_11(U12) * block_11(Q2) * block_11(U21) - block_11(Q1);
+                auto M = nambu_diag_mul(U12, Q2, U21) - Q1;
 
                 S += (M * M).trace();
             }
@@ -103,28 +102,98 @@ inline Scalar S_Omega(const array::ArrayView<ConstScalar, Shape>& Qmat,
 {
     Scalar S{0.0};
 
-    size_t nx = Qmat.dim(0);
-    size_t ny = Qmat.dim(1);
+    int nx = Qmat.dim(0);
+    int ny = Qmat.dim(1);
 
-    for (size_t i = 0; i < nx; ++i) {
-        for (size_t j = 0; j < ny; ++j) {
+    for (int i = 0; i < nx; ++i) {
+        for (int j = 0; j < ny; ++j) {
             auto Q = Qmat.part(i,j).matrix();
             auto w = Omega.part(i, j).matrix();
 
             S += (w * Q).trace();
-            S += omega * (Q(1,1) + Q(2,2) - Q(3,3) - Q(4,4));
+            S += omega * (Q(0,0) + Q(1,1) - Q(2,2) - Q(3,3));
         }
     }
 
     return h*h*h*S;
 }
 
+template <typename ConstScalar, typename Shape, typename Shape2, typename Scalar = typename std::remove_const<ConstScalar>::type>
+inline Scalar S_H(const array::ArrayView<ConstScalar, Shape>& Qmat, const array::ArrayView<const Complex, Shape2>& U,
+                  const Complex& h, const Complex& alpha)
+{
+    using array::Array;
+    using array::Shape;
+
+    /* Neighbor plaquettes: (nth,dir) -> (di,dj)
+     * where 'nth' is the direction index of the link,
+     * starting from bottom and cycling counterclockwise.
+     */
+    const int plaqc[4][4][2] = {
+        {{0,0}, {1,0}, {1,1}, {0,1}},
+        {{0,-1}, {1,-1}, {1,0}, {0,0}},
+        {{-1,0}, {0,0}, {0,1}, {-1,1}},
+        {{-1,-1}, {0,-1}, {0,0}, {0,1}},
+    };
+
+    Scalar S{0.0};
+
+    int nx = Qmat.dim(0);
+    int ny = Qmat.dim(1);
+
+    for (int i = 0; i < nx; ++i) {
+        for (int j = 0; j < ny; ++j) {
+
+            /* Sum over neighboring plaquettes */
+            for (int iplaq = 0; iplaq < 4; ++iplaq) {
+                int i1 = (int)i + plaqc[iplaq][0][0];
+                int j1 = (int)j + plaqc[iplaq][0][1];
+                int i2 = (int)i + plaqc[iplaq][1][0];
+                int j2 = (int)j + plaqc[iplaq][1][1];
+                int i3 = (int)i + plaqc[iplaq][2][0];
+                int j3 = (int)j + plaqc[iplaq][2][1]; 
+                int i4 = (int)i + plaqc[iplaq][3][0];
+                int j4 = (int)j + plaqc[iplaq][3][1];
+
+                auto Q1 = Qmat.part(i1,j1).matrix();
+                auto Q2 = Qmat.part(i2,j2).matrix();
+                auto Q3 = Qmat.part(i3,j3).matrix();
+                auto Q4 = Qmat.part(i4,j4).matrix();
+
+                auto U21 = U.part(i1,j1,0).matrix();
+                auto U32 = U.part(i2,j2,1).matrix();
+                auto U43 = U.part(i3,j3,2).matrix();
+                auto U14 = U.part(i4,j4,3).matrix();
+
+                auto U12 = U.part(i2,j2,3).matrix();
+                auto U23 = U.part(i3,j3,2).matrix();
+                auto U34 = U.part(i4,j4,0).matrix();
+                auto U41 = U.part(i1,j1,1).matrix();
+
+                auto P = U14*U43*U32*U21;
+
+                Matrix<Complex,4,4> I;
+
+                I << 1, 0, 0, 0,
+                    0, 1, 0, 0,
+                    0, 0, 1, 0,
+                    0, 0, 0, 1;
+
+                S += ((P - I) * (Q1 - nambu_diag_mul(U12, Q2, U21) * nambu_diag_mul(U14, Q4, U41))).trace();
+            }
+        }
+    }
+
+    return (2.0*alpha/(Complex(0,4)*h)) * S;
+}
+
 template <typename ConstScalar, typename Shape, typename Shape2, typename Shape3, typename Scalar = typename std::remove_const<ConstScalar>::type>
 inline Scalar S(const array::ArrayView<ConstScalar, Shape>& Q,
                 const Scalar& omega,
                 const array::ArrayView<const Complex, Shape2>& U,
                 const array::ArrayView<const Complex, Shape3>& Omega,
-                const Complex& h)
+                const Complex& h,
+                const Complex& alpha)
 {
     using array::Array;
     using array::Shape;
@@ -139,7 +208,7 @@ inline Scalar S(const array::ArrayView<ConstScalar, Shape>& Q,
         for (size_t j = 0; j < ny; ++j)
             Qmat.part(i,j).matrix() = get_Q_matrix(Q.part(i,j));
 
-    return S_2(Qmat, U, h) + S_Omega(Qmat, omega, Omega, h);
+    return S_2(Qmat, U, h) + S_Omega(Qmat, omega, Omega, h) + S_H(Qmat, U, h, alpha);
 }
 
 
diff --git a/src/core.cpp b/src/core.cpp
index d3239251973a99a6b6f9704ee2737c83dc08dc42..e6529cf0446631f9dbe1f113f1c5efde0c2ae381 100644
--- a/src/core.cpp
+++ b/src/core.cpp
@@ -23,6 +23,7 @@ private:
     OmegaType Omega_;
     size_t nx_, ny_;
     double h_;
+    Complex alpha_;
     Complex omega_;
 
     typedef Vector<size_t> SizeVector;
@@ -53,8 +54,8 @@ private:
         }
 
 public:
-    Action(double h, UType&& U, OmegaType&& Omega)
-        : U_(std::move(U)), Omega_(std::move(Omega)), nx_(U_.dim(0)), ny_(U_.dim(1)), h_(h), omega_(0)
+    Action(double h, Complex alpha, UType&& U, OmegaType&& Omega)
+        : U_(std::move(U)), Omega_(std::move(Omega)), nx_(U_.dim(0)), ny_(U_.dim(1)), h_(h), alpha_(alpha), omega_(0)
         {
             if (Omega.dim(0) != nx_ || Omega.dim(1) != ny_)
                 throw std::domain_error("U and Omega have incompatible shape");
@@ -72,7 +73,7 @@ public:
         {
             if (Q.dim(0) != nx_ || Q.dim(1) != ny_)
                 throw std::out_of_range("Q array wrong size");
-            return S(Q, omega_, U_, Omega_, h_);
+            return S(Q, omega_, U_, Omega_, h_, alpha_);
         }
 
     CppAD::ADFun<Complex> eval_ad(const QType& Qval)
@@ -94,7 +95,7 @@ public:
             CppAD::Independent(Q.storage(), dynamic);
 
             Array<ADComplex, Shape<1> > res;
-            res(0u) = S(Q, dynamic[0], U_, Omega_, h_);
+            res(0u) = S(Q, dynamic[0], U_, Omega_, h_, alpha_);
 
             return {Q.storage(), res.storage()};
         }
@@ -183,8 +184,8 @@ PYBIND11_MODULE(_core, m)
     m.doc() = "usadelndsoc._core";
 
     py::class_<Action>(m, "Action")
-        .def(py::init<double, Action::UType, Action::OmegaType>(), "Action",
-             py::arg("h"), py::arg("U"), py::arg("Omega"))
+        .def(py::init<double, Complex, Action::UType, Action::OmegaType>(), "Action",
+             py::arg("h"), py::arg("alpha"), py::arg("U"), py::arg("Omega"))
         .def("set", &Action::set, "Set parameters", py::arg("omega"))
         .def("eval", &Action::eval, "Evaluate value of action", py::arg("Q"))
         .def("grad", &Action::grad, "Evaluate gradient of action", py::arg("Q"))