From 2bd3b7f00487f1fc03d65fac2b36661eaea45762 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:27:30 +0100
Subject: [PATCH 01/10] Update constants.h

Remove the hidden definition of GAMMA0. GAMMA0 was defined here and used in zhangli2.cu instead of the user-definable GammaLL. This was a bug that caused a wrong calculation of the Zhang-Li torque whenever the user changed GammaLL.
---
 cuda/constants.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cuda/constants.h b/cuda/constants.h
index 7119d9bbc..1a227480d 100644
--- a/cuda/constants.h
+++ b/cuda/constants.h
@@ -5,7 +5,9 @@
 #define MU0    (4*PI*1e-7)        // Permeability of vacuum in Tm/A
 #define QE     1.60217646E-19     // Electron charge in C
 #define MUB    9.2740091523E-24   // Bohr magneton in J/T
-#define GAMMA0 1.7595e11          // Gyromagnetic ratio of electron, in rad/Ts
+// GAMMA0 must NOT be used: the gyromagnetic ratio is a user-definable parameter (GammaLL), not a constant.
+// It was only used in zhangli2.cu, which now takes a region-wise g instead.
+//#define GAMMA0 1.7595e11          // Gyromagnetic ratio of electron, in rad/Ts
 #define HBAR   1.05457173E-34
 
 #endif

From 7351b864b8dde4d8c43e036b17d8eaaf3b15d469 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:30:05 +0100
Subject: [PATCH 02/10] Update lltorque.go

added function ScaleGamma, which is used to implement the region-wise gamma factor.
---
 cuda/lltorque.go | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cuda/lltorque.go b/cuda/lltorque.go
index 9c081c502..17b08e4e8 100644
--- a/cuda/lltorque.go
+++ b/cuda/lltorque.go
@@ -20,6 +20,16 @@ func LLTorque(torque, m, B *data.Slice, alpha MSlice) {
 		alpha.DevPtr(0), alpha.Mul(0), N, cfg)
 }
 
+// ScaleGamma multiplies the X, Y and Z components of torque by GFactor
+// (to allow a region-wise g). See scalegamma.cu.
+func ScaleGamma(torque *data.Slice, GFactor MSlice) {
+	N := torque.Len()
+	cfg := make1DConf(N)
+
+	k_scalegamma_async(torque.DevPtr(X), torque.DevPtr(Y), torque.DevPtr(Z),
+		GFactor.DevPtr(0), GFactor.Mul(0), N, cfg)
+}
+
 // Landau-Lifshitz torque with precession disabled.
 // Used by engine.Relax().
 func LLNoPrecess(torque, m, B *data.Slice) {

From 2ca76934e21e4d0fe34dccd02fcdbf11b67a0156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:33:36 +0100
Subject: [PATCH 03/10] Update zhangli.go

Changed the Zhang-Li torque to use the user-defined GammaLL instead of the constant GAMMA0. This was a bug that caused a wrong calculation of the Zhang-Li torque whenever the user changed GammaLL.
---
 cuda/zhangli.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cuda/zhangli.go b/cuda/zhangli.go
index c2346f536..8b3da8df6 100644
--- a/cuda/zhangli.go
+++ b/cuda/zhangli.go
@@ -6,7 +6,7 @@ import (
 
 // Add Zhang-Li ST torque (Tesla) to torque.
 // see zhangli.cu
-func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol MSlice, mesh *data.Mesh) {
+func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol, g MSlice, mesh *data.Mesh) {
 	c := mesh.CellSize()
 	N := mesh.Size()
 	cfg := make3DConf(N)
@@ -21,6 +21,7 @@ func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol MSlice, mes
 		alpha.DevPtr(0), alpha.Mul(0),
 		xi.DevPtr(0), xi.Mul(0),
 		pol.DevPtr(0), pol.Mul(0),
+		g.DevPtr(0), g.Mul(0),
 		float32(c[X]), float32(c[Y]), float32(c[Z]),
 		N[X], N[Y], N[Z], mesh.PBC_code(), cfg)
 }

From df28acdef61ba140af2f9ae68b81b8fd6d694867 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:34:46 +0100
Subject: [PATCH 04/10] Update zhangli2.cu

Changed the Zhang-Li torque to use the user-defined GammaLL instead of the constant GAMMA0. This was a bug that caused a wrong calculation of the Zhang-Li torque whenever the user changed GammaLL.
---
 cuda/zhangli2.cu | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cuda/zhangli2.cu b/cuda/zhangli2.cu
index 4907ae065..430894a85 100644
--- a/cuda/zhangli2.cu
+++ b/cuda/zhangli2.cu
@@ -4,7 +4,8 @@
 #include "stencil.h"
 #include <stdint.h>
 
-#define PREFACTOR ((MUB) / (2 * QE * GAMMA0))
+// Was MUB / (2 * QE * GAMMA0): gamma is now divided out per cell in the kernel via g (GFactor * gammaLL), so the numerator must stay MUB.
+#define PREFACTOR ((MUB) / (2 * QE))
 
 // spatial derivatives without dividing by cell size
 #define deltax(in) (in[idx(hclampx(ix+1), iy, iz)] - in[idx(lclampx(ix-1), iy, iz)])
@@ -21,6 +22,7 @@ addzhanglitorque2(float* __restrict__ tx, float* __restrict__ ty, float* __restr
                   float* __restrict__ alpha_, float alpha_mul,
                   float* __restrict__ xi_, float xi_mul,
                   float* __restrict__ pol_, float pol_mul,
+                  float* __restrict__ g_, float g_mul,
                   float cx, float cy, float cz,
                   int Nx, int Ny, int Nz, uint8_t PBC) {
 
@@ -37,8 +39,9 @@ addzhanglitorque2(float* __restrict__ tx, float* __restrict__ ty, float* __restr
     float alpha = amul(alpha_, alpha_mul, i);
     float xi    = amul(xi_, xi_mul, i);
     float pol   = amul(pol_, pol_mul, i);
+    float g     = amul(g_, g_mul, i);
     float invMs = inv_Msat(Ms_, Ms_mul, i);
-    float b = invMs * PREFACTOR / (1.0f + xi*xi);
+    float b = invMs * PREFACTOR / (g*(1.0f + xi*xi));
     float3 J = pol*vmul(jx_, jy_, jz_, jx_mul, jy_mul, jz_mul, i);
 
     float3 hspin = make_float3(0.0f, 0.0f, 0.0f); // (u·∇)m
@@ -62,4 +65,3 @@ addzhanglitorque2(float* __restrict__ tx, float* __restrict__ ty, float* __restr
     ty[i] += torque.y;
     tz[i] += torque.z;
 }
-

From 1673553d20eef7c0ea0883dca28786eeb22e6735 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:39:55 +0100
Subject: [PATCH 05/10] Update temperature.go

Changed the temperature field calculation to take into account the region-wise factor of gamma.
---
 cuda/temperature.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cuda/temperature.go b/cuda/temperature.go
index 4a443dc56..1656d23be 100644
--- a/cuda/temperature.go
+++ b/cuda/temperature.go
@@ -7,7 +7,7 @@ import (
 
 // Set Bth to thermal noise (Brown).
 // see temperature.cu
-func SetTemperature(Bth, noise *data.Slice, k2mu0_Mu0VgammaDt float64, Msat, Temp, Alpha MSlice) {
+func SetTemperature(Bth, noise *data.Slice, k2mu0_Mu0VgammaDt float64, Msat, Temp, Alpha, g MSlice) {
 	util.Argument(Bth.NComp() == 1 && noise.NComp() == 1)
 
 	N := Bth.Len()
@@ -17,5 +17,6 @@ func SetTemperature(Bth, noise *data.Slice, k2mu0_Mu0VgammaDt float64, Msat, Tem
 		Msat.DevPtr(0), Msat.Mul(0),
 		Temp.DevPtr(0), Temp.Mul(0),
 		Alpha.DevPtr(0), Alpha.Mul(0),
+		g.DevPtr(0), g.Mul(0),
 		N, cfg)
 }

From 6da8633efb988924e66db10800e4bf89b6383606 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:40:24 +0100
Subject: [PATCH 06/10] Update temperature2.cu

Changed the temperature field calculation to take into account the region-wise factor of gamma.
---
 cuda/temperature2.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cuda/temperature2.cu b/cuda/temperature2.cu
index 94331fa84..e07e5384c 100644
--- a/cuda/temperature2.cu
+++ b/cuda/temperature2.cu
@@ -7,6 +7,7 @@ settemperature2(float* __restrict__  B,      float* __restrict__ noise, float kB
                 float* __restrict__ Ms_, float Ms_mul,
                 float* __restrict__ temp_, float temp_mul,
                 float* __restrict__ alpha_, float alpha_mul,
+                float* __restrict__ g_, float g_mul,
                 int N) {
 
     int i =  ( blockIdx.y*gridDim.x + blockIdx.x ) * blockDim.x + threadIdx.x;
@@ -14,7 +15,7 @@ settemperature2(float* __restrict__  B,      float* __restrict__ noise, float kB
         float invMs = inv_Msat(Ms_, Ms_mul, i);
         float temp = amul(temp_, temp_mul, i);
         float alpha = amul(alpha_, alpha_mul, i);
-        B[i] = noise[i] * sqrtf((kB2_VgammaDt * alpha * temp * invMs ));
+        float g = amul(g_, g_mul, i);
+        B[i] = noise[i] * sqrtf((kB2_VgammaDt * alpha * temp * invMs / g));
     }
 }
-

From 94103156c81fa2208e33f1a091c289732df7dbba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:45:43 +0100
Subject: [PATCH 07/10] Create scalegamma.cu

added support for a regionwise factor for the parameter gammaLL. The total torque is now multiplied by this factor ('GFactor')
---
 cuda/scalegamma.cu | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 cuda/scalegamma.cu

diff --git a/cuda/scalegamma.cu b/cuda/scalegamma.cu
new file mode 100644
index 000000000..ae037b317
--- /dev/null
+++ b/cuda/scalegamma.cu
@@ -0,0 +1,17 @@
+#include "amul.h"
+#include "float3.h"
+#include <stdint.h>
+
+// Multiply the torque components (tx, ty, tz) in place, element by element,
+// by the scalar parameter GammaFactor.
+extern "C" __global__ void
+scalegamma(float* __restrict__  tx, float* __restrict__  ty, float* __restrict__  tz,
+          float* __restrict__  scalegamma_, float scalegamma_mul, int N) {
+
+    int i =  ( blockIdx.y*gridDim.x + blockIdx.x ) * blockDim.x + threadIdx.x;
+    if (i >= N) return; // indices at or beyond N have no element to scale
+    const float factor = amul(scalegamma_, scalegamma_mul, i);
+    tx[i] *= factor;
+    ty[i] *= factor;
+    tz[i] *= factor;
+}

From 7476804d810b1372ceda9103b56763faa25c74ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:52:06 +0100
Subject: [PATCH 08/10] Update torque.go

added support for a regionwise factor for the parameter gammaLL. The total torque is now multiplied by this factor ('GFactor')
---
 engine/torque.go | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/engine/torque.go b/engine/torque.go
index 9b1a18245..cbadbd482 100644
--- a/engine/torque.go
+++ b/engine/torque.go
@@ -10,6 +10,8 @@ import (
 
 var (
 	Alpha                            = NewScalarParam("alpha", "", "Landau-Lifshitz damping constant")
+	GFactor			 	 = NewScalarParam("GFactor","","Region-wise scaling factor for GammaLL (default: 1.00). If GammaLL is set to µB/hbar (8.7941e10), GFactor is the material's g-factor.")
+	GammaLL                  float64 = 1.7595e11 // Gyromagnetic ratio of spins, in rad/Ts
 	Xi                               = NewScalarParam("xi", "", "Non-adiabaticity of spin-transfer-torque")
 	Pol                              = NewScalarParam("Pol", "", "Electrical current polarization")
 	Lambda                           = NewScalarParam("Lambda", "", "Slonczewski Λ parameter")
@@ -22,17 +24,26 @@ var (
 	STTorque                         = NewVectorField("STTorque", "T", "Spin-transfer torque/γ0", AddSTTorque)
 	J                                = NewExcitation("J", "A/m2", "Electrical current density")
 	MaxTorque                        = NewScalarValue("maxTorque", "T", "Maximum torque/γ0, over all cells", GetMaxTorque)
-	GammaLL                  float64 = 1.7595e11 // Gyromagnetic ratio of spins, in rad/Ts
 	Precess                          = true
 	DisableZhangLiTorque             = false
 	DisableSlonczewskiTorque         = false
 	fixedLayerPosition               = FIXEDLAYER_TOP // instructs mumax3 how free and fixed layers are stacked along +z direction
 )
 
+// Previously, GammaLL was a user-settable parameter that was global (the same in all regions).
+// It is now split in two:
+//     GammaLL is a user-settable global constant,
+//     GFactor is a region-wise parameter.
+// The torques are calculated throughout mumax using GammaLL. Then, the torques are multiplied region-wise by GFactor.
+// For the sake of backward compatibility, the defaults are GammaLL=1.7595e11 and GFactor=1.0. However, the user may want to set GammaLL=mu_B/hbar=0.879e11.
+// In this case, GFactor is the material's g-factor (i.e., ~2.0 for most transition-metal ferromagnets).
+// Beware: the gyromagnetic ratio used to be defined in two places, here and as GAMMA0 in cuda/constants.h, and only this one followed user changes (possibly leading to wrong results).
+// GammaLL is also used to scale the timestep in the integration algorithms.
 func init() {
 	Pol.setUniform([]float64{1}) // default spin polarization
 	Lambda.Set(1)                // sensible default value (?).
-	DeclVar("GammaLL", &GammaLL, "Gyromagnetic ratio in rad/Ts")
+	DeclVar("GammaLL", &GammaLL, "Gyromagnetic ratio [rad/Ts], that will be multiplied by GFactor (Default 1.7595e11)")
+	GFactor.Set(1.00)
 	DeclVar("DisableZhangLiTorque", &DisableZhangLiTorque, "Disables Zhang-Li torque (default=false)")
 	DeclVar("DisableSlonczewskiTorque", &DisableSlonczewskiTorque, "Disables Slonczewski torque (default=false)")
 	DeclVar("DoPrecess", &Precess, "Enables LL precession (default=true)")
@@ -45,6 +56,7 @@ func init() {
 func SetTorque(dst *data.Slice) {
 	SetLLTorque(dst)
 	AddSTTorque(dst)
+	ScaleGamma(dst) // rescale the summed LL + spin-transfer torque by the region-wise GFactor
 	FreezeSpins(dst)
 }
 
@@ -60,6 +72,13 @@ func SetLLTorque(dst *data.Slice) {
 	}
 }
 
+// ScaleGamma scales the torque in dst by GFactor, a region-wise scalar parameter.
+func ScaleGamma(dst *data.Slice) {
+	gfact := GFactor.MSlice()
+	defer gfact.Recycle()
+	cuda.ScaleGamma(dst, gfact)
+}
+
 // Adds the current spin transfer torque to dst
 func AddSTTorque(dst *data.Slice) {
 	if J.isZero() {
@@ -85,7 +104,9 @@ func AddSTTorque(dst *data.Slice) {
 		defer xi.Recycle()
 		pol := Pol.MSlice()
 		defer pol.Recycle()
-		cuda.AddZhangLiTorque(dst, M.Buffer(), msat, j, alpha, xi, pol, Mesh())
+		g := GFactor.MSlice()
+		defer g.Recycle()
+		cuda.AddZhangLiTorque(dst, M.Buffer(), msat, j, alpha, xi, pol, g, float32(GammaLL), Mesh())
 	}
 	if !DisableSlonczewskiTorque && !FixedLayer.isZero() {
 		msat := Msat.MSlice()

From 9c3eb92ca735fb70c58376f43a8e898faaa0bd24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 16:52:52 +0100
Subject: [PATCH 09/10] Update temperature.go

Added support for a region-wise factor ('GFactor') for the parameter GammaLL. The thermal field calculation now receives GFactor, so that it takes this factor into account.
---
 engine/temperature.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/engine/temperature.go b/engine/temperature.go
index f3a3a18ae..cbf6633f8 100644
--- a/engine/temperature.go
+++ b/engine/temperature.go
@@ -100,9 +100,11 @@ func (b *thermField) update() {
 	defer temp.Recycle()
 	alpha := Alpha.MSlice()
 	defer alpha.Recycle()
+	g := GFactor.MSlice()
+	defer g.Recycle()
 	for i := 0; i < 3; i++ {
 		b.generator.GenerateNormal(uintptr(noise.DevPtr(0)), int64(N), mean, stddev)
-		cuda.SetTemperature(dst.Comp(i), noise, k2_VgammaDt, ms, temp, alpha)
+		cuda.SetTemperature(dst.Comp(i), noise, k2_VgammaDt, ms, temp, alpha, g)
 	}
 
 	b.step = NSteps

From c8334b2f2f64c45ac68fdab57e11cb03dc755e3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20S?= <jsampaio@users.noreply.github.com>
Date: Thu, 3 Nov 2022 17:21:50 +0100
Subject: [PATCH 10/10] Update zhangli.go

Changed the Zhang-Li torque to use the user-defined GammaLL instead of the constant GAMMA0. This was a bug that caused a wrong calculation of the Zhang-Li torque whenever the user changed GammaLL.
---
 cuda/zhangli.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cuda/zhangli.go b/cuda/zhangli.go
index 8b3da8df6..2a5772760 100644
--- a/cuda/zhangli.go
+++ b/cuda/zhangli.go
@@ -6,7 +6,7 @@ import (
 
 // Add Zhang-Li ST torque (Tesla) to torque.
 // see zhangli.cu
-func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol, g MSlice, mesh *data.Mesh) {
+func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol, g MSlice, gammaLL float32, mesh *data.Mesh) {
 	c := mesh.CellSize()
 	N := mesh.Size()
 	cfg := make3DConf(N)
@@ -21,7 +21,7 @@ func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol, g MSlice,
 		alpha.DevPtr(0), alpha.Mul(0),
 		xi.DevPtr(0), xi.Mul(0),
 		pol.DevPtr(0), pol.Mul(0),
-		g.DevPtr(0), g.Mul(0),
+		g.DevPtr(0), g.Mul(0)*gammaLL,
 		float32(c[X]), float32(c[Y]), float32(c[Z]),
 		N[X], N[Y], N[Z], mesh.PBC_code(), cfg)
 }