[ci] revert to windows-2019, add help info for #214, add note on nvid…

…ia-uvm
fangq · Mar 10, 2024 · 4e7f404 · 4e7f404
1 parent c3125c8
commit 4e7f404
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 18 deletions.
diff --git a/.github/workflows/build_all.yml b/.github/workflows/build_all.yml
@@ -14,7 +14,7 @@ jobs:
     name: Build All
     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-11, windows-2022]
+        os: [ubuntu-20.04, macos-11, windows-2019]
     runs-on: ${{ matrix.os }}
     defaults:
       run:
@@ -187,7 +187,7 @@ jobs:
             zip -FSr --symlink packages/mcxlab-${{ env.RELEASE_TAG }}.zip mcxlab
           fi
       - name: Upload mcxlab package
-        if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-11' || matrix.os == 'windows-2022' }}
+        if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-11' || matrix.os == 'windows-2019' }}
         uses: actions/upload-artifact@v3
         with:
           name: all-mcx-packages
@@ -217,7 +217,7 @@ jobs:
             zip -FSr --symlink mcx/packages/mcx-${{ env.RELEASE_TAG }}.zip mcx -x 'mcx/packages*'
           fi
       - name: Upload mcx package
-        if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-11' || matrix.os == 'windows-2022' }}
+        if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-11' || matrix.os == 'windows-2019' }}
         uses: actions/upload-artifact@v3
         with:
           name: all-mcx-packages

diff --git a/README.md b/README.md
@@ -308,6 +308,30 @@ to achieve the same goal. Otherwise, the simulation may hang your system
 after running for a few seconds. A hybrid GPU laptop combining an NVIDIA GPU 
 with an AMD iGPU does not seem to have this issue if using Linux.
 
+In addition, the NVIDIA driver (520 or newer) has a known glitch when running on Linux kernel
+6.x (such as those in Ubuntu 22.04). See
+
+https://forums.developer.nvidia.com/t/dev-nvidia-uvm-io-error-on-ubuntu-22-04-520-to-535-driver-versions/262153
+
+When the laptop is in the "performance" mode and wakes up from suspension, MCX or any
+CUDA program fails to run with an error
+
+
+```
+MCX ERROR(-999):unknown error in unit mcx_core.cu:2523
+```
+
+This is because the kernel module `nvidia-uvm` fails to be reloaded after suspension.
+If you had an open MATLAB session, you must close MATLAB first, and then
+run the commands below (if MATLAB is open, you will see `rmmod: ERROR: Module nvidia_uvm is in use`)
+
+```
+sudo rmmod nvidia-uvm
+sudo modprobe nvidia-uvm
+```
+
+After running the above commands, MCX should be able to run again.
+
 New generations of Mac computers no longer support NVIDIA or AMD GPUs. You will
 have to use the OpenCL version of MCX, MCX-CL by downloading it from
 

diff --git a/README.txt b/README.txt
@@ -283,6 +283,29 @@ to achieve the same goal. Otherwise, the simulation may hang your system
 after running for a few seconds. A hybrid GPU laptop combining an NVIDIA GPU 
 with an AMD iGPU does not seem to have this issue if using Linux.
 
+In addition, the NVIDIA driver (520 or newer) has a known glitch when running on Linux kernel
+6.x (such as those in Ubuntu 22.04). See
+
+https://forums.developer.nvidia.com/t/dev-nvidia-uvm-io-error-on-ubuntu-22-04-520-to-535-driver-versions/262153
+
+When the laptop is in the "performance" mode and wakes up from suspension, MCX or any
+CUDA program fails to run with an error
+
+
+  MCX ERROR(-999):unknown error in unit mcx_core.cu:2523
+
+
+This is because the kernel module `nvidia-uvm` fails to be reloaded after suspension.
+If you had an open MATLAB session, you must close MATLAB first, and then
+run the commands below (if MATLAB is open, you will see `rmmod: ERROR: Module nvidia_uvm is in use`)
+
+
+  sudo rmmod nvidia-uvm
+  sudo modprobe nvidia-uvm
+
+
+After running the above commands, MCX should be able to run again.
+
 New generations of Mac computers no longer support NVIDIA or AMD GPUs. You will
 have to use the OpenCL version of MCX, MCX-CL by downloading it from
 

diff --git a/mcxlab/mcxlab.m b/mcxlab/mcxlab.m
@@ -232,7 +232,10 @@
 %                               uniformly in the perpendicular direction
 %                      'slit' [*] - a collimated slit beam emitting from the line segment between
 %                               cfg.srcpos and cfg.srcpos+cfg.srcparam(1:3), with the initial
-%                               dir specified by cfg.srcdir
+%                               dir specified by cfg.srcdir; when user defines positive values for srcparam2.x or .y,
+%                               the slit source is broadened in a Gaussian profile controlled by
+%                               srcparam2.x: width of Gaussian broadening in the direction perpendicular to both slit and srcdir
+%                               srcparam2.y: width of Gaussian broadening in the direction of the slit line: cfg.srcparam(1:3)
 %                      'pencilarray' - a rectangular array of pencil beams. The srcparam1 and srcparam2
 %                               are defined similarly to 'fourier', except that srcparam1(4) and srcparam2(4)
 %                               are both integers, denoting the element counts in the x/y dimensions, respectively.

diff --git a/src/mcx_core.cu b/src/mcx_core.cu
@@ -1402,7 +1402,7 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
                     r = sqrtf(0.5f * rand_next_scatlen(t)) * launchsrc->param1.x;
 
                     /** parameter to generate photon path from coordinates at focus (depends on focal distance and rayleigh range) */
-                    rv->x = -launchsrc->param1.y / launchsrc->param1.z;
+                    rv->x = __fdividef(-launchsrc->param1.y, launchsrc->param1.z);
                     rv->y = rsqrtf(r * r + launchsrc->param1.z * launchsrc->param1.z);
 
                     /** if beam direction is along +z or -z direction */
@@ -1460,34 +1460,32 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime*
                         r = TWO_PI * rand_uniform01(t); // phi
                         sincosf(r, &sphi, &cphi); // y=sin(phi), x=cos(phi)
                         rotatevector(v, 1.f, 0.f, sphi, cphi);
-                    }
-
-                    if (MCX_SRC_SLIT && (launchsrc->param2.x > 0.f || launchsrc->param2.y > 0.f)) {
+                    } else if (launchsrc->param2.x > 0.f || launchsrc->param2.y > 0.f) {
                         float sphi, cphi;
                         r = TWO_PI * rand_uniform01(t);
                         sincosf(r, &sphi, &cphi);
                         r = sqrtf(2.f * rand_next_scatlen(t));
-                        // gaussian broadening factor in direction perpendicular to both slit and v directions
+                        // gaussian broadening factor in the direction perpendicular to both slit and v directions
                         cphi *= launchsrc->param2.x * r;
-                        // gaussian broadening factor in direction of slit
+                        // gaussian broadening factor in the direction of the slit (srcparam1.x/y/z)
                         sphi *= launchsrc->param2.y * r;
-                        sphi *= rnorm3df(launchsrc->param1.x, launchsrc->param1.y, launchsrc->param1.z);
+                        sphi *= rsqrt(launchsrc->param1.x * launchsrc->param1.x + launchsrc->param1.y * launchsrc->param1.y + launchsrc->param1.z * launchsrc->param1.z);
                         *rv = float3(launchsrc->param1.y * v->z - launchsrc->param1.z * v->y,
                                      launchsrc->param1.z * v->x - launchsrc->param1.x * v->z,
                                      launchsrc->param1.x * v->y - launchsrc->param1.y * v->x);
-                        r = rsqrt(rv->x * rv->x + rv->y * rv->y + rv->z * rv->z);
-                        v->x += cphi * rv->x * r + sphi * launchsrc->param1.x;
-                        v->y += cphi * rv->y * r + sphi * launchsrc->param1.y;
-                        v->z += cphi * rv->z * r + sphi * launchsrc->param1.z;
+                        cphi *= rsqrt(rv->x * rv->x + rv->y * rv->y + rv->z * rv->z);
+                        v->x += cphi * rv->x + sphi * launchsrc->param1.x;
+                        v->y += cphi * rv->y + sphi * launchsrc->param1.y;
+                        v->z += cphi * rv->z + sphi * launchsrc->param1.z;
                         r = rsqrt(v->x * v->x + v->y * v->y + v->z * v->z);
                         v->x *= r;
                         v->y *= r;
                         v->z *= r;
                     }
 
-                    *rv = float3(rv->x + (launchsrc->param1.x) * 0.5f,
-                                 rv->y + (launchsrc->param1.y) * 0.5f,
-                                 rv->z + (launchsrc->param1.z) * 0.5f);
+                    *rv = float3(launchsrc->pos.x + (launchsrc->param1.x) * 0.5f,
+                                 launchsrc->pos.y + (launchsrc->param1.y) * 0.5f,
+                                 launchsrc->pos.z + (launchsrc->param1.z) * 0.5f);
                     canfocus = (gcfg->srctype == MCX_SRC_SLIT);
                     break;
                 }