Skip to content

Commit

Permalink
Deploying to gh-pages from @ 92d58c8 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
utkinis committed Oct 15, 2024
1 parent 4bd24b9 commit 7c3fc31
Show file tree
Hide file tree
Showing 23 changed files with 196 additions and 151 deletions.
2 changes: 1 addition & 1 deletion 404.html
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<!doctype html> <html lang=en > <meta charset=UTF-8 > <meta name=viewport content="width=device-width, initial-scale=1"> <script src="/libs/lunr/lunr.min.js"></script> <script src="/libs/lunr/lunr_index.js"></script> <script src="/libs/lunr/lunrclient.min.js"></script> <link rel=stylesheet href="/css/franklin.css"> <link rel=stylesheet href="/css/poole_hyde.css"> <link rel=stylesheet href="/css/custom.css"> <style> html {font-size: 17px;} .franklin-content {position: relative; padding-left: 8%; padding-right: 5%; line-height: 1.35em;} @media (min-width: 940px) { .franklin-content {width: 100%; margin-left: auto; margin-right: auto;} } @media (max-width: 768px) { .franklin-content {padding-left: 6%; padding-right: 6%;} } </style> <link rel=icon href="/assets/favicon.png"> <title>404: File not found</title> <style> .content {max-width: 50rem} </style> <div class=sidebar > <div class="container sidebar-sticky"> <div class=sidebar-about > <img src="/assets/vaw_logo.png" style="width: 180px; height: auto; display: inline"> <div style="font-weight: margin-bottom: 0.5em"><a href="/"> Fall 2024</a> <span style="opacity: 0.7;">| <a href="https://www.vorlesungen.ethz.ch/Vorlesungsverzeichnis/lerneinheit.view?semkez=2024W&ansicht=KATALOGDATEN&lerneinheitId=182481&lang=en"> ETHZ 101-0250-00</a></span></div> <br> <h1><a href="/">Solving partial differential equations in parallel on GPUs</a></h1> <div style="line-height:18px; font-size: 15px; opacity: 0.85">by &nbsp; <a href="https://vaw.ethz.ch/en/people/person-detail.MjcwOTYw.TGlzdC8xOTYxLDE1MTczNjI1ODA=.html">Ludovic Räss</a>, &nbsp; <a href="https://vaw.ethz.ch/en/personen/person-detail.html?persid=124402">Mauro Werder</a>, &nbsp; <a href="https://www.cscs.ch/about/staff/">Samuel Omlin</a> & <br> <a href="https://vaw.ethz.ch/en/people/person-detail.MzAwMjIy.TGlzdC8xOTYxLDE1MTczNjI1ODA=.html">Ivan Utkin</a> </div> </div> <br> <style> </style> <nav class=sidebar-nav style="opacity: 0.9; margin-bottom: 1.2cm;"> <a 
class="sidebar-nav-item " href="/"><b>Welcome</b></a> <a class="sidebar-nav-item " href="/logistics/">Logistics</a> <a class="sidebar-nav-item " href="/homework/">Homeworks</a> <a class="sidebar-nav-item " href="/software_install/">Software install</a> <a class="sidebar-nav-item " href="/extras/">Extras</a> <br> <div class=course-section >Part 1 – Introduction</div> <a class="sidebar-nav-item " href="/lecture1/">Lecture 1 – Why Julia GPU</a> <a class="sidebar-nav-item " href="/lecture2/">Lecture 2 – PDEs & physical processes</a> <a class="sidebar-nav-item " href="/lecture3/">Lecture 3 – Solving elliptic PDEs</a> <div class=course-section >Part 2 – Solving PDEs on GPUs</div> <a class="sidebar-nav-item " href="/lecture4/">Lecture 4 – Porous convection</a> <a class="sidebar-nav-item " href="/lecture5/">Lecture 5 – Parallel computing</a> <a class="sidebar-nav-item " href="/lecture6/">Lecture 6 – GPU computing</a> <div class=course-section >Part 3 – Multi-GPU computing (projects)</div> <a class="sidebar-nav-item " href="/lecture7/">Lecture 7 – xPU computing</a> <a class="sidebar-nav-item " href="/lecture8/">Lecture 8 – Julia MPI & multi-xPU</a> <a class="sidebar-nav-item " href="/lecture9/">Lecture 9 – Multi-xPU & Projects</a> <a class="sidebar-nav-item " href="/lecture10/">Lecture 10 – Advanced optimisations</a> <div class=course-section >Final Projects</div> <a class="sidebar-nav-item " href="/final_proj/">Infos about final projects</a> </nav> <form id=lunrSearchForm name=lunrSearchForm > <input class=search-input name=q placeholder="Enter search term" type=text > <input type=submit value=Search formaction="/search/index.html"> </form> <br> <br> </div> </div> <div class="content container"> <div class=franklin-content ><h1 id=404_file_not_found ><a href="#404_file_not_found" class=header-anchor >404: File not found</a></h1> <p>The requested file was not found.</p> <p>Please <a href="/">click here</a> to go to the home page.</p> <div class=page-foot > <div 
class=copyright > <a href="https://github.com/eth-vaw-glaciology/course-101-0250-00/"><b>Edit this page on <img class=github-logo src="https://unpkg.com/[email protected]/dist/svg/logo-github.svg"></b></a><br> Last modified: October 08, 2024. Website built with <a href="https://github.com/tlienart/Franklin.jl">Franklin.jl</a> and the <a href="https://julialang.org">Julia programming language</a>. </div> </div> </div> </div>
<!doctype html> <html lang=en > <meta charset=UTF-8 > <meta name=viewport content="width=device-width, initial-scale=1"> <script src="/libs/lunr/lunr.min.js"></script> <script src="/libs/lunr/lunr_index.js"></script> <script src="/libs/lunr/lunrclient.min.js"></script> <link rel=stylesheet href="/css/franklin.css"> <link rel=stylesheet href="/css/poole_hyde.css"> <link rel=stylesheet href="/css/custom.css"> <style> html {font-size: 17px;} .franklin-content {position: relative; padding-left: 8%; padding-right: 5%; line-height: 1.35em;} @media (min-width: 940px) { .franklin-content {width: 100%; margin-left: auto; margin-right: auto;} } @media (max-width: 768px) { .franklin-content {padding-left: 6%; padding-right: 6%;} } </style> <link rel=icon href="/assets/favicon.png"> <title>404: File not found</title> <style> .content {max-width: 50rem} </style> <div class=sidebar > <div class="container sidebar-sticky"> <div class=sidebar-about > <img src="/assets/vaw_logo.png" style="width: 180px; height: auto; display: inline"> <div style="font-weight: margin-bottom: 0.5em"><a href="/"> Fall 2024</a> <span style="opacity: 0.7;">| <a href="https://www.vorlesungen.ethz.ch/Vorlesungsverzeichnis/lerneinheit.view?semkez=2024W&ansicht=KATALOGDATEN&lerneinheitId=182481&lang=en"> ETHZ 101-0250-00</a></span></div> <br> <h1><a href="/">Solving partial differential equations in parallel on GPUs</a></h1> <div style="line-height:18px; font-size: 15px; opacity: 0.85">by &nbsp; <a href="https://vaw.ethz.ch/en/people/person-detail.MjcwOTYw.TGlzdC8xOTYxLDE1MTczNjI1ODA=.html">Ludovic Räss</a>, &nbsp; <a href="https://vaw.ethz.ch/en/personen/person-detail.html?persid=124402">Mauro Werder</a>, &nbsp; <a href="https://www.cscs.ch/about/staff/">Samuel Omlin</a> & <br> <a href="https://vaw.ethz.ch/en/people/person-detail.MzAwMjIy.TGlzdC8xOTYxLDE1MTczNjI1ODA=.html">Ivan Utkin</a> </div> </div> <br> <style> </style> <nav class=sidebar-nav style="opacity: 0.9; margin-bottom: 1.2cm;"> <a 
class="sidebar-nav-item " href="/"><b>Welcome</b></a> <a class="sidebar-nav-item " href="/logistics/">Logistics</a> <a class="sidebar-nav-item " href="/homework/">Homeworks</a> <a class="sidebar-nav-item " href="/software_install/">Software install</a> <a class="sidebar-nav-item " href="/extras/">Extras</a> <br> <div class=course-section >Part 1 – Introduction</div> <a class="sidebar-nav-item " href="/lecture1/">Lecture 1 – Why Julia GPU</a> <a class="sidebar-nav-item " href="/lecture2/">Lecture 2 – PDEs & physical processes</a> <a class="sidebar-nav-item " href="/lecture3/">Lecture 3 – Solving elliptic PDEs</a> <div class=course-section >Part 2 – Solving PDEs on GPUs</div> <a class="sidebar-nav-item " href="/lecture4/">Lecture 4 – Porous convection</a> <a class="sidebar-nav-item " href="/lecture5/">Lecture 5 – Parallel computing</a> <a class="sidebar-nav-item " href="/lecture6/">Lecture 6 – GPU computing</a> <div class=course-section >Part 3 – Multi-GPU computing (projects)</div> <a class="sidebar-nav-item " href="/lecture7/">Lecture 7 – xPU computing</a> <a class="sidebar-nav-item " href="/lecture8/">Lecture 8 – Julia MPI & multi-xPU</a> <a class="sidebar-nav-item " href="/lecture9/">Lecture 9 – Multi-xPU & Projects</a> <a class="sidebar-nav-item " href="/lecture10/">Lecture 10 – Advanced optimisations</a> <div class=course-section >Final Projects</div> <a class="sidebar-nav-item " href="/final_proj/">Infos about final projects</a> </nav> <form id=lunrSearchForm name=lunrSearchForm > <input class=search-input name=q placeholder="Enter search term" type=text > <input type=submit value=Search formaction="/search/index.html"> </form> <br> <br> </div> </div> <div class="content container"> <div class=franklin-content ><h1 id=404_file_not_found ><a href="#404_file_not_found" class=header-anchor >404: File not found</a></h1> <p>The requested file was not found.</p> <p>Please <a href="/">click here</a> to go to the home page.</p> <div class=page-foot > <div 
class=copyright > <a href="https://github.com/eth-vaw-glaciology/course-101-0250-00/"><b>Edit this page on <img class=github-logo src="https://unpkg.com/[email protected]/dist/svg/logo-github.svg"></b></a><br> Last modified: October 15, 2024. Website built with <a href="https://github.com/tlienart/Franklin.jl">Franklin.jl</a> and the <a href="https://julialang.org">Julia programming language</a>. </div> </div> </div> </div>
4 changes: 2 additions & 2 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ version = "1.11.0"

[[deps.JLLWrappers]]
deps = ["Artifacts", "Preferences"]
git-tree-sha1 = "f389674c99bfcde17dc57454011aa44d5a260a40"
git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.6.0"
version = "1.6.1"

[[deps.JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
Expand Down
85 changes: 50 additions & 35 deletions assets/literate/l5_1-cpu-parallel_web.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,26 +145,28 @@ As first task, we'll compute the $T_\mathrm{eff}$ for the 2D fluid pressure (dif
- Compute the elapsed time `t_toc` at the end of the time loop and report:

````julia:ex1
t_toc = ...
A_eff = ... # Effective main memory access per iteration [GB]
t_it = ... # Execution time per iteration [s]
T_eff = A_eff/t_it # Effective memory throughput [GB/s]
# Wall-clock time elapsed since `t_tic` was recorded.
t_toc = Base.time() - t_tic
# Bytes moved per iteration, scaled to GB: nx*ny Float64 values times a factor 3*2.
# NOTE(review): the factor 3*2 presumably counts 3 arrays, each read and written once — confirm.
A_eff = (3*2)/1e9*nx*ny*sizeof(Float64) # Effective main memory access per iteration [GB]
# Average time of one iteration over the `niter` iterations performed.
t_it = t_toc/niter # Execution time per iteration [s]
T_eff = A_eff/t_it # Effective memory throughput [GB/s]
````

- Report `t_toc`, `T_eff` and `niter` at the end of the code, formatting output using `@printf()` macro.
- Round `T_eff` to the 3rd significant digit.

```julia
@printf("Time = %1.3f sec, ... \n", t_toc, ...)
@printf("Time = %1.3f sec, T_eff = %1.2f GB/s (niter = %d)\n", t_toc, round(T_eff, sigdigits=3), niter)
```

### Deactivate visualisation (and error checking)
- Use keyword arguments ("kwargs") to allow for default behaviour
- Define a `do_check` flag set to `false`

````julia:ex2
function Pf_diffusion_2D(;??)
function Pf_diffusion_2D(;do_check=false)
if do_check && (iter%ncheck == 0)
...
end
return
end
````
Expand Down Expand Up @@ -217,19 +219,19 @@ The goal is now to write out the diffusion physics in a loop fashion over $x$ an
Implement a nested loop, taking care of bounds and staggering.

````julia:ex6
for iy=??
for ix=??
qDx[??] -= (qDx[??] + k_ηf_dx* ?? )*_1_θ_dτ
for iy=1:ny
for ix=1:nx-1
qDx[ix+1,iy] -= (qDx[ix+1,iy] + k_ηf_dx*(Pf[ix+1,iy]-Pf[ix,iy]))*_1_θ_dτ
end
end
for iy=??
for ix=??
qDy[??] -= (qDy[??] + k_ηf_dy* ?? )*_1_θ_dτ
for iy=1:ny-1
for ix=1:nx
qDy[ix,iy+1] -= (qDy[ix,iy+1] + k_ηf_dy*(Pf[ix,iy+1]-Pf[ix,iy]))*_1_θ_dτ
end
end
for iy=??
for ix=??
Pf[??] -= ??
for iy=1:ny
for ix=1:nx
Pf[ix,iy] -= ((qDx[ix+1,iy]-qDx[ix,iy])*_dx + (qDy[ix,iy+1]-qDy[ix,iy])*_dy)*_β_dτ
end
end
````
Expand All @@ -239,26 +241,26 @@ We could now use macros to make the code nicer and clearer. Macro expression wil
Let's use macros to replace the derivative implementations

````julia:ex7
macro d_xa(A) esc(:( $A[??]-$A[??] )) end
macro d_ya(A) esc(:( $A[??]-$A[??] )) end
# Finite-difference helpers. Each macro expands to a difference of two neighbouring
# entries of `A`. The whole expression is escaped, so `ix` and `iy` are resolved in the
# caller's scope and must be defined where the macro is used.
# @d_xa(A) expands to A[ix+1,iy]-A[ix,iy] (difference along the first index).
macro d_xa(A) esc(:( $A[ix+1,iy]-$A[ix,iy] )) end
# @d_ya(A) expands to A[ix,iy+1]-A[ix,iy] (difference along the second index).
macro d_ya(A) esc(:( $A[ix,iy+1]-$A[ix,iy] )) end
````

And update the code within the iteration loop:

````julia:ex8
for iy=??
for ix=??
qDx[??] -= (qDx[??] + k_ηf_dx* ?? )*_1_θ_dτ
for iy=1:ny
for ix=1:nx-1
qDx[ix+1,iy] -= (qDx[ix+1,iy] + k_ηf_dx*@d_xa(Pf))*_1_θ_dτ
end
end
for iy=??
for ix=??
qDy[??] -= (qDy[??] + k_ηf_dy* ?? )*_1_θ_dτ
for iy=1:ny-1
for ix=1:nx
qDy[ix,iy+1] -= (qDy[ix,iy+1] + k_ηf_dy*@d_ya(Pf))*_1_θ_dτ
end
end
for iy=??
for ix=??
Pf[??] -= ??
for iy=1:ny
for ix=1:nx
Pf[ix,iy] -= (@d_xa(qDx)*_dx + @d_ya(qDy)*_dy)*_β_dτ
end
end
````
Expand All @@ -278,15 +280,28 @@ In this last step, the goal is to define `compute` functions to hold the physics
Create `compute_flux!()` and `update_Pf!()` functions that take the input and output arrays and the needed scalars as arguments and return nothing.

````julia:ex9
function compute_flux!(...)
function compute_flux!(qDx,qDy,Pf,k_ηf_dx,k_ηf_dy,_1_θ_dτ)
nx,ny=size(Pf)
...
# Update the x-flux: outer loop over the second index, inner loop over the first
# index (innermost loop on the first index matches Julia's column-major layout).
# Only entries qDx[2:nx, iy] are written; `@d_xa(Pf)` expands to
# Pf[ix+1,iy]-Pf[ix,iy], so `ix` runs to nx-1 to stay inside `Pf`.
for iy=1:ny
    for ix=1:nx-1
        qDx[ix+1,iy] -= (qDx[ix+1,iy] + k_ηf_dx*@d_xa(Pf))*_1_θ_dτ
    end
end
for iy=1:ny-1
for ix=1:nx
qDy[ix,iy+1] -= (qDy[ix,iy+1] + k_ηf_dy*@d_ya(Pf))*_1_θ_dτ
end
end
return nothing
end
function update_Pf!(Pf,...)
function update_Pf!(Pf,qDx,qDy,_dx,_dy,_β_dτ)
nx,ny=size(Pf)
...
for iy=1:ny
for ix=1:nx
Pf[ix,iy] -= (@d_xa(qDx)*_dx + @d_ya(qDy)*_dy)*_β_dτ
end
end
return nothing
end
````
Expand All @@ -306,18 +321,18 @@ Let's evaluate the performance of our code using `BenchmarkTools`. We will need
The `compute!()` function:

````julia:ex10
function compute!(Pf,qDx,qDy, ???)
compute_flux!(...)
update_Pf!(...)
# Perform one iteration in place: first update the fluxes `qDx`, `qDy` from `Pf`
# via `compute_flux!`, then update `Pf` from the new fluxes via `update_Pf!`.
# The scalar arguments are forwarded unchanged to the two kernels. Returns `nothing`.
function compute!(Pf,qDx,qDy,k_ηf_dx,k_ηf_dy,_1_θ_dτ,_dx,_dy,_β_dτ)
compute_flux!(qDx,qDy,Pf,k_ηf_dx,k_ηf_dy,_1_θ_dτ)
update_Pf!(Pf,qDx,qDy,_dx,_dy,_β_dτ)
return nothing
end
````

can then be called using `@belapsed` to return the elapsed time for a single iteration, letting `BenchmarkTools` take care of the sampling

````julia:ex11
t_toc = @belapsed compute!($Pf,$qDx,$qDy,???)
niter = ???
t_toc = @belapsed compute!($Pf,$qDx,$qDy,$k_ηf_dx,$k_ηf_dy,$_1_θ_dτ,$_dx,$_dy,$_β_dτ)
niter = 1
````

\note{Note that variables need to be interpolated into the function call, thus taking a `$` in front.}
Expand Down
83 changes: 49 additions & 34 deletions assets/literate/l5_1-cpu-parallel_web_script.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# This file was generated, do not modify it.

t_toc = ...
A_eff = ... # Effective main memory access per iteration [GB]
t_it = ... # Execution time per iteration [s]
T_eff = A_eff/t_it # Effective memory throughput [GB/s]
t_toc = Base.time() - t_tic
A_eff = (3*2)/1e9*nx*ny*sizeof(Float64) # Effective main memory access per iteration [GB]
t_it = t_toc/niter # Execution time per iteration [s]
T_eff = A_eff/t_it # Effective memory throughput [GB/s]

function Pf_diffusion_2D(;??)
function Pf_diffusion_2D(;do_check=false)
if do_check && (iter%ncheck == 0)
...
end
return
end

Expand All @@ -16,58 +18,71 @@ _1_θ_dτ = 1.0./(1.0 + θ_dτ)

_dx, _dy = 1.0/dx, 1.0/dy

for iy=??
for ix=??
qDx[??] -= (qDx[??] + k_ηf_dx* ?? )*_1_θ_dτ
for iy=1:ny
for ix=1:nx-1
qDx[ix+1,iy] -= (qDx[ix+1,iy] + k_ηf_dx*(Pf[ix+1,iy]-Pf[ix,iy]))*_1_θ_dτ
end
end
for iy=??
for ix=??
qDy[??] -= (qDy[??] + k_ηf_dy* ?? )*_1_θ_dτ
for iy=1:ny-1
for ix=1:nx
qDy[ix,iy+1] -= (qDy[ix,iy+1] + k_ηf_dy*(Pf[ix,iy+1]-Pf[ix,iy]))*_1_θ_dτ
end
end
for iy=??
for ix=??
Pf[??] -= ??
for iy=1:ny
for ix=1:nx
Pf[ix,iy] -= ((qDx[ix+1,iy]-qDx[ix,iy])*_dx + (qDy[ix,iy+1]-qDy[ix,iy])*_dy)*_β_dτ
end
end

macro d_xa(A) esc(:( $A[??]-$A[??] )) end
macro d_ya(A) esc(:( $A[??]-$A[??] )) end
macro d_xa(A) esc(:( $A[ix+1,iy]-$A[ix,iy] )) end
macro d_ya(A) esc(:( $A[ix,iy+1]-$A[ix,iy] )) end

for iy=??
for ix=??
qDx[??] -= (qDx[??] + k_ηf_dx* ?? )*_1_θ_dτ
for iy=1:ny
for ix=1:nx-1
qDx[ix+1,iy] -= (qDx[ix+1,iy] + k_ηf_dx*@d_xa(Pf))*_1_θ_dτ
end
end
for iy=??
for ix=??
qDy[??] -= (qDy[??] + k_ηf_dy* ?? )*_1_θ_dτ
for iy=1:ny-1
for ix=1:nx
qDy[ix,iy+1] -= (qDy[ix,iy+1] + k_ηf_dy*@d_ya(Pf))*_1_θ_dτ
end
end
for iy=??
for ix=??
Pf[??] -= ??
for iy=1:ny
for ix=1:nx
Pf[ix,iy] -= (@d_xa(qDx)*_dx + @d_ya(qDy)*_dy)*_β_dτ
end
end

function compute_flux!(...)
function compute_flux!(qDx,qDy,Pf,k_ηf_dx,k_ηf_dy,_1_θ_dτ)
nx,ny=size(Pf)
...
# Update the x-flux: outer loop over the second index, inner loop over the first
# index (innermost loop on the first index matches Julia's column-major layout).
# Only entries qDx[2:nx, iy] are written; `@d_xa(Pf)` expands to
# Pf[ix+1,iy]-Pf[ix,iy], so `ix` runs to nx-1 to stay inside `Pf`.
for iy=1:ny
    for ix=1:nx-1
        qDx[ix+1,iy] -= (qDx[ix+1,iy] + k_ηf_dx*@d_xa(Pf))*_1_θ_dτ
    end
end
for iy=1:ny-1
for ix=1:nx
qDy[ix,iy+1] -= (qDy[ix,iy+1] + k_ηf_dy*@d_ya(Pf))*_1_θ_dτ
end
end
return nothing
end

function update_Pf!(Pf,...)
function update_Pf!(Pf,qDx,qDy,_dx,_dy,_β_dτ)
nx,ny=size(Pf)
...
for iy=1:ny
for ix=1:nx
Pf[ix,iy] -= (@d_xa(qDx)*_dx + @d_ya(qDy)*_dy)*_β_dτ
end
end
return nothing
end

function compute!(Pf,qDx,qDy, ???)
compute_flux!(...)
update_Pf!(...)
function compute!(Pf,qDx,qDy,k_ηf_dx,k_ηf_dy,_1_θ_dτ,_dx,_dy,_β_dτ)
compute_flux!(qDx,qDy,Pf,k_ηf_dx,k_ηf_dy,_1_θ_dτ)
update_Pf!(Pf,qDx,qDy,_dx,_dy,_β_dτ)
return nothing
end

t_toc = @belapsed compute!($Pf,$qDx,$qDy,???)
niter = ???
t_toc = @belapsed compute!($Pf,$qDx,$qDy,$k_ηf_dx,$k_ηf_dy,$_1_θ_dτ,$_dx,$_dy,$_β_dτ)
niter = 1
Loading

0 comments on commit 7c3fc31

Please sign in to comment.