Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nvidia remastered #464

Merged
merged 3 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 0 additions & 39 deletions constantine/math_compiler/codegen_amdgpu.nim
Original file line number Diff line number Diff line change
Expand Up @@ -67,45 +67,6 @@ proc hipDeviceInit*(deviceID = 0'i32): HipDevice =

return hipDevice

# ############################################################
#
# LLVM IR for AMD GPUs
#
# ############################################################
#
# Note:
# __device__ functions for field and elliptic curve arithmetic
# might be compiled by default with scalar codegen
#
# We will need to either:
# - Derive explicitly a vectorized version of the warp/wave size (32)
# - Derive implicitly a vectorized version, probably with __forceinline__

proc wrapInCallableHipKernel*(module: ModuleRef, fn: FnDef) =
## Create a public wrapper of a Hip device function
##
## A function named `addmod` can be found by appending _public
## check hipModuleGetFunction(fnPointer, cuModule, "addmod_public")

let pubName = fn.fnImpl.getName() & "_public"
let pubFn = module.addFunction(cstring(pubName), fn.fnTy)

let ctx = module.getContext()
let builder = ctx.createBuilder()
defer: builder.dispose()

let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody")
builder.positionAtEnd(blck)

var args = newSeq[ValueRef](fn.fnTy.countParamTypes())
for i, arg in mpairs(args):
arg = pubFn.getParam(i.uint32)
discard builder.call2(fn.fnTy, fn.fnImpl, args)

# A public kernel must return void
builder.retVoid()
pubFn.setCallingConvention(AMDGPU_KERNEL)

# ############################################################
#
# Code generation
Expand Down
35 changes: 1 addition & 34 deletions constantine/math_compiler/codegen_nvidia.nim
Original file line number Diff line number Diff line change
Expand Up @@ -109,37 +109,6 @@ proc cudaDeviceInit*(deviceID = 0'i32): CUdevice =

return cuDevice

# ############################################################
#
# LLVM IR for Nvidia GPUs
#
# ############################################################

proc wrapInCallableCudaKernel*(module: ModuleRef, fn: FnDef) =
## Create a public wrapper of a cuda device function
##
## A function named `addmod` can be found by appending _public
## check cuModuleGetFunction(fnPointer, cuModule, "addmod_public")

let pubName = fn.fnImpl.getName() & "_public"
let pubFn = module.addFunction(cstring(pubName), fn.fnTy)

let ctx = module.getContext()
let builder = ctx.createBuilder()
defer: builder.dispose()

let blck = ctx.appendBasicBlock(pubFn, "publicKernelBody")
builder.positionAtEnd(blck)

var args = newSeq[ValueRef](fn.fnTy.countParamTypes())
for i, arg in mpairs(args):
arg = pubFn.getParam(i.uint32)
discard builder.call2(fn.fnTy, fn.fnImpl, args)

# A public kernel must return void
builder.retVoid()
module.tagCudaKernel((fn.fnTy, pubFn))

# ############################################################
#
# Code generation
Expand Down Expand Up @@ -194,9 +163,7 @@ proc codegenNvidiaPTX*(asy: Assembler_LLVM, sm: tuple[major, minor: int32]): str
#
# ############################################################

proc getCudaKernel*(cuMod: CUmodule, cm: CurveMetadata, opcode: Opcode): CUfunction =
# Public kernels are appended _public
let fnName = cm.genSymbol(opcode) & "_public"
proc getCudaKernel*(cuMod: CUmodule, fnName: string): CUfunction =
check cuModuleGetFunction(result, cuMod, fnName)

proc exec*[T](jitFn: CUfunction, r: var T, a, b: T) =
Expand Down
34 changes: 34 additions & 0 deletions constantine/math_compiler/impl_fields_dispatch.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
constantine/platforms/llvm/llvm,
./ir,
./impl_fields_sat {.all.},
./impl_fields_nvidia {.all.}

proc modadd*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
case asy.backend
of {bkX86_64_Linux, bkAmdGpu}:
asy.modadd_sat(fd, r, a, b, M)
of bkNvidiaPTX:
asy.modadd_nvidia(fd, r, a, b, M)

proc modsub*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
case asy.backend
of bkNvidiaPTX:
asy.modsub_nvidia(fd, r, a, b, M)
else:
doAssert false, "Unimplemented"

proc mtymul*(asy: Assembler_LLVM, fd: FieldDescriptor, r, a, b, M: ValueRef) =
case asy.backend
of bkNvidiaPTX:
asy.mtymul_nvidia(fd, r, a, b, M)
else:
doAssert false, "Unimplemented"
3 changes: 1 addition & 2 deletions constantine/math_compiler/impl_fields_globals.nim
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ proc getM0ninv*(asy: Assembler_LLVM, fd: FieldDescriptor): ValueRef =
fd.wordTy
)


return m0ninv
return asy.load2(fd.wordTy, m0ninv, "m0ninv")

when isMainModule:
let asy = Assembler_LLVM.new("test_module", bkX86_64_Linux)
Expand Down
Loading
Loading