Untested: a quick approx. get_used_memory
Progress toward #245.
lukstafi committed Oct 14, 2024
1 parent bd0dc98 commit 1953872
Showing 4 changed files with 32 additions and 4 deletions.
11 changes: 9 additions & 2 deletions arrayjit/lib/backend_types.ml
@@ -55,6 +55,9 @@ module type No_device_backend = sig

val alloc_buffer : ?old_buffer:buffer_ptr * int -> size_in_bytes:int -> unit -> buffer_ptr

val get_used_memory : unit -> int
(** Returns (an upper bound of) the memory used for arrays, in bytes. *)

val compile : ?shared:bool -> ?name:string -> Indexing.unit_bindings -> Assignments.comp -> code
(** If [~shared:true] (default [false]), the backend should prefer to do more compile work in a
device-and-stream-agnostic way. If [~shared:false], the backend can opt to postpone compiling
@@ -294,14 +297,18 @@ module type Lowered_backend = sig
type stream [@@deriving sexp_of]

val alloc_buffer : ?old_buffer:buffer_ptr * int -> size_in_bytes:int -> stream -> buffer_ptr

val get_used_memory : unit -> int
(** Returns (an upper bound of) the memory used for arrays, in bytes. *)

val init : stream -> context
val await : stream -> unit
val is_idle : stream -> bool
val all_work : stream -> event

val scheduled_merge_node : stream -> Tnode.t option
(** [scheduled_merge_node stream] is the tensor node that would be in the [stream]'s merge buffer
right after [await stream]. *)

val num_devices : unit -> int
val suggested_num_streams : device -> int
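A minimal sketch (not from this commit) of how a backend could satisfy the new get_used_memory signature when it has no allocator statistics of its own: keep a module-level atomic counter that the allocation and release paths update. The module and function names (Counted_allocator, note_alloc, note_free) are hypothetical.

(* Hypothetical helper, not part of this diff. *)
module Counted_allocator = struct
  let used_memory = Atomic.make 0

  (* Call from the allocation path with the byte size just allocated. *)
  let note_alloc ~size_in_bytes =
    ignore (Atomic.fetch_and_add used_memory size_in_bytes : int)

  (* Call from the release path with the byte size just freed. *)
  let note_free ~size_in_bytes =
    ignore (Atomic.fetch_and_add used_memory (-size_in_bytes) : int)

  (* Matches [val get_used_memory : unit -> int] in the signature above. *)
  let get_used_memory () = Atomic.get used_memory
end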
6 changes: 6 additions & 0 deletions arrayjit/lib/backends.ml
@@ -76,6 +76,8 @@ struct
let alloc_buffer ?old_buffer ~size_in_bytes _stream =
Backend.alloc_buffer ?old_buffer ~size_in_bytes ()

let get_used_memory = Backend.get_used_memory

type device = stream [@@deriving sexp_of]
type code = Backend.code [@@deriving sexp_of]
type code_batch = Backend.code_batch [@@deriving sexp_of]
@@ -368,6 +370,8 @@ module Sync_backend (Backend : Backend_types.No_device_backend) : Backend_types.
let alloc_buffer ?old_buffer ~size_in_bytes _stream =
Backend.alloc_buffer ?old_buffer ~size_in_bytes ()

let get_used_memory = Backend.get_used_memory

type device = CPU [@@deriving sexp_of]
type code = Backend.code [@@deriving sexp_of]
type code_batch = Backend.code_batch [@@deriving sexp_of]
@@ -700,6 +704,8 @@ module Lowered_no_device_backend (Backend : Backend_types.Lowered_no_device_back

let get_buffer tn context =
Map.find (Backend.ctx_arrays context) tn |> Option.map ~f:Backend.buffer_ptr

let get_used_memory = Ndarray.get_used_memory
end

module C_device : Backend_types.No_device_backend = Lowered_no_device_backend ((
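Both wrappers simply forward get_used_memory from the underlying backend. Purely as an illustration of caller-side use (not code from this repository), a guard over a first-class backend module could look like the following; within_memory_budget and budget_in_bytes are hypothetical names.

(* Hypothetical caller-side check, assuming the backend is passed as a
   first-class module of type Backend_types.No_device_backend. *)
let within_memory_budget ~budget_in_bytes
    (module B : Backend_types.No_device_backend) =
  B.get_used_memory () <= budget_in_bytes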
4 changes: 4 additions & 0 deletions arrayjit/lib/cuda_backend.cudajit.ml
@@ -103,6 +103,10 @@ let alloc_buffer ?old_buffer ~size_in_bytes stream =
set_ctx stream.device.primary_context;
Cu.Deviceptr.mem_alloc ~size_in_bytes

let get_used_memory () =
let free, total = Cudajit.Device.get_free_and_total_mem () in
total - free

let opt_alloc_merge_buffer ~size_in_bytes phys_dev =
if phys_dev.copy_merge_buffer_capacity < size_in_bytes then (
set_ctx phys_dev.primary_context;
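The CUDA implementation derives the figure from device-wide statistics, so it also counts memory held by other contexts and processes; that is consistent with documenting the result as an upper bound. A sketch (not from this commit) of the same query used for a headroom check before a large allocation; has_headroom and needed_bytes are hypothetical names.

(* Hypothetical helper reusing the device-wide query from this file. *)
let has_headroom ~needed_bytes =
  let free, _total = Cudajit.Device.get_free_and_total_mem () in
  free >= needed_bytes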
15 changes: 13 additions & 2 deletions arrayjit/lib/ndarray.ml
@@ -378,20 +378,31 @@ let ptr_to_string_hum nd =

(** {2 *** Creating ***} *)

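(* Bytes currently attributed to live arrays: incremented in [create_array] and
   decremented by the GC finalizer, so the value can lag above actual usage. *)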
let used_memory = Atomic.make 0

let create_array ~debug:_debug prec ~dims init_op =
let size_in_bytes =
(if Array.length dims = 0 then 0 else Array.reduce_exn dims ~f:( * )) * Ops.prec_in_bytes prec
in
let%diagn2_sexp finalizer _result =
let _ : int = Atomic.fetch_and_add used_memory (-size_in_bytes) in
[%log "Deleting", _debug, ptr_to_string_hum _result]
in
let f prec = as_array prec @@ create_bigarray prec ~dims init_op in
let result = Ops.map_prec { f } prec in
Stdlib.Gc.finalise finalizer result;
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
[%debug2_sexp
[%log_block
"create_array";
[%log _debug, ptr_to_string_hum result]]];
result

let empty_array prec =
create_array prec ~dims:[||] (Constant_fill { values = [| 0.0 |]; strict = false })

let get_used_memory () = Atomic.get used_memory

(** {2 *** Printing ***} *)

(** Dimensions to string, ["x"]-separated, e.g. 1x2x3 for batch dims 1, input dims 3, output dims 2.
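A usage sketch (not from this commit) of the accounting above: the counter grows in create_array and shrinks only when the GC runs the finalizer, so between collections get_used_memory over-approximates live array memory. Depending on how the library is linked, the module may need to be referenced as Arrayjit.Ndarray; report_used_memory is a hypothetical name.

let report_used_memory () =
  Stdlib.Printf.printf "used (upper bound): %d bytes\n" (Ndarray.get_used_memory ())

let () =
  report_used_memory ();
  (* ... allocate some arrays in a local scope and let them become unreachable ... *)
  Stdlib.Gc.full_major ();
  (* Finalizers typically run during or after a full major collection, decrementing the counter. *)
  report_used_memory ()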
