@@ -160,54 +160,53 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
160
160
* @brief The maximum reuse margin for a buffer.
161
161
*/
162
162
static const size_t max_reuse_margin = 1ull << 22 ; // 4MB
163
-
163
+
164
164
/* *
165
165
* @brief The minimum free margin for a buffer.
166
166
*/
167
167
static const size_t min_free_margin = 1ull << 20 ; // 1MB
168
-
169
-
168
+
170
169
/* *
171
170
* @brief The alignment for buffer allocation.
172
171
*/
173
- static const size_t alignment = 128 ;
174
-
172
+ static const size_t alignment = 128 ;
173
+
175
174
/* *
176
175
* @brief The device ID associated with this buffer pool.
177
176
*/
178
177
int device;
179
-
178
+
180
179
/* *
181
180
* @brief Whether to disable clean during buffer allocation.
182
181
*/
183
182
bool disable_clean = false ;
184
-
183
+
185
184
/* *
186
185
* @brief Structure representing a CANN buffer.
187
186
*/
188
187
struct ggml_cann_buffer {
189
188
void * ptr = nullptr ; ///< Pointer to the buffer.
190
189
size_t size = 0 ; ///< Size of the buffer in bytes.
191
190
std::chrono::steady_clock::time_point last_used; ///< Last used time (steady clock).
192
-
191
+
193
192
// Compares by size only (ptr and last_used are ignored). free_buffers is a
// std::priority_queue using std::greater<>, which calls this operator, so
// the smallest free buffer is the one returned by top().
bool operator >(const ggml_cann_buffer& other) const {
194
193
return size > other.size ;
195
194
}
196
195
};
197
-
196
+
198
197
/* *
199
198
* @brief Map from buffer pointer to buffer size, and the size-ordered queue of free buffers.
200
199
*/
201
200
std::unordered_map<void *, size_t > buffer_pool;
202
201
std::priority_queue<ggml_cann_buffer,
203
202
std::vector<ggml_cann_buffer>,
204
203
std::greater<>> free_buffers ;
205
-
204
+
206
205
/* *
207
206
* @brief Total size of all buffers in the pool.
208
207
*/
209
208
size_t pool_size = 0 ;
210
-
209
+
211
210
/* *
212
211
* @brief Constructor to initialize the buffer pool for a specific device.
213
212
*
@@ -216,7 +215,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
216
215
explicit ggml_cann_pool_buf_prio (int device) : device(device) {
217
216
// disable_clean becomes true whenever the variable is present in the
// environment; its value is never inspected (even an empty value counts).
// NOTE(review): the variable name as rendered here begins with a space —
// presumably an extraction artifact of this view; confirm against the real source.
disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN" ) != nullptr ;
218
217
}
219
-
218
+
220
219
/* *
221
220
* @brief Destructor to free all buffers in the pool.
222
221
*/
@@ -229,7 +228,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
229
228
buffer_pool.clear ();
230
229
GGML_ASSERT (pool_size == 0 );
231
230
}
232
-
231
+
233
232
/* *
234
233
* @brief Allocate a buffer of the given size.
235
234
*
@@ -243,16 +242,16 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
243
242
if (size == 0 ) {
244
243
size = alignment;
245
244
}
246
-
245
+
247
246
void * ptr = nullptr ;
248
247
auto now = std::chrono::steady_clock::now ();
249
-
248
+
250
249
std::vector<ggml_cann_buffer> free_buffers_rest;
251
250
free_buffers_rest.reserve (free_buffers.size ());
252
251
while (!free_buffers.empty ()) {
253
252
auto b = free_buffers.top ();
254
253
free_buffers.pop ();
255
-
254
+
256
255
if (b.size >= size) {
257
256
// reuse the buffer if the size is enough
258
257
const size_t margin = b.size - size;
@@ -273,7 +272,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
273
272
break ;
274
273
}
275
274
}
276
-
275
+
277
276
bool should_clean = !disable_clean &&
278
277
b.size > min_free_margin &&
279
278
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
@@ -298,14 +297,14 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
298
297
for (ggml_cann_buffer &b : free_buffers_rest) {
299
298
free_buffers.push (std::move (b));
300
299
}
301
-
300
+
302
301
#ifdef DEBUG_CANN_MALLOC
303
302
GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n " , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
304
303
#endif
305
304
if (ptr != nullptr ) {
306
305
return ptr;
307
306
}
308
-
307
+
309
308
// allocate a new buffer if no buffer can be reused
310
309
ggml_cann_set_device (device);
311
310
ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
@@ -322,7 +321,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
322
321
buffer_pool.emplace (ptr, size);
323
322
return ptr;
324
323
}
325
-
324
+
326
325
/* *
327
326
* @brief Free a buffer and return it to the pool.
328
327
*
@@ -334,7 +333,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
334
333
if (it == buffer_pool.end ()) {
335
334
GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n " , device, ptr);
336
335
}
337
-
336
+
338
337
auto now = std::chrono::steady_clock::now ();
339
338
free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
340
339
#ifdef DEBUG_CANN_MALLOC
@@ -346,7 +345,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
346
345
#endif
347
346
}
348
347
};
349
-
348
+
350
349
/* *
351
350
* @brief A pool of CANN buffers(segment buffer).
352
351
*
0 commit comments