From 3892e131dd1e4aa41ef4df0179f2b29e6b18860c Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 20 Jun 2013 02:06:53 -0400 Subject: [PATCH] improve Array buffer management - make it an error to resize an array with shared data (fixes #3430) - now able to use realloc to grow arrays (part of #3440, helps #3441) - the new scheme is simpler. one Array owns the data, instead of tracking the buffers separately as mallocptr_t - Array data can be allocated inline, with malloc, or from a pool --- src/array.c | 194 ++++++++++++---------------- src/gc.c | 364 +++++++++++++++++++++++++++++----------------------- src/julia.h | 45 +++---- 3 files changed, 307 insertions(+), 296 deletions(-) diff --git a/src/array.c b/src/array.c index 645cb0e0a9ef7..ad4c12d3b4a82 100644 --- a/src/array.c +++ b/src/array.c @@ -21,6 +21,9 @@ int jl_array_store_unboxed(jl_value_t *el_type) return store_unboxed(el_type); } +// at this size use malloc +#define MALLOC_THRESH 1048576 + #ifdef _P64 typedef __uint128_t wideint_t; #else @@ -64,44 +67,32 @@ static jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *dims) } int ndimwords = jl_array_ndimwords(ndims); - size_t tsz = sizeof(jl_array_t)-sizeof(void*); + size_t tsz = sizeof(jl_array_t); tsz += ndimwords*sizeof(size_t); if (tot <= ARRAY_INLINE_NBYTES) { - size_t basesz = tsz; if (isunboxed && elsz >= 4) tsz = (tsz+15)&-16; // align data area 16 size_t doffs = tsz; tsz += tot; - if (((tsz&0xf) == 0) && tsz == basesz) { - // leave at least 1 word at end for owner pointer - tsz += sizeof(void*); - } tsz = (tsz+15)&-16; // align whole object 16 a = allocobj(tsz); a->type = atype; - a->ismalloc = 0; - a->isinline = 1; + a->how = 0; data = (char*)a + doffs; if (tot > 0 && !isunboxed) { memset(data, 0, tot); } } else { - if ((tsz&0xf) == 0) { - // leave at least 1 word at end for owner pointer - tsz += sizeof(void*); - } tsz = (tsz+15)&-16; // align whole object size 16 a = allocobj(tsz); JL_GC_PUSH1(&a); a->type = atype; - a->ismalloc = 1; - a->isinline = 0; // temporarily initialize to make gc-safe a->data = NULL; - jl_value_t **powner = (jl_value_t**)(&a->_pad + ndimwords); - *powner = (jl_value_t*)jl_gc_managed_malloc(tot); - data = ((jl_mallocptr_t*)*powner)->ptr; + a->how = 2; + data = jl_gc_managed_malloc(tot); + jl_gc_track_malloced_array(a); if (!isunboxed) memset(data, 0, tot); JL_GC_POP(); @@ -115,22 +106,22 @@ static jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *dims) a->ndims = ndims; a->ptrarray = !isunboxed; a->elsize = elsz; + a->isshared = 0; + a->isaligned = 1; + a->offset = 0; if (ndims == 1) { a->nrows = nel; a->maxsize = nel; - a->offset = 0; } else { size_t *adims = &a->nrows; for(i=0; i < ndims; i++) adims[i] = dims[i]; } - + return a; } -static jl_mallocptr_t *array_new_buffer(jl_array_t *a, size_t newlen); - jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_tuple_t *dims) { size_t i; @@ -138,11 +129,12 @@ jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_tuple_t *di size_t ndims = jl_tuple_len(dims); int ndimwords = jl_array_ndimwords(ndims); - a = allocobj((sizeof(jl_array_t) + ndimwords*sizeof(size_t) + 15)&-16); + a = allocobj((sizeof(jl_array_t) + sizeof(void*) + ndimwords*sizeof(size_t) + 15)&-16); a->type = atype; a->ndims = ndims; + a->offset = 0; a->data = NULL; - a->isinline = 0; + a->isaligned = data->isaligned; jl_value_t *el_type = jl_tparam0(atype); if (store_unboxed(el_type)) { a->elsize = jl_datatype_size(el_type); @@ -154,40 +146,11 @@ jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_tuple_t *di } JL_GC_PUSH1(&a); - char *d = data->data; - if (data->ndims == 1) d -= data->offset*data->elsize; - if (data->isinline) { - if (data->ndims == 1 || - // also copy out data if aligned wrong - (((((size_t)d)&0x0f)!=0) && !a->ptrarray && a->elsize>=4)) { - // data might resize, so switch it to shared representation. - // problem: the buffer might be used from C in a way that it's - // assumed not to move. for now just hope this doesn't happen. - size_t datalen = jl_array_len(data); - jl_mallocptr_t *mp = array_new_buffer(data, datalen); - memcpy(mp->ptr, data->data, datalen * data->elsize); - a->data = mp->ptr; - jl_array_data_owner(a) = (jl_value_t*)mp; - a->ismalloc = 1; - - data->data = mp->ptr; - data->offset = 0; - data->maxsize = datalen; - jl_array_data_owner(data) = (jl_value_t*)mp; - data->ismalloc = 1; - data->isinline = 0; - } - else { - a->ismalloc = 0; - jl_array_data_owner(a) = (jl_value_t*)data; - } - } - else { - a->ismalloc = data->ismalloc; - jl_array_data_owner(a) = jl_array_data_owner(data); - } - - if (a->data == NULL) a->data = data->data; + jl_array_data_owner(a) = (jl_value_t*)data; + a->how = 3; + a->data = data->data; + a->isshared = 1; + data->isshared = 1; if (ndims == 1) { size_t l = jl_unbox_long(jl_tupleref(dims,0)); @@ -196,7 +159,6 @@ jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_tuple_t *di #endif a->nrows = l; a->maxsize = l; - a->offset = 0; } else { size_t *adims = &a->nrows; @@ -241,21 +203,19 @@ jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, a->elsize = elsz; a->ptrarray = !isunboxed; a->ndims = 1; - a->isinline = 0; - + a->isshared = 1; + a->isaligned = 0; // TODO: allow passing memalign'd buffers if (own_buffer) { - a->ismalloc = 1; - jl_array_data_owner(a) = (jl_value_t*)jl_gc_acquire_buffer(data,nel*elsz,0); + a->how = 2; + jl_gc_track_malloced_array(a); } else { - a->ismalloc = 0; - jl_array_data_owner(a) = (jl_value_t*)a; + a->how = 0; } a->nrows = nel; a->maxsize = nel; a->offset = 0; - return a; } @@ -291,21 +251,20 @@ jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_tuple_t *dims, a->elsize = elsz; a->ptrarray = !isunboxed; a->ndims = ndims; - a->isinline = 0; - + a->offset = 0; + a->isshared = 1; + a->isaligned = 0; if (own_buffer) { - a->ismalloc = 1; - jl_array_data_owner(a) = (jl_value_t*)jl_gc_acquire_buffer(data,nel*elsz,0); + a->how = 2; + jl_gc_track_malloced_array(a); } else { - a->ismalloc = 0; - jl_array_data_owner(a) = (jl_value_t*)a; + a->how = 0; } if (ndims == 1) { a->nrows = nel; a->maxsize = nel; - a->offset = 0; } else { size_t *adims = &a->nrows; @@ -313,7 +272,6 @@ jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_tuple_t *dims, adims[i] = jl_unbox_long(jl_tupleref(dims, i)); } } - return a; } @@ -524,42 +482,56 @@ void jl_arrayunset(jl_array_t *a, size_t i) memset(ptail, 0, a->elsize); } -static jl_mallocptr_t *array_new_buffer(jl_array_t *a, size_t newlen) +// allocate buffer of newlen elements, placing old data at given offset (in #elts) +static void array_resize_buffer(jl_array_t *a, size_t newlen, size_t oldlen, size_t offs) { - size_t nbytes = newlen * a->elsize; - if (a->elsize == 1) { + size_t es = a->elsize; + size_t nbytes = newlen * es; + size_t offsnb = offs * es; + size_t oldnbytes = oldlen * es; + size_t oldoffsnb = a->offset * es; + if (es == 1) nbytes++; + assert(!a->isshared); + char *newdata; + if (a->how == 2) { + // already malloc'd - use realloc + newdata = jl_gc_managed_realloc((char*)a->data - oldoffsnb, nbytes, + oldnbytes+oldoffsnb, a->isaligned); + if (offs != a->offset) { + memmove(&newdata[offsnb], &newdata[oldoffsnb], oldnbytes); + } } - jl_mallocptr_t *mp = jl_gc_managed_malloc(nbytes); - char *newdata = mp->ptr; - if (a->ptrarray) - memset(newdata, 0, nbytes); - if (a->elsize == 1) newdata[nbytes-1] = '\0'; - return mp; + else { + if (nbytes >= MALLOC_THRESH) { + newdata = jl_gc_managed_malloc(nbytes); + jl_gc_track_malloced_array(a); + a->how = 2; + a->isaligned = 1; + } + else { + newdata = allocb(nbytes); + a->how = 1; + } + memcpy(newdata + offsnb, (char*)a->data, oldnbytes); + } + + a->data = newdata + offsnb; + if (a->ptrarray || es==1) + memset(newdata+offsnb+oldnbytes, 0, nbytes-oldnbytes-offsnb); + a->maxsize = newlen; } void jl_array_grow_end(jl_array_t *a, size_t inc) { + if (a->isshared) jl_error("cannot resize array with shared data"); // optimized for the case of only growing and shrinking at the end - size_t alen = jl_array_len(a); + size_t alen = jl_array_nrows(a); if ((alen + inc) > a->maxsize - a->offset) { size_t newlen = a->maxsize==0 ? (inc<4?4:inc) : a->maxsize*2; while ((alen + inc) > newlen - a->offset) newlen *= 2; - jl_mallocptr_t *mp = array_new_buffer(a, newlen); - char *newdata = mp->ptr; - size_t es = a->elsize; - newdata += (a->offset*es); - size_t anb = alen*es; - memcpy(newdata, (char*)a->data, anb); - if (es == 1) { - memset(newdata + anb, 0, (newlen-a->offset-alen)*es); - } - a->maxsize = newlen; - a->data = newdata; - jl_array_data_owner(a) = (jl_value_t*)mp; - a->ismalloc = 1; - a->isinline = 0; + array_resize_buffer(a, newlen, alen, a->offset); } #ifdef STORE_ARRAY_LEN a->length += inc; @@ -569,6 +541,7 @@ void jl_array_grow_end(jl_array_t *a, size_t inc) void jl_array_del_end(jl_array_t *a, size_t dec) { + if (a->isshared) jl_error("cannot resize array with shared data"); if (dec > a->nrows) jl_throw(jl_bounds_exception); char *ptail = (char*)a->data + (a->nrows-dec)*a->elsize; @@ -596,39 +569,39 @@ void jl_array_sizehint(jl_array_t *a, size_t sz) void jl_array_grow_beg(jl_array_t *a, size_t inc) { + if (a->isshared) jl_error("cannot resize array with shared data"); // designed to handle the case of growing and shrinking at both ends if (inc == 0) return; size_t es = a->elsize; - size_t nb = inc*es; + size_t incnb = inc*es; if (a->offset >= inc) { - a->data = (char*)a->data - nb; + a->data = (char*)a->data - incnb; a->offset -= inc; } else { size_t alen = a->nrows; size_t anb = alen*es; - char *newdata; - jl_mallocptr_t *mp = NULL; if (inc > (a->maxsize-alen)/2 - (a->maxsize-alen)/20) { - size_t newlen = a->maxsize==0 ? 2*inc : a->maxsize*2; + size_t newlen = a->maxsize==0 ? inc*2 : a->maxsize*2; while (alen+2*inc > newlen-a->offset) newlen *= 2; - mp = array_new_buffer(a, newlen); - newdata = mp->ptr; size_t center = (newlen - (alen + inc))/2; - newdata += (center*es); - a->maxsize = newlen; + array_resize_buffer(a, newlen, alen, center+inc); + char *newdata = (char*)a->data - (center+inc)*es; + if (a->ptrarray) { + memset(newdata, 0, (center+inc)*es); + } a->offset = center; + a->data = newdata + center*es; } else { size_t center = (a->maxsize - (alen + inc))/2; - newdata = (char*)a->data - es*a->offset + es*center; + char *newdata = (char*)a->data - es*a->offset + es*center; + memmove(&newdata[incnb], a->data, anb); + a->data = newdata; a->offset = center; } - memmove(&newdata[nb], a->data, anb); - a->data = newdata; - if (mp) { jl_array_data_owner(a) = (jl_value_t*)mp; a->ismalloc = 1; a->isinline = 0; } } #ifdef STORE_ARRAY_LEN a->length += inc; @@ -638,6 +611,7 @@ void jl_array_grow_beg(jl_array_t *a, size_t inc) void jl_array_del_beg(jl_array_t *a, size_t dec) { + if (a->isshared) jl_error("cannot resize array with shared data"); if (dec == 0) return; if (dec > a->nrows) diff --git a/src/gc.c b/src/gc.c index 36c74a72f5e44..a043ea044a138 100644 --- a/src/gc.c +++ b/src/gc.c @@ -24,12 +24,6 @@ // OBJPROFILE counts objects by type //#define OBJPROFILE -#ifdef _P64 -# define BVOFFS 2 -#else -# define BVOFFS 4 -#endif - #ifdef _P64 #define GC_PAGE_SZ (1536*sizeof(void*))//bytes #else @@ -59,6 +53,11 @@ typedef struct _pool_t { gcval_t *freelist; } pool_t; +#ifdef _P64 +# define BVOFFS 2 +#else +# define BVOFFS 4 +#endif typedef struct _bigval_t { struct _bigval_t *next; size_t sz; @@ -73,21 +72,7 @@ typedef struct _bigval_t { }; } bigval_t; -#define gc_marked(o) (((gcval_t*)(o))->marked) -#define gc_setmark(o) (((gcval_t*)(o))->marked=1) -#define gc_val_buf(o) ((gcval_t*)(((void**)(o))-1)) -#define gc_setmark_buf(o) gc_setmark(gc_val_buf(o)) - -static bigval_t *big_objects = NULL; - -static jl_mallocptr_t *malloc_ptrs = NULL; -static jl_mallocptr_t *malloc_ptrs_freelist = NULL; - -#define N_POOLS 42 -static pool_t norm_pools[N_POOLS]; -static pool_t ephe_pools[N_POOLS]; -static pool_t *pools = &norm_pools[0]; - +// GC knobs and self-measurement variables static size_t allocd_bytes = 0; static size_t freed_bytes = 0; #define default_collect_interval (3200*1024*sizeof(void*)) @@ -98,13 +83,6 @@ static size_t max_collect_interval = 1250000000UL; static size_t max_collect_interval = 500000000UL; #endif -static htable_t finalizer_table; -static arraylist_t to_finalize; - -static arraylist_t preserved_values; - -static arraylist_t weak_refs; - #ifdef OBJPROFILE static htable_t obj_counts; #endif @@ -114,6 +92,86 @@ static double total_gc_time=0; static size_t total_freed_bytes=0; #endif +// manipulating mark bits +#define gc_marked(o) (((gcval_t*)(o))->marked) +#define gc_setmark(o) (((gcval_t*)(o))->marked=1) +#define gc_val_buf(o) ((gcval_t*)(((void**)(o))-1)) +#define gc_setmark_buf(o) gc_setmark(gc_val_buf(o)) +#define gc_typeof(v) ((jl_value_t*)(((uptrint_t)jl_typeof(v))&~1UL)) + +// malloc wrappers, aligned allocation + +#ifdef _P64 +#define malloc_a16(sz) malloc(((sz)+15)&-16) +#define free_a16(p) free(p) + +#elif defined(_OS_WINDOWS_) /* 32-bit OS is implicit here. */ +#define malloc_a16(sz) _aligned_malloc(sz?((sz)+15)&-16:1, 16) +#define free_a16(p) _aligned_free(p) + +#elif defined(__APPLE__) +#define malloc_a16(sz) malloc(((sz)+15)&-16) +#define free_a16(p) free(p) + +#else +static inline void *malloc_a16(size_t sz) +{ + void *ptr; + if (posix_memalign(&ptr, 16, (sz+15)&-16)) + return NULL; + return ptr; +} +#define free_a16(p) free(p) + +#endif + +void *jl_gc_managed_malloc(size_t sz) +{ + if (allocd_bytes > collect_interval) { + jl_gc_collect(); + } + sz = (sz+15) & -16; + void *b = malloc_a16(sz); + if (b == NULL) + jl_throw(jl_memory_exception); + allocd_bytes += sz; + return b; +} + +void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned) +{ + if (allocd_bytes > collect_interval) { + jl_gc_collect(); + } + sz = (sz+15) & -16; + void *b; +#ifdef _P64 + b = realloc(d, sz); +#elif defined(_OS_WINDOWS_) + if (isaligned) + b = _aligned_realloc(d, sz, 16); + else + b = realloc(d, sz); +#elif defined(__APPLE__) + b = realloc(d, sz); +#else + // TODO better aligned realloc here + b = malloc_a16(sz); + if (b != NULL) { + memcpy(b, d, oldsz); + if (isaligned) free_a16(d); else free(d); + } +#endif + if (b == NULL) + jl_throw(jl_memory_exception); + allocd_bytes += sz; + return b; +} + +// preserved values + +static arraylist_t preserved_values; + int jl_gc_n_preserved_values(void) { return preserved_values.len; @@ -129,6 +187,10 @@ void jl_gc_unpreserve(void) (void)arraylist_pop(&preserved_values); } +// weak references + +static arraylist_t weak_refs; + DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { jl_weakref_t *wr = (jl_weakref_t*)alloc_2w(); @@ -163,6 +225,11 @@ static void sweep_weak_refs(void) weak_refs.len -= ndel; } +// finalization + +static htable_t finalizer_table; +static arraylist_t to_finalize; + static void schedule_finalization(void *o) { arraylist_push(&to_finalize, o); @@ -207,45 +274,9 @@ void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) } } -static int szclass(size_t sz) -{ -#ifndef _P64 - if (sz <= 8) return 0; -#endif - if (sz <= 56) return ((sz+3)/4) - 2; - if (sz <= 96) return ((sz+7)/8) + 5; - if (sz <= 512) { - if (sz <= 256) return ((sz+15)-112)/16 + 18; - else return ((sz+31)-288)/32 + 28; - } - if (sz <= 1024) return ((sz+127)-640)/128 + 36; - if (sz <= 1536) return 40; - return 41; -} - -#ifdef _P64 -#define malloc_a16(sz) malloc(((sz)+15)&-16) -#define free_a16(p) free(p) - -#elif defined(_OS_WINDOWS_) /* 32-bit OS is implicit here. */ -#define malloc_a16(sz) _aligned_malloc(sz?((sz)+15)&-16:1, 16) -#define free_a16(p) _aligned_free(p) +// big value list -#elif defined(__APPLE__) -#define malloc_a16(sz) malloc(((sz)+15)&-16) -#define free_a16(p) free(p) - -#else -static inline void *malloc_a16(size_t sz) -{ - void *ptr; - if (posix_memalign(&ptr, 16, (sz+15)&-16)) - return NULL; - return ptr; -} -#define free_a16(p) free(p) - -#endif +static bigval_t *big_objects = NULL; static void *alloc_big(size_t sz) { @@ -292,73 +323,78 @@ static void sweep_big(void) } } -jl_mallocptr_t *jl_gc_acquire_buffer(void *b, size_t sz, int isaligned) +// tracking Arrays with malloc'd storage + +typedef struct _mallocarray_t { + jl_array_t *a; + struct _mallocarray_t *next; +} mallocarray_t; + +static mallocarray_t *mallocarrays = NULL; +static mallocarray_t *mafreelist = NULL; + +void jl_gc_track_malloced_array(jl_array_t *a) { - jl_mallocptr_t *mp; - if (malloc_ptrs_freelist == NULL) { - mp = malloc(sizeof(jl_mallocptr_t)); + mallocarray_t *ma; + if (mafreelist == NULL) { + ma = malloc(sizeof(mallocarray_t)); } else { - mp = malloc_ptrs_freelist; - malloc_ptrs_freelist = malloc_ptrs_freelist->next; + ma = mafreelist; + mafreelist = mafreelist->next; } -#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) - mp->isaligned = isaligned; -#else - (void)isaligned; -#endif - mp->sz = sz; - mp->ptr = b; - mp->next = malloc_ptrs; - malloc_ptrs = mp; - return mp; + ma->a = a; + ma->next = mallocarrays; + mallocarrays = ma; } -jl_mallocptr_t *jl_gc_managed_malloc(size_t sz) +static size_t array_nbytes(jl_array_t *a) { - if (allocd_bytes > collect_interval) { - jl_gc_collect(); + if (jl_array_ndims(a)==1) + return a->elsize * a->maxsize; + else + return a->elsize * jl_array_len(a); +} + +void jl_gc_free_array(jl_array_t *a) +{ + if (a->how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->isaligned) + free_a16(d); + else + free(d); + freed_bytes += array_nbytes(a); } - sz = (sz+15) & -16; - void *b = malloc_a16(sz); - if (b == NULL) - jl_throw(jl_memory_exception); - allocd_bytes += sz; - return jl_gc_acquire_buffer(b, sz, 1); } -static void sweep_malloc_ptrs(void) +static void sweep_malloced_arrays() { - jl_mallocptr_t *mp = malloc_ptrs; - jl_mallocptr_t **pmp = &malloc_ptrs; - while (mp != NULL) { - jl_mallocptr_t *nxt = (jl_mallocptr_t*)((uptrint_t)mp->next & ~1UL); - if (((gcval_t*)mp)->marked) { - pmp = &mp->next; - ((gcval_t*)mp)->marked = 0; + mallocarray_t *ma = mallocarrays; + mallocarray_t **pma = &mallocarrays; + while (ma != NULL) { + mallocarray_t *nxt = ma->next; + if (gc_marked(ma->a)) { + pma = &ma->next; } else { - *pmp = nxt; - if (mp->ptr) { - freed_bytes += mp->sz; -#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) - if (mp->isaligned) { - free_a16(mp->ptr); - } - else { - free(mp->ptr); - } -#else - free_a16(mp->ptr); -#endif - } - mp->next = malloc_ptrs_freelist; - malloc_ptrs_freelist = mp; + *pma = nxt; + assert(ma->a->how == 2); + jl_gc_free_array(ma->a); + ma->next = mafreelist; + mafreelist = ma; } - mp = nxt; + ma = nxt; } } +// pool allocation + +#define N_POOLS 42 +static pool_t norm_pools[N_POOLS]; +static pool_t ephe_pools[N_POOLS]; +static pool_t *pools = &norm_pools[0]; + static void add_page(pool_t *p) { gcpage_t *pg = malloc_a16(sizeof(gcpage_t)); @@ -397,6 +433,22 @@ static inline void *pool_alloc(pool_t *p) return v; } +static int szclass(size_t sz) +{ +#ifndef _P64 + if (sz <= 8) return 0; +#endif + if (sz <= 56) return ((sz+3)/4) - 2; + if (sz <= 96) return ((sz+7)/8) + 5; + if (sz <= 512) { + if (sz <= 256) return ((sz+15)-112)/16 + 18; + else return ((sz+31)-288)/32 + 28; + } + if (sz <= 1024) return ((sz+127)-640)/128 + 36; + if (sz <= 1536) return 40; + return 41; +} + static void sweep_pool(pool_t *p) { //int empty; @@ -456,12 +508,14 @@ static void sweep_pool(pool_t *p) freed_bytes += (nfreed - old_nfree)*osize; } +// sweep phase + extern void jl_unmark_symbols(void); static void gc_sweep(void) { sweep_big(); - sweep_malloc_ptrs(); + sweep_malloced_arrays(); int i; for(i=0; i < N_POOLS; i++) { sweep_pool(&norm_pools[i]); @@ -470,12 +524,12 @@ static void gc_sweep(void) jl_unmark_symbols(); } +// mark phase + static jl_value_t **mark_stack = NULL; static size_t mark_stack_size = 0; static size_t mark_sp = 0; -#define gc_typeof(v) ((jl_value_t*)(((uptrint_t)jl_typeof(v))&~1UL)) - static void push_root(jl_value_t *v) { assert(v != NULL); @@ -574,30 +628,17 @@ static void gc_mark_all() } else if (((jl_datatype_t*)(vt))->name == jl_array_typename) { jl_array_t *a = (jl_array_t*)v; - char *data = a->data; - if (data == NULL) continue; - int ndims = jl_array_ndims(a); - char *data0 = data; - if (ndims == 1) data0 -= a->offset*a->elsize; - if (!a->isinline) { + if (a->how == 3) { jl_value_t *owner = jl_array_data_owner(a); - if (a->ismalloc) { - // jl_mallocptr_t - if (gc_marked(owner)) - continue; - gc_setmark(owner); - } - else { - // an array - v = owner; - if (v != (jl_value_t*)a) { - gc_push_root(v); - continue; - } - } + gc_push_root(owner); + continue; } - if (a->ptrarray) { + else if (a->how == 1) { + gc_setmark_buf((char*)a->data - a->offset*a->elsize); + } + if (a->ptrarray && a->data!=NULL) { size_t l = jl_array_len(a); + void *data = a->data; for(size_t i=0; i < l; i++) { jl_value_t *elt = ((jl_value_t**)data)[i]; if (elt != NULL) gc_push_root(elt); @@ -727,6 +768,8 @@ static void gc_mark(void) gc_mark_all(); } +// collector entry point and control + static int is_gc_enabled = 1; DLLEXPORT void jl_gc_enable(void) { is_gc_enabled = 1; } DLLEXPORT void jl_gc_disable(void) { is_gc_enabled = 0; } @@ -756,9 +799,7 @@ static void print_obj_profile(void) void jl_gc_collect(void) { size_t actual_allocd = allocd_bytes; - allocd_bytes = 0; if (is_gc_enabled) { - freed_bytes = 0; JL_SIGATOMIC_BEGIN(); #if defined(GCTIME) || defined(GC_FINAL_STATS) double t0 = clock_now(); @@ -796,16 +837,20 @@ void jl_gc_collect(void) actual_allocd, freed_bytes, collect_interval, (double)freed_bytes/(double)actual_allocd); #endif - if (freed_bytes < ((7*actual_allocd)/10)) { - if (collect_interval <= (2*max_collect_interval)/5) - collect_interval = (5*collect_interval)/2; + if (freed_bytes < (7*(actual_allocd/10))) { + if (collect_interval <= 2*(max_collect_interval/5)) + collect_interval = 5*(collect_interval/2); } else { collect_interval = default_collect_interval; } + freed_bytes = 0; } + allocd_bytes = 0; } +// allocator entry points + void *allocb(size_t sz) { void *b; @@ -887,6 +932,8 @@ void jl_print_gc_stats(JL_STREAM *s) } #endif +// initialization + void jl_gc_init(void) { int szc[N_POOLS] = { 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, @@ -930,6 +977,8 @@ void jl_gc_init(void) #endif } +// GC summary stats + #if defined(MEMPROFILE) static size_t pool_stats(pool_t *p, size_t *pwaste) { @@ -956,13 +1005,13 @@ static size_t pool_stats(pool_t *p, size_t *pwaste) } *pwaste = npgs*GC_PAGE_SZ - (nused*p->osize); JL_PRINTF(JL_STDOUT, - "%4d : %7d/%7d objects, %5d pages, %8d bytes, %8d waste\n", - p->osize, - nused, - nused+nfree, - npgs, - nused*p->osize, - *pwaste); + "%4d : %7d/%7d objects, %5d pages, %8d bytes, %8d waste\n", + p->osize, + nused, + nused+nfree, + npgs, + nused*p->osize, + *pwaste); return nused*p->osize; } @@ -997,14 +1046,13 @@ static void big_obj_stats(void) } v = v->next; } - jl_mallocptr_t *mp = malloc_ptrs; - while (mp != NULL) { - jl_mallocptr_t *nxt = (jl_mallocptr_t*)((uptrint_t)mp->next & ~1UL); - if (((gcval_t*)mp)->marked) { + mallocarray_t *ma = mallocarrays; + while (ma != NULL) { + if (gc_marked(ma->a)) { nused++; - nbytes += mp->sz; + nbytes += array_nbytes(ma->a); } - mp = nxt; + ma = ma->next; } JL_PRINTF(JL_STDOUT, "%d bytes in %d large objects\n", nbytes, nused); diff --git a/src/julia.h b/src/julia.h index 08a9b20ce4b0c..84d053321d9d9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -76,27 +76,10 @@ typedef struct { jl_value_t *data[1]; } jl_tuple_t; -// pseudo-object to track managed malloc pointers -// currently only referenced from an array's data owner field -typedef struct _jl_mallocptr_t { - struct _jl_mallocptr_t *next; - size_t sz; - void *ptr; -#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) - int isaligned; -#endif -} jl_mallocptr_t; - // how much space we're willing to waste if an array outgrows its // original object #define ARRAY_INLINE_NBYTES (2048*sizeof(void*)) -/* - array data is allocated in two ways: either inline in the array object, - (in _space), or with malloc with data owner pointing to a jl_mallocptr_t. - data owner can also point to another array, if the original data was - allocated inline. -*/ typedef struct { JL_DATA_TYPE void *data; @@ -104,10 +87,18 @@ typedef struct { size_t length; #endif - unsigned short ndims:13; + unsigned short ndims:11; unsigned short ptrarray:1; // representation is pointer array - unsigned short ismalloc:1; // data owner is a jl_mallocptr_t - unsigned short isinline:1; // data stored inline + /* + how - allocation style + 0 = data is inlined, or a foreign pointer we don't manage + 1 = julia-allocated buffer that needs to be marked + 2 = malloc-allocated pointer this array object manages + 3 = has a pointer to the Array that owns the data + */ + unsigned short how:2; + unsigned short isshared:1; // data is shared by multiple Arrays + unsigned short isaligned:1; // data allocated with memalign uint16_t elsize; uint32_t offset; // for 1-d only. does not need to get big. @@ -120,11 +111,7 @@ typedef struct { }; // other dim sizes go here for ndims > 2 - // followed by alignment padding and inline data, or an owner pointer - union { - char _space[1]; - void *_pad; - }; + // followed by alignment padding and inline data, or owner pointer } jl_array_t; #ifdef STORE_ARRAY_LEN @@ -138,7 +125,7 @@ DLLEXPORT size_t jl_array_len_(jl_array_t *a); #define jl_array_dim0(a) (((jl_array_t*)(a))->nrows) #define jl_array_nrows(a) (((jl_array_t*)(a))->nrows) #define jl_array_ndims(a) ((int32_t)(((jl_array_t*)a)->ndims)) -#define jl_array_data_owner(a) (*((jl_value_t**)(&a->_pad+jl_array_ndimwords(jl_array_ndims(a))))) +#define jl_array_data_owner(a) (*((jl_value_t**)(&a->ncols+1+jl_array_ndimwords(jl_array_ndims(a))))) // compute # of extra words needed to store dimensions static inline int jl_array_ndimwords(uint32_t ndims) @@ -1100,8 +1087,10 @@ void jl_gc_unpreserve(void); int jl_gc_n_preserved_values(void); DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f); DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value); -jl_mallocptr_t *jl_gc_acquire_buffer(void *b, size_t sz, int isaligned); -jl_mallocptr_t *jl_gc_managed_malloc(size_t sz); +void *jl_gc_managed_malloc(size_t sz); +void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned); +void jl_gc_free_array(jl_array_t *a); +void jl_gc_track_malloced_array(jl_array_t *a); void *alloc_2w(void); void *alloc_3w(void); void *alloc_4w(void);