From b97e3c423b06619721a1fc8eb799030b62909f23 Mon Sep 17 00:00:00 2001 From: "Christopher J. Hazard, PhD" <143410553+howsohazard@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:50:44 -0400 Subject: [PATCH 1/4] 18025: Begins cleanup --- src/Amalgam/IntegerSet.h | 41 +++++++++++--- src/Amalgam/SeparableBoxFilterDataStore.cpp | 15 ----- src/Amalgam/SeparableBoxFilterDataStore.h | 15 +++-- src/Amalgam/entity/EntityQueryBuilder.h | 5 +- src/Amalgam/entity/EntityQueryCaches.cpp | 62 ++++++++++++--------- 5 files changed, 84 insertions(+), 54 deletions(-) diff --git a/src/Amalgam/IntegerSet.h b/src/Amalgam/IntegerSet.h index 30c25c66..9382181f 100644 --- a/src/Amalgam/IntegerSet.h +++ b/src/Amalgam/IntegerSet.h @@ -950,7 +950,8 @@ class BitArrayIntegerSet } //Sets this to the BitArrayIntegerSet to the set that contains only elements that it and another jointly contain - void Intersect(BitArrayIntegerSet &other) + // does NOT update the number of elements, so UpdateNumElements must be called + void IntersectInBatch(BitArrayIntegerSet &other) { //if no intersection, then just clear and exit if(numElements == 0 || other.numElements == 0) @@ -971,6 +972,12 @@ class BitArrayIntegerSet bitBucket[i] = 0; TrimBack(); + } + + //Sets this to the BitArrayIntegerSet to the set that contains only elements that it and another jointly contain + inline void Intersect(BitArrayIntegerSet &other) + { + IntersectInBatch(other); UpdateNumElements(); } @@ -1492,12 +1499,22 @@ class EfficientIntegerSet } //removs all elements of this container from other - void EraseTo(BitArrayIntegerSet &other) + inline void EraseTo(BitArrayIntegerSet &other, bool in_batch = false) { if(isSisContainer) - other.erase(sisContainer); + { + if(in_batch) + other.EraseInBatch(sisContainer); + else + other.erase(sisContainer); + } else - other.erase(baisContainer); + { + if(in_batch) + other.EraseInBatch(baisContainer); + else + other.erase(baisContainer); + } } //removes all elements contained by other, intended for calling in a batch @@ -1660,12 +1677,22 @@ class EfficientIntegerSet } //sets other to the set that contains only elements that it and other jointly contain - inline void IntersectTo(BitArrayIntegerSet &other) + inline void IntersectTo(BitArrayIntegerSet &other, bool in_batch = false) { if(IsSisContainer()) - other.Intersect(sisContainer); + { + if(in_batch) + other.IntersectInBatch(sisContainer); + else + other.Intersect(sisContainer); + } else - other.Intersect(baisContainer); + { + if(in_batch) + other.IntersectInBatch(baisContainer); + else + other.Intersect(baisContainer); + } } //flips the elements in the set starting with element 0 up to but not including up_to_id diff --git a/src/Amalgam/SeparableBoxFilterDataStore.cpp b/src/Amalgam/SeparableBoxFilterDataStore.cpp index 23e9c02d..4097419b 100644 --- a/src/Amalgam/SeparableBoxFilterDataStore.cpp +++ b/src/Amalgam/SeparableBoxFilterDataStore.cpp @@ -299,11 +299,6 @@ void SeparableBoxFilterDataStore::FindEntitiesWithinDistance(GeneralizedDistance distances.clear(); distances.resize(GetNumInsertedEntities(), 0.0); - //remove any entities that are missing labels - for(auto absolute_feature_index : target_column_indices) - columnData[absolute_feature_index]->invalidIndices.EraseInBatchFrom(enabled_indices); - enabled_indices.UpdateNumElements(); - //for each desired feature, compute and add distance terms of possible window query candidate entities for(size_t query_feature_index = 0; query_feature_index < target_column_indices.size(); query_feature_index++) { @@ -476,11 +471,6 @@ void SeparableBoxFilterDataStore::FindEntitiesNearestToIndexedEntity(Generalized possible_knn_indices.erase(search_index); possible_knn_indices.erase(ignore_index); - //remove invalid cases - for(size_t absolute_feature_index : target_column_indices) - columnData[absolute_feature_index]->invalidIndices.EraseInBatchFrom(possible_knn_indices); - possible_knn_indices.UpdateNumElements(); - //if num enabled indices < top_k, return sorted distances if(GetNumInsertedEntities() <= top_k || possible_knn_indices.size() <= top_k) return FindAllValidElementDistances(*dist_params, target_column_indices, target_values, target_value_types, possible_knn_indices, distances_out, rand_stream); @@ -634,11 +624,6 @@ void SeparableBoxFilterDataStore::FindNearestEntities(GeneralizedDistance &dist_ PopulateUnknownFeatureValueTerms(dist_params); - //ignore cases with missing labels - for(size_t i = 0; i < num_enabled_features; i++) - columnData[target_column_indices[i]]->invalidIndices.EraseInBatchFrom(enabled_indices); - enabled_indices.UpdateNumElements(); - enabled_indices.erase(ignore_entity_index); //if num enabled indices < top_k, return sorted distances diff --git a/src/Amalgam/SeparableBoxFilterDataStore.h b/src/Amalgam/SeparableBoxFilterDataStore.h index 306709a7..5ade6f81 100644 --- a/src/Amalgam/SeparableBoxFilterDataStore.h +++ b/src/Amalgam/SeparableBoxFilterDataStore.h @@ -266,7 +266,9 @@ class SeparableBoxFilterDataStore } //filters out to include only entities that have the given feature - inline void IntersectEntitiesWithFeature(size_t feature_id, BitArrayIntegerSet &out) + //if in_batch is true, will update out in batch for performance, + //meaning its number of elements will need to be updated + inline void IntersectEntitiesWithFeature(size_t feature_id, BitArrayIntegerSet &out, bool in_batch) { if(numEntities == 0) { @@ -281,7 +283,7 @@ class SeparableBoxFilterDataStore return; } - columnData[column->second]->invalidIndices.EraseTo(out); + columnData[column->second]->invalidIndices.EraseTo(out, in_batch); } //sets out to include only entities that have the given feature and records the values into @@ -364,7 +366,9 @@ class SeparableBoxFilterDataStore } //filters out to include only entities that don't have the given feature - inline void IntersectEntitiesWithoutFeature(size_t feature_id, BitArrayIntegerSet &out) + //if in_batch is true, will update out in batch for performance, + //meaning its number of elements will need to be updated + inline void IntersectEntitiesWithoutFeature(size_t feature_id, BitArrayIntegerSet &out, bool in_batch) { if(numEntities == 0) return; @@ -373,7 +377,7 @@ class SeparableBoxFilterDataStore if(column == labelIdToColumnIndex.end()) return; - columnData[column->second]->invalidIndices.IntersectTo(out); + columnData[column->second]->invalidIndices.IntersectTo(out, in_batch); } //given a feature_id, value_type, and value, inserts into out all the entities that have the value @@ -488,6 +492,7 @@ class SeparableBoxFilterDataStore //populates distances_out with all entities and their distances that have a distance to target less than max_dist //if enabled_indices is not nullptr, intersects with the enabled_indices set. + //assumes that enabled_indices only contains indices that have valid values for all the features void FindEntitiesWithinDistance(GeneralizedDistance &dist_params, std::vector &position_label_ids, std::vector &position_values, std::vector &position_value_types, double max_dist, BitArrayIntegerSet &enabled_indices, std::vector> &distances_out); @@ -496,6 +501,7 @@ class SeparableBoxFilterDataStore // if expand_to_first_nonzero_distance is set, then it will expand top_k until it it finds the first nonzero distance or until it includes all enabled indices // if const_dist_params is true, then it will make a copy before making any modifications //will not modify enabled_indices, but instead will make a copy for any modifications + //assumes that enabled_indices only contains indices that have valid values for all the features void FindEntitiesNearestToIndexedEntity(GeneralizedDistance *dist_params_ref, std::vector &position_label_ids, bool constant_dist_params, size_t search_index, size_t top_k, BitArrayIntegerSet &enabled_indices, bool expand_to_first_nonzero_distance, std::vector> &distances_out, @@ -503,6 +509,7 @@ class SeparableBoxFilterDataStore //Finds the nearest neighbors //enabled_indices is the set of entities to find from, and will be modified + //assumes that enabled_indices only contains indices that have valid values for all the features void FindNearestEntities(GeneralizedDistance &dist_params, std::vector &position_label_ids, std::vector &position_values, std::vector &position_value_types, size_t top_k, size_t ignore_entity_index, BitArrayIntegerSet &enabled_indices, diff --git a/src/Amalgam/entity/EntityQueryBuilder.h b/src/Amalgam/entity/EntityQueryBuilder.h index 24db9321..9c93af86 100644 --- a/src/Amalgam/entity/EntityQueryBuilder.h +++ b/src/Amalgam/entity/EntityQueryBuilder.h @@ -417,11 +417,10 @@ namespace EntityQueryBuilder } } } - else //entities may have missing data, so need exist query - { + else need_exist_query = true; - } + //TODO: remove zeroed features where appropriate if(need_exist_query) { //add exists query and swap, so the exists_condition is before cur_condition diff --git a/src/Amalgam/entity/EntityQueryCaches.cpp b/src/Amalgam/entity/EntityQueryCaches.cpp index 3ddd2932..253ea5c5 100644 --- a/src/Amalgam/entity/EntityQueryCaches.cpp +++ b/src/Amalgam/entity/EntityQueryCaches.cpp @@ -204,8 +204,12 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray is_first = false; } else - sbfds.IntersectEntitiesWithFeature(label, matching_entities); + sbfds.IntersectEntitiesWithFeature(label, matching_entities, true); } + + if(!is_first || cond->existLabels.size() > 0) + matching_entities.UpdateNumElements(); + return; } @@ -219,8 +223,12 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray is_first = false; } else - sbfds.IntersectEntitiesWithoutFeature(label, matching_entities); + sbfds.IntersectEntitiesWithoutFeature(label, matching_entities, true); } + + if(!is_first || cond->existLabels.size() > 0) + matching_entities.UpdateNumElements(); + return; } @@ -241,6 +249,21 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray EntityQueriesStatistics::DistanceTransform distance_transform(cond->transformSuprisalToProb, cond->distanceWeightExponent, use_entity_weights, get_weight); + //if first, need to populate with all entities + if(is_first) + { + matching_entities.clear(); + matching_entities.SetAllIds(sbfds.GetNumInsertedEntities()); + } + + //only select cases that have all of the correct features + for(auto i : cond->positionLabels) + sbfds.IntersectEntitiesWithFeature(i, matching_entities, true); + matching_entities.UpdateNumElements(); + + if(matching_entities.size() == 0) + return; + if(cond->queryType == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE || cond->queryType == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE) { //labels and values must have the same size @@ -250,12 +273,14 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray return; } - //if first, need to populate with all entities - if(is_first) - { - matching_entities.clear(); - matching_entities.SetAllIds(sbfds.GetNumInsertedEntities()); - } + //TODO 18025: finish moving this here from EntityQueryBuilder + //for(size_t i = 0; i < cond->distParams.featureParams.size(); i++) + //{ + // if(!cond->distParams.IsFeatureEnabled(i)) + // { + // + // } + //} //if no position labels, then the weight must be zero so just randomly choose k if(cond->positionLabels.size() == 0) @@ -300,18 +325,9 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray } else //cond->queryType == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS or ENT_COMPUTE_ENTITY_CONVICTIONS or ENT_COMPUTE_ENTITY_KL_DIVERGENCES or ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE { - size_t total_contained_entities = sbfds.GetNumInsertedEntities(); - if(total_contained_entities == 0) - return; - - //if there are no existLabels, or number of existLabels is same as the number of entities in cache, we don't compute on subset - const bool compute_on_subset = (cond->existLabels.size() != 0 && cond->existLabels.size() < total_contained_entities); - - size_t top_k = std::min(static_cast(cond->maxToRetrieve), total_contained_entities); - BitArrayIntegerSet *ents_to_compute_ptr = nullptr; //if nullptr, compute is done on all entities in the cache - if(compute_on_subset) //if subset is specified, set ents_to_compute_ptr to set of ents_to_compute + if(cond->existLabels.size() != 0) //if subset is specified, set ents_to_compute_ptr to set of ents_to_compute { ents_to_compute_ptr = &buffers.tempMatchingEntityIndices; ents_to_compute_ptr->clear(); @@ -345,16 +361,12 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray ents_to_compute_ptr = &matching_entities; } - //only select cases that have all of the correct features - for(auto i : cond->positionLabels) - sbfds.IntersectEntitiesWithFeature(i, *ents_to_compute_ptr); - #ifdef MULTITHREAD_SUPPORT ConvictionProcessor conviction_processor(buffers.convictionBuffers, - buffers.knnCache, distance_transform, top_k, cond->useConcurrency); + buffers.knnCache, distance_transform, static_cast(cond->maxToRetrieve), cond->useConcurrency); #else ConvictionProcessor conviction_processor(buffers.convictionBuffers, - buffers.knnCache, distance_transform, top_k); + buffers.knnCache, distance_transform, static_cast(cond->maxToRetrieve)); #endif buffers.knnCache.ResetCache(sbfds, matching_entities, cond->distParams, cond->positionLabels); @@ -548,7 +560,7 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray if(is_first) sbfds.FindAllEntitiesWithFeature(cond->singleLabel, matching_entities); else - sbfds.IntersectEntitiesWithFeature(cond->singleLabel, matching_entities); + sbfds.IntersectEntitiesWithFeature(cond->singleLabel, matching_entities, false); BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; temp.clear(); From e981dacd559f8ab8a9acdaca81ac6afe9dcb218b Mon Sep 17 00:00:00 2001 From: "Christopher J. Hazard, PhD" <143410553+howsohazard@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:31:38 -0400 Subject: [PATCH 2/4] 18025: More cleanup --- src/Amalgam/entity/EntityQueries.cpp | 3 ++ src/Amalgam/entity/EntityQueryBuilder.h | 68 ------------------------ src/Amalgam/entity/EntityQueryCaches.cpp | 32 +++++++---- 3 files changed, 24 insertions(+), 79 deletions(-) diff --git a/src/Amalgam/entity/EntityQueries.cpp b/src/Amalgam/entity/EntityQueries.cpp index 2fdf6b27..259363df 100644 --- a/src/Amalgam/entity/EntityQueries.cpp +++ b/src/Amalgam/entity/EntityQueries.cpp @@ -695,6 +695,8 @@ EvaluableNodeReference EntityQueryCondition::GetMatchingEntities(Entity *contain { size_t num_to_keep = std::min(static_cast(maxToRetrieve), matching_entities.size()); + distParams.SetAndConstrainParams(); + //get values for each entity StochasticTieBreakingPriorityQueue> nearest_entities(randomStream.CreateOtherStreamViaRand()); for(size_t i = 0; i < matching_entities.size(); i++) @@ -755,6 +757,7 @@ EvaluableNodeReference EntityQueryCondition::GetMatchingEntities(Entity *contain case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: { + distParams.SetAndConstrainParams(); //find those that match for(size_t i = 0; i < matching_entities.size(); i++) { diff --git a/src/Amalgam/entity/EntityQueryBuilder.h b/src/Amalgam/entity/EntityQueryBuilder.h index 9c93af86..1bd0d4f5 100644 --- a/src/Amalgam/entity/EntityQueryBuilder.h +++ b/src/Amalgam/entity/EntityQueryBuilder.h @@ -399,74 +399,6 @@ namespace EntityQueryBuilder } } } - - - //check if any of the positions are not valid - bool need_exist_query = false; - bool has_position_data = !DoesDistanceQueryUseEntitiesInsteadOfPosition(condition_type); - - //check for any disabled features (e.g., zero'd weight) - if(has_position_data) - { - for(size_t i = 0; i < cur_condition->distParams.featureParams.size(); i++) - { - if(!cur_condition->distParams.IsFeatureEnabled(i)) - { - need_exist_query = true; - break; - } - } - } - else - need_exist_query = true; - - //TODO: remove zeroed features where appropriate - if(need_exist_query) - { - //add exists query and swap, so the exists_condition is before cur_condition - conditions.emplace_back(); - EntityQueryCondition *exists_condition = &(conditions.back()); - - //need to reretrieve the pointer in case there has been a reallocation via emplace_back - // don't get the end one just placed, get the one before that - cur_condition = &conditions[conditions.size() - 2]; - - //swap data and pointers - std::swap(*exists_condition, *cur_condition); - std::swap(exists_condition, cur_condition); - - exists_condition->queryType = ENT_QUERY_EXISTS; - //if has_position_data, then will add on those needed features below - // but if it doesn't, then need to include all labels - if(!has_position_data) - exists_condition->existLabels = cur_condition->positionLabels; - - //remove any 0 weighted features; if has_position_data, then move them to the exist query - // don't increment i here because if a feature is moved to the exists_condition, - // then a new feature is moved into that new index and that feature position needs to be rechecked - for(size_t i = 0; i < cur_condition->positionLabels.size();) - { - if(cur_condition->distParams.featureParams[i].weight == 0.0) - { - //only move/remove data if the right type of query - if(has_position_data) - { - exists_condition->existLabels.push_back(cur_condition->positionLabels[i]); - cur_condition->valueToCompare.erase(cur_condition->valueToCompare.begin() + i); - cur_condition->valueTypes.erase(cur_condition->valueTypes.begin() + i); - } - - cur_condition->positionLabels.erase(cur_condition->positionLabels.begin() + i); - cur_condition->distParams.featureParams.erase(begin(cur_condition->distParams.featureParams) + i); - continue; - } - - i++; - } - } - - //perform this last to make sure all changes are in - cur_condition->distParams.SetAndConstrainParams(); } //builds a query condition from cn diff --git a/src/Amalgam/entity/EntityQueryCaches.cpp b/src/Amalgam/entity/EntityQueryCaches.cpp index 253ea5c5..0ea16f5e 100644 --- a/src/Amalgam/entity/EntityQueryCaches.cpp +++ b/src/Amalgam/entity/EntityQueryCaches.cpp @@ -257,13 +257,32 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray } //only select cases that have all of the correct features - for(auto i : cond->positionLabels) - sbfds.IntersectEntitiesWithFeature(i, matching_entities, true); + //but remove features that have 0 weight for better performance + for(size_t i = 0; i < cond->positionLabels.size(); i++) + { + sbfds.IntersectEntitiesWithFeature(cond->positionLabels[i], matching_entities, true); + if(!cond->distParams.IsFeatureEnabled(i)) + { + cond->positionLabels.erase(cond->positionLabels.begin() + i); + cond->distParams.featureParams.erase(begin(cond->distParams.featureParams) + i); + + if(cond->queryType == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE || cond->queryType == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE) + { + cond->valueToCompare.erase(cond->valueToCompare.begin() + i); + cond->valueTypes.erase(cond->valueTypes.begin() + i); + } + + //need to process the new value in this feature slot + i--; + } + } matching_entities.UpdateNumElements(); if(matching_entities.size() == 0) return; + cond->distParams.SetAndConstrainParams(); + if(cond->queryType == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE || cond->queryType == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE) { //labels and values must have the same size @@ -273,15 +292,6 @@ void EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArray return; } - //TODO 18025: finish moving this here from EntityQueryBuilder - //for(size_t i = 0; i < cond->distParams.featureParams.size(); i++) - //{ - // if(!cond->distParams.IsFeatureEnabled(i)) - // { - // - // } - //} - //if no position labels, then the weight must be zero so just randomly choose k if(cond->positionLabels.size() == 0) { From e92add92d37eb04a8948e1d462e59198804f200d Mon Sep 17 00:00:00 2001 From: "Christopher J. Hazard, PhD" <143410553+howsohazard@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:48:40 -0400 Subject: [PATCH 3/4] 18025: Minor memory to for Interpreter --- src/Amalgam/interpreter/Interpreter.cpp | 5 +++++ src/Amalgam/interpreter/Interpreter.h | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Amalgam/interpreter/Interpreter.cpp b/src/Amalgam/interpreter/Interpreter.cpp index b8a8af85..9479e64e 100644 --- a/src/Amalgam/interpreter/Interpreter.cpp +++ b/src/Amalgam/interpreter/Interpreter.cpp @@ -21,6 +21,11 @@ #include #include +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) +thread_local +#endif + std::vector Interpreter::conditionsBuffer; + std::array Interpreter::_opcodes = { //built-in / system specific diff --git a/src/Amalgam/interpreter/Interpreter.h b/src/Amalgam/interpreter/Interpreter.h index fd0d6009..82e2a816 100644 --- a/src/Amalgam/interpreter/Interpreter.h +++ b/src/Amalgam/interpreter/Interpreter.h @@ -814,7 +814,11 @@ class Interpreter std::vector constructionStackIndices; //buffer to use as for parsing and querying conditions - std::vector conditionsBuffer; + //one per thread to save memory on Interpreter objects +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + thread_local +#endif + static std::vector conditionsBuffer; //the interpreter that called this one -- used for debugging Interpreter *callingInterpreter; From 773f4b0cfe8472159cf853a9434cc9e8722fc99f Mon Sep 17 00:00:00 2001 From: "Christopher J. Hazard, PhD" <143410553+howsohazard@users.noreply.github.com> Date: Fri, 27 Oct 2023 17:01:37 -0400 Subject: [PATCH 4/4] 18025: Fixes typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 00eb245b..b51b39a1 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ ## Introduction -Amalgam™ is a domain specific language ([DSL](https://en.wikipedia.org/wiki/Domain-specific_language)) developed primarily for [genetic programming](https://en.wikipedia.org/wiki/Generic_programming) and [instance based machine learning](https://en.wikipedia.org/wiki/Instance-based_learning), but also for simulation, agent based modeling, data storage and retrieval, the mathematics of probability theory and information theory, and game content and AI. The language format is somewhat LISP-like in that it uses parenthesized list format with prefix notation and is geared toward functional programming, where there is a one-to-one mapping between the code and the corresponding parse tree. +Amalgam™ is a domain specific language ([DSL](https://en.wikipedia.org/wiki/Domain-specific_language)) developed primarily for [genetic programming](https://en.wikipedia.org/wiki/Genetic_programming) and [instance based machine learning](https://en.wikipedia.org/wiki/Instance-based_learning), but also for simulation, agent based modeling, data storage and retrieval, the mathematics of probability theory and information theory, and game content and AI. The language format is somewhat LISP-like in that it uses parenthesized list format with prefix notation and is geared toward functional programming, where there is a one-to-one mapping between the code and the corresponding parse tree. Whereas virtually all practical programming languages are primarily designed for some combination of programmer productivity and computational performance, Amalgam prioritizes code matching and merging, as well as a deep equivalence of code and data. Amalgam uses _entities_ to store code and data, with a rich query system to find entities by their _labels_. The language uses a variable stack, but all attributes and methods are stored directly as labels in entities. There is no separate class versus instance, but entities can be used as prototypes to be copied and modified. Though code and data are represented as trees from the root of each entity, graphs in code and data structures are permitted and are flattened to code using special references. Further, instead of failing early when there is an error, Amalgam supports genetic programming and code mixing by being extremely weakly typed, and attempts to find a way to execute code no matter whether types match or not.