diff --git a/CommonData/column.cpp b/CommonData/column.cpp index e7eb87d44f..aa0ac554ab 100644 --- a/CommonData/column.cpp +++ b/CommonData/column.cpp @@ -493,10 +493,8 @@ columnType Column::setValues(const stringvec & values, const stringvec & labels, bool onlyDoubles = true, onlyInts = true; - //Make sure we have only 1 label per value and display combo, because otherwise this will get too complicated - if(labelsMergeDuplicates() && aChange) - (*aChange) = true; - + //Weve already made sure we have only 1 label per value and display combo, because otherwise this will get too complicated + intset ints; // to suggest whether this is a scalar or not we need to know whether we have more than treshold ints or not. int tmpInt; double tmpDbl; @@ -636,10 +634,13 @@ void Column::_dbUpdateLabelOrder(bool noIncRevisionWhenBatchedPlease) intintmap orderPerDbIds; + _highestIntsId = 0; for(size_t i=0; i<_labels.size(); i++) { _labels[i]->setOrder(i); orderPerDbIds[_labels[i]->dbId()] = i; + + _highestIntsId = std::max(_highestIntsId, _labels[i]->intsId()); } db().labelsSetOrder(orderPerDbIds); @@ -657,6 +658,8 @@ void Column::labelsClear(bool doIncRevision) db().labelsClear(_id); _labels.clear(); _labelByIntsIdMap.clear(); + _labelByValDis.clear(); + _highestIntsId = 0; if(doIncRevision) incRevision(false); @@ -692,6 +695,9 @@ int Column::labelsAdd(int display) int Column::labelsAdd(const std::string &display) { + + JASPTIMER_SCOPE(Column::labelsAdd displaystring); + if(display == "") return EmptyValues::missingValueInteger; @@ -708,30 +714,33 @@ int Column::labelsAdd(const std::string &display) int Column::labelsAdd(const std::string & display, const std::string & description, const Json::Value & originalValue) { - sizetset intIds; - - for(Label * label : _labels) - intIds.insert(label->intsId()); - - for(size_t newIntId = 0; ; newIntId++) - if(intIds.count(newIntId) == 0) - return labelsAdd(newIntId, display, true, description, originalValue); + JASPTIMER_SCOPE(Column::labelsAdd 3 args); + + return labelsAdd(++_highestIntsId, display, true, description, originalValue); } int Column::labelsAdd(int value, const std::string & display, bool filterAllows, const std::string & description, const Json::Value & originalValue, int order, int id) { - JASPTIMER_SCOPE(Column::labelsAdd); + JASPTIMER_SCOPE(Column::labelsAdd lotsa arg); + + auto valDisplay = std::make_pair(Label::originalValueAsString(this, originalValue), display); + + if(_labelByValDis.count(valDisplay)) + return _labelByValDis.at(valDisplay)->intsId(); Label * label = new Label(this, display, value, filterAllows, description, originalValue, order, id); _labels.push_back(label); - _labelByIntsIdMap[label->intsId()] = label; + _labelByIntsIdMap[label->intsId()] = label; + _labelByValDis[valDisplay] = label; + + _highestIntsId = std::max(_highestIntsId, label->intsId()); _dbUpdateLabelOrder(true); return label->intsId(); } -void Column::labelsRemoveByIntsId(std::set valuesToRemove) +void Column::labelsRemoveByIntsId(std::set valuesToRemove, bool updateOrder) { if (valuesToRemove.empty()) return; @@ -744,7 +753,12 @@ void Column::labelsRemoveByIntsId(std::set valuesToRemove) [&](Label * label) { if(std::find(valuesToRemove.begin(), valuesToRemove.end(), label->intsId()) != valuesToRemove.end()) { - _labelByIntsIdMap.erase(label->intsId()); + _labelByIntsIdMap.erase(label->intsId()); + + auto valDis = std::make_pair(label->originalValueAsString(), label->labelDisplay()); + if(_labelByValDis.count(valDis) && _labelByValDis.at(valDis) == label) + _labelByValDis.erase(valDis); + label->dbDelete(); delete label; return true; @@ -753,13 +767,14 @@ void Column::labelsRemoveByIntsId(std::set valuesToRemove) }), _labels.end()); - _dbUpdateLabelOrder(); + if(updateOrder) + _dbUpdateLabelOrder(); } strintmap Column::labelsResetValues(int & maxValue) { JASPTIMER_SCOPE(Column::labelsResetValues); - + beginBatchedLabelsDB(); strintmap result; @@ -780,6 +795,8 @@ strintmap Column::labelsResetValues(int & maxValue) maxValue = labelValue; + _highestIntsId = maxValue; + endBatchedLabelsDB(); return result; @@ -797,12 +814,14 @@ void Column::labelsRemoveBeyond(size_t indexToStartRemoving) void Column::labelsTempReset() { - _labelsTemp . clear(); - _labelsTempDbls . clear(); - _labelsTempToIndex . clear(); - _labelsTempRevision = -1; - _labelsTempMaxWidth = 0; - _labelsTempNumerics = 0; + _labelsTemp . clear(); + _labelsTempDbls . clear(); + _labelsTempToIndex . clear(); + _labelsTempRevision = -1; + _labelsTempMaxWidth = 0; + _labelsTempNumerics = 0; + _labelByNonEmptyIndex .clear(); + _labelNonEmptyIndexByLabel .clear(); } int Column::labelsTempCount() @@ -814,15 +833,20 @@ int Column::labelsTempCount() _labelsTemp . reserve(_labels.size()); _labelsTempDbls . reserve(_labels.size()); + size_t nonEmptyIndex = 0; for(size_t r=0; r<_labels.size(); r++) if(!_labels[r]->isEmptyValue()) { _labelsTemp . push_back(_labels[r]->label()); _labelsTempDbls . push_back(_labels[r]->originalValue().isDouble() ? _labels[r]->originalValue().asDouble() : EmptyValues::missingValueDouble); _labelsTempToIndex[_labelsTemp[_labelsTemp.size()-1]] = _labelsTemp.size()-1; //We store the index in _labelsTemp in a map. + _labelByNonEmptyIndex[nonEmptyIndex] = _labels[r]; + _labelNonEmptyIndexByLabel[_labels[r]] = nonEmptyIndex; if(!std::isnan(*_labelsTempDbls.rbegin())) _labelsTempNumerics++; + + nonEmptyIndex++; } doubleset dblset; @@ -879,15 +903,14 @@ std::string Column::labelsTempDisplay(size_t tempLabelIndex) return _labelsTemp[tempLabelIndex]; } -Label * Column::labelByIndexNotEmpty(size_t index) const +int Column::labelIndexNonEmpty(Label *label) const +{ + return !_labelNonEmptyIndexByLabel.count(label) ? -1 : _labelNonEmptyIndexByLabel.at(label); +} + +Label * Column::labelByIndexNotEmpty(int index) const { - size_t nonEmpty = 0; - - for(size_t l=0; l<_labels.size(); l++) - if(!_labels[l]->isEmptyValue() && nonEmpty++ == index) - return _labels[l]; - - return nullptr; + return !_labelByNonEmptyIndex.count(index) ? nullptr : _labelByNonEmptyIndex.at(index); } size_t Column::labelCountNotEmpty() const @@ -941,8 +964,13 @@ int Column::labelsDoubleValueIsTempLabelRow(double dbl) void Column::_resetLabelValueMap() { _labelByIntsIdMap.clear(); + _labelByValDis.clear(); + for(Label * label : _labels) - _labelByIntsIdMap[label->intsId()] = label; + { + _labelByIntsIdMap[label->intsId()] = label; + _labelByValDis[std::make_pair(label->originalValueAsString(), label->labelDisplay())] = label; + } labelsTempReset(); } @@ -1272,11 +1300,21 @@ bool Column::replaceDoubleLabelFromRowWithDouble(size_t row, double dbl) return true; } -void Column::labelValueChanged(Label *label, double aDouble) +void Column::labelValueChanged(Label *label, double aDouble, const Json::Value & previousOriginal) { + auto oldValDis = std::make_pair(Label::originalValueAsString(this, previousOriginal), label->labelDisplay()); + bool merged = _labelByValDis.count(label->origValDisplay()) != 0; + + if(merged) + labelsMergeDuplicateInto(label); + + //Make sure it was registered before: + assert(_labelByValDis[oldValDis] == label); + //And that its new location is free: + assert(_labelByValDis.count(label->origValDisplay()) == 0 || _labelByValDis.at(label->origValDisplay()) == label); + //Lets assume that all occurences of a label in _dbls are the same. //So when we encounter one that is the same as what is passed here we can return immediately - for(size_t r=0; r<_dbls.size(); r++) if(_ints[r] == label->intsId()) { @@ -1286,6 +1324,13 @@ void Column::labelValueChanged(Label *label, double aDouble) _dbls[r] = aDouble; } + + _labelByValDis.erase(oldValDis); + _labelByValDis[label->origValDisplay()] = label; + + if(merged) + _dbUpdateLabelOrder(); + dbUpdateValues(); } @@ -1295,12 +1340,30 @@ void Column::labelsHandleAutoSort(bool doDbUpdateEtc) labelsOrderByValue(doDbUpdateEtc); } -void Column::labelDisplayChanged(Label *label) +void Column::labelDisplayChanged(Label *label, const std::string & previousDisplay) { + auto oldValDis = std::make_pair(label->originalValueAsString(), previousDisplay); + bool merged = _labelByValDis.count(label->origValDisplay()) != 0; + + if(merged) + labelsMergeDuplicateInto(label); + + + //Make sure it was registered before: + assert(_labelByValDis[oldValDis] == label); + //And that its new location is free: + assert(_labelByValDis.count(label->origValDisplay()) == 0 || _labelByValDis.at(label->origValDisplay()) == label); + + _labelByValDis.erase(oldValDis); + _labelByValDis[label->origValDisplay()] = label; + + if(merged) + _dbUpdateLabelOrder(); + if(_labelsTempRevision < _revision) return; //We dont care about this change anymore if the list is out of date - size_t labelIdx = labelIndex(label); + size_t labelIdx = labelIndexNonEmpty(label); if(_labelsTemp.size() > labelIdx) _labelsTemp[labelIdx] = label->label(); @@ -1513,54 +1576,46 @@ Labelset Column::labelsByValue(const std::string & value) const return Labelset(found.begin(), found.end()); } + + Label * Column::labelByValueAndDisplay(const std::string &value, const std::string &labelText) const { JASPTIMER_SCOPE(Column::labelsByValueAndDisplay); - Labels found; - for(Label * label : _labels) - if(label->originalValueAsString() == value && label->label() == labelText) - return label; - - - return nullptr; + auto valDis = std::make_pair(value, labelText); + return _labelByValDis.count(valDis) == 0 ? nullptr : _labelByValDis.at(valDis); } -bool Column::labelsMergeDuplicates() +void Column::labelsMergeDuplicateInto(Label * labelPrime) { - bool thereWasDuplication = false; - for(size_t labelIndex=0; labelIndex < _labels.size(); labelIndex++) + const std::string value = labelPrime->originalValueAsString(), + labelText = labelPrime->label(); + Labelset found; + std::copy_if(_labels.begin(), _labels.end(), std::inserter(found, found.begin()), [&value, &labelText](Label * label) { - const std::string value = _labels[labelIndex]->originalValueAsString(), - labelText = _labels[labelIndex]->label(); - Labels found; - std::copy_if(_labels.begin(), _labels.end(), std::back_inserter(found), [&value, &labelText](Label * label) - { - return label->originalValueAsString() == value && label->label() == labelText; - }); + return label->originalValueAsString() == value && label->label() == labelText; + }); + + if(found.size() > 1) + { + found.erase(labelPrime); - if(found.size() > 1) - { - thereWasDuplication = true; + for(Label * otherLabel : found) + for(int & anInt : _ints) + if(anInt == otherLabel->intsId()) + anInt = labelPrime->intsId(); - //First one wins - for(size_t f=1; fintsId()) - anInt = found[0]->intsId(); + intset ids; - found.erase(found.begin()); - intset ids; - - for(Label * label : found) - ids.insert(label->intsId()); - - labelsRemoveByIntsId(ids); - } + for(Label * label : found) + ids.insert(label->intsId()); + + labelsRemoveByIntsId(ids, false); + + _labelByValDis[labelPrime->origValDisplay()] = labelPrime; + labelsTempReset(); } - - return thereWasDuplication; } bool Column::labelsRemoveOrphans() @@ -1578,14 +1633,6 @@ bool Column::labelsRemoveOrphans() return idsNotUsed.size(); } -int Column::labelIndex(const Label *label) const -{ - for(size_t i=0; i<_labels.size(); i++) - if(_labels[i] == label) - return i; - return -1; -} - std::set Column::labelsMoveRows(std::vector rows, bool up) { JASPTIMER_SCOPE(Column::labelsMoveRows); @@ -1921,11 +1968,11 @@ Json::Value Column::serializeLabels() const void Column::deserializeLabelsForCopy(const Json::Value & labels) { - labelsTempReset(); beginBatchedLabelsDB(); _labelByIntsIdMap.clear(); + _labelByValDis.clear(); _labels.clear(); if (labels.isArray()) diff --git a/CommonData/column.h b/CommonData/column.h index eb204e3f07..94727b35d9 100644 --- a/CommonData/column.h +++ b/CommonData/column.h @@ -41,6 +41,8 @@ class Analysis; class Column : public DataSetBaseNode { public: + typedef std::map, Label*> LabelByStrStr; + Column(DataSet * data, int id = -1); ~Column(); @@ -114,7 +116,7 @@ class Column : public DataSetBaseNode int labelsAdd( const std::string & display); int labelsAdd( const std::string & display, const std::string & description, const Json::Value & originalValue); int labelsAdd( int value, const std::string & display, bool filterAllows, const std::string & description, const Json::Value & originalValue, int order=-1, int id=-1); - void labelsRemoveByIntsId( intset valuesToRemove); + void labelsRemoveByIntsId( intset valuesToRemove, bool updateOrder = true); strintmap labelsResetValues( int & maxValue); void labelsRemoveBeyond( size_t indexToStartRemoving); @@ -149,9 +151,9 @@ class Column : public DataSetBaseNode Label * replaceDoublesTillLabelsRowWithLabels(size_t row); bool replaceDoubleLabelFromRowWithDouble(size_t row, double dbl); ///< Returns true if succes - void labelValueChanged(Label * label, double aDouble); ///< Pass NaN for non-convertible values - void labelValueChanged(Label * label, int anInteger) { labelValueChanged(label, double(anInteger)); } - void labelDisplayChanged(Label * label); + void labelValueChanged(Label * label, double aDouble, const Json::Value & previousOriginal); ///< Pass NaN for non-convertible values + void labelValueChanged(Label * label, int anInteger, const Json::Value & previousOriginal) { labelValueChanged(label, double(anInteger), previousOriginal); } + void labelDisplayChanged(Label * label, const std::string & previousDisplay); bool setStringValue( size_t row, const std::string & value, const std::string & label = "", bool writeToDB = true); ///< Does two things, if label=="" it will handle user input, as value or label depending on columnType. Otherwise it will simply try to use userEntered as a value. But this will trigger the setting of type bool setValue( size_t row, const std::string & value, const std::string & label, bool writeToDB = true); @@ -166,16 +168,16 @@ class Column : public DataSetBaseNode Labels & labels() { return _labels; } const Labels & labels() const { return _labels; } - bool labelsMergeDuplicates(); + void labelsMergeDuplicateInto(Label * label); bool labelsRemoveOrphans(); Labelset labelsByDisplay( const std::string & display) const; ///< Might be nullptr for missing label Labelset labelsByValue( const std::string & value) const; ///< - int labelIndex( const Label * label) const; + int labelIndexNonEmpty( Label * label) const; Label * labelByRow( int row) const; ///< Label * labelByValue( const std::string & value) const; ///< Might be nullptr for missing label, returns the first of labelsByValue Label * labelByIntsId( int intsId) const; ///< Might be nullptr for missing label Label * labelByDisplay( const std::string & display) const; ///< Might be nullptr for missing label, returns the first of labelsByDisplay - Label * labelByIndexNotEmpty( size_t index) const; + Label * labelByIndexNotEmpty( int index) const; Label * labelByValueAndDisplay( const std::string & value, const std::string & label) const; ///< Might be nullptr for missing label, assumes you ran labelsMergeDuplicates before void labelsHandleAutoSort( bool doDbUpdateEtc = true); size_t labelCountNotEmpty() const; @@ -249,7 +251,8 @@ class Column : public DataSetBaseNode int _id = -1, _analysisId = -1, // Actually initialized in DatabaseInterface::columnInsert _labelsTempRevision = -1, ///< When were the "temporary labels" created? - _labelsTempNumerics = 0; ///< Use the labelsTemp step to calculate the amount of numeric labels + _labelsTempNumerics = 0, ///< Use the labelsTemp step to calculate the amount of numeric labels + _highestIntsId = -1; qsizetype _labelsTempMaxWidth = 0; stringvec _labelsTemp; ///< Contains displaystring for labels. Used to allow people to edit "double" labels. Initialized when necessary doublevec _labelsTempDbls; @@ -267,7 +270,10 @@ class Column : public DataSetBaseNode doublevec _dbls; intvec _ints; stringset _dependsOnColumns; - std::map _labelByIntsIdMap; + std::map _labelByIntsIdMap, + _labelByNonEmptyIndex; + std::map _labelNonEmptyIndexByLabel; + LabelByStrStr _labelByValDis; int _batchedLabelDepth = 0; static bool _autoSortByValuesByDefault; diff --git a/CommonData/label.cpp b/CommonData/label.cpp index 539147bbaa..0f264a69d4 100644 --- a/CommonData/label.cpp +++ b/CommonData/label.cpp @@ -1,5 +1,4 @@ #include "label.h" -#include #include "column.h" #include "timers.h" #include "databaseinterface.h" @@ -135,9 +134,10 @@ bool Label::setLabel(const std::string & label) { if(_label != label) { + std::string oldLabel = _label; _label = label.empty() ? originalValueAsString() : label; - _column->labelDisplayChanged(this); + _column->labelDisplayChanged(this, oldLabel); dbUpdate(); return true; @@ -150,12 +150,13 @@ bool Label::setOriginalValue(const Json::Value & originalLabel) { if(_originalValue != originalLabel) { + Json::Value previous = _originalValue; _originalValue = originalLabel; dbUpdate(); - if(_originalValue.isDouble()) _column->labelValueChanged(this, _originalValue.asDouble() ); - else if(_originalValue.isInt()) _column->labelValueChanged(this, _originalValue.asInt() ); - else _column->labelValueChanged(this, EmptyValues::missingValueDouble); + if(_originalValue.isDouble()) _column->labelValueChanged(this, _originalValue.asDouble(), previous); + else if(_originalValue.isInt()) _column->labelValueChanged(this, _originalValue.asInt(), previous); + else _column->labelValueChanged(this, EmptyValues::missingValueDouble, previous); return true; } @@ -227,19 +228,24 @@ bool Label::isEmptyValue() const std::string Label::originalValueAsString(bool fancyEmptyValue) const { - switch(_originalValue.type()) + return originalValueAsString(_column, _originalValue, fancyEmptyValue); +} + +std::string Label::originalValueAsString(const Column * column, const Json::Value & originalValue, bool fancyEmptyValue) +{ + switch(originalValue.type()) { default: return fancyEmptyValue ? EmptyValues::displayString() : ""; case Json::intValue: - return std::to_string(_originalValue.asInt()); + return std::to_string(originalValue.asInt()); case Json::realValue: - return _column->doubleToDisplayString(_originalValue.asDouble(), fancyEmptyValue); + return column->doubleToDisplayString(originalValue.asDouble(), fancyEmptyValue); case Json::stringValue: - return _originalValue.asString(); + return originalValue.asString(); } } diff --git a/CommonData/label.h b/CommonData/label.h index 8c973ccf29..62441dcf1e 100644 --- a/CommonData/label.h +++ b/CommonData/label.h @@ -45,7 +45,10 @@ class Label : public DataSetBaseNode int order() const { return _order; } bool filterAllows() const { return _filterAllows; } const Json::Value & originalValue() const { return _originalValue; } + std::pair origValDisplay() const { return std::make_pair(originalValueAsString(), labelDisplay()); } + static std::string originalValueAsString(const Column * column, const Json::Value & originalValue, bool fancyEmptyValue = false); std::string originalValueAsString(bool fancyEmptyValue = false) const; std::string str() const; @@ -77,7 +80,7 @@ class Label : public DataSetBaseNode bool _filterAllows = true; ///< Used in generating filters for when users disable and enable certain labels/levels }; -typedef std::vector Labels; -typedef std::set Labelset; +typedef std::vector Labels; +typedef std::set Labelset; #endif // LABEL_H diff --git a/Desktop/data/datasetpackage.cpp b/Desktop/data/datasetpackage.cpp index 5bf13ec13f..e267eeadfb 100644 --- a/Desktop/data/datasetpackage.cpp +++ b/Desktop/data/datasetpackage.cpp @@ -296,7 +296,7 @@ QModelIndex DataSetPackage::indexForSubNode(DataSetBaseNode * node) const { Label * lab = dynamic_cast( node); Column * col = lab ? dynamic_cast(node->parent()) : nullptr; - int i = col ? col->labelIndex(lab) : -1; + int i = col ? col->labelIndexNonEmpty(lab) : -1; return createIndex(i, 0, dynamic_cast(lab)); }