From a668d628a7cead3610e93764cab3b26bf0cb2006 Mon Sep 17 00:00:00 2001 From: Matt Chambers Date: Wed, 25 Oct 2023 15:39:28 -0400 Subject: [PATCH 1/5] - added gene level parsimony option in protein association dialog * bumped schema to 23.12 for gene level parsimony * fixed Skyline build system to copy vendor DLLs to both Debug and Release directories so vendor reader tests don't fail in Debug config * removed MinPeptidesPerProtein test from TestHugeAssociateProteins because it's now hidden in the Refine -> Associate Proteins dialog --- Jamroot.jam | 43 +- .../Skyline/CommandArgUsage.Designer.cs | 9 + pwiz_tools/Skyline/CommandArgUsage.resx | 3 + pwiz_tools/Skyline/CommandArgs.cs | 6 +- pwiz_tools/Skyline/CommandLine.cs | 1 + .../EditUI/AssociateProteinsDlg.Designer.cs | 21 + .../Skyline/EditUI/AssociateProteinsDlg.cs | 54 ++- .../Skyline/EditUI/AssociateProteinsDlg.resx | 133 ++++-- pwiz_tools/Skyline/Jamfile.jam | 7 +- .../Model/AuditLog/EnumNames.Designer.cs | 36 ++ .../Skyline/Model/AuditLog/EnumNames.resx | 12 + .../Model/AuditLog/PropertyNames.Designer.cs | 9 + .../Skyline/Model/AuditLog/PropertyNames.resx | 3 + pwiz_tools/Skyline/Model/PeptideGroup.cs | 3 +- .../Skyline/Model/PeptideGroupDocNode.cs | 11 +- .../Model/Proteome/ProteinAssociation.cs | 205 ++++++++-- .../Model/Proteome/ProteinGroupMetadata.cs | 19 +- .../Model/Proteome/ProteinMetadataManager.cs | 31 +- .../Model/Serialization/DocumentFormat.cs | 3 +- .../Skyline/Properties/Resources.Designer.cs | 195 +++++++++ pwiz_tools/Skyline/Properties/Resources.resx | 12 + pwiz_tools/Skyline/Test/SrmSettingsTest.cs | 4 +- .../AssociateProteinsDlgTest.cs | 379 ++++++++++++++++-- .../TestPerf/PerfAssociateProteinsHugeTest.cs | 20 +- .../TestUtil/Schemas/Skyline_Current.xsd | 1 + 25 files changed, 1058 insertions(+), 162 deletions(-) diff --git a/Jamroot.jam b/Jamroot.jam index 0d9290df17..c22e0e7eec 100644 --- a/Jamroot.jam +++ b/Jamroot.jam @@ -765,6 +765,31 @@ rule install-type ( properties * ) } import string ; +rule install-vendor-api-dependencies-to-locations ( locations + : properties * ) +{ + local dependencies ; + if msvc in $(properties) && static in $(properties) && ! [ without-binary-msdata ] + { + for loc in $(locations) + { + local location = $(loc) ; + if [ path.exists pwiz_aux/msrc/utility/vendor_api/ABI ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/ABI//install_pwiz_vendor_api_abi_dlls/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/ABI ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/ABI//install_pwiz_vendor_api_abi_sqlite/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/ABI/T2D ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/ABI/T2D//install_pwiz_vendor_api_abi_t2d/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/Agilent ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Agilent//install_pwiz_vendor_api_agilent/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/Bruker ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Bruker//install_pwiz_vendor_api_bruker/$(location) ; } + # wait for CCS<->DT support in Mobllion # if [ path.exists pwiz_aux/msrc/utility/vendor_api/Mobilion ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Mobilion//install_pwiz_vendor_api_mbi/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/Shimadzu ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Shimadzu//install_pwiz_vendor_api_shimadzu/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/thermo ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/thermo//install_pwiz_vendor_api_thermo/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/UIMF ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/UIMF//install_pwiz_vendor_api_uimf/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/UNIFI ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/UNIFI//install_pwiz_vendor_api_unifi/$(location) ; } + if [ path.exists pwiz_aux/msrc/utility/vendor_api/Waters ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Waters//install_pwiz_vendor_api_waters/$(location) ; } + if on in $(properties) { dependencies += $(PWIZ_ROOT_PATH)/pwiz_tools/prototype//ThermoRawMetaDumpInstall/$(location) ; } + } + } + return $(dependencies) ; +} + rule install-vendor-api-dependencies ( properties * ) { properties = $(properties) [ vendor-api-support $(properties) ] ; @@ -772,23 +797,7 @@ rule install-vendor-api-dependencies ( properties * ) if $(location) { location = [ string.join [ path.make $(location[1]) ] ] ; } location ?= [ install-location $(properties) ] ; - local dependencies ; - if msvc in $(properties) && static in $(properties) && ! [ without-binary-msdata ] - { - if [ path.exists pwiz_aux/msrc/utility/vendor_api/ABI ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/ABI//install_pwiz_vendor_api_abi_dlls/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/ABI ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/ABI//install_pwiz_vendor_api_abi_sqlite/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/ABI/T2D ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/ABI/T2D//install_pwiz_vendor_api_abi_t2d/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/Agilent ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Agilent//install_pwiz_vendor_api_agilent/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/Bruker ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Bruker//install_pwiz_vendor_api_bruker/$(location) ; } -# wait for CCS<->DT support in Mobllion # if [ path.exists pwiz_aux/msrc/utility/vendor_api/Mobilion ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Mobilion//install_pwiz_vendor_api_mbi/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/Shimadzu ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Shimadzu//install_pwiz_vendor_api_shimadzu/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/thermo ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/thermo//install_pwiz_vendor_api_thermo/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/UIMF ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/UIMF//install_pwiz_vendor_api_uimf/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/UNIFI ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/UNIFI//install_pwiz_vendor_api_unifi/$(location) ; } - if [ path.exists pwiz_aux/msrc/utility/vendor_api/Waters ] { dependencies += $(PWIZ_ROOT_PATH)/pwiz_aux/msrc/utility/vendor_api/Waters//install_pwiz_vendor_api_waters/$(location) ; } - if on in $(properties) { dependencies += $(PWIZ_ROOT_PATH)/pwiz_tools/prototype//ThermoRawMetaDumpInstall/$(location) ; } - } - return $(dependencies) ; + return [ install-vendor-api-dependencies-to-locations $(location) : $(properties) ] ; } rule install-identdata-dependencies ( properties * ) diff --git a/pwiz_tools/Skyline/CommandArgUsage.Designer.cs b/pwiz_tools/Skyline/CommandArgUsage.Designer.cs index 800ac9d6ab..acd1240a45 100644 --- a/pwiz_tools/Skyline/CommandArgUsage.Designer.cs +++ b/pwiz_tools/Skyline/CommandArgUsage.Designer.cs @@ -132,6 +132,15 @@ internal static string _annotation_values { } } + /// + /// Looks up a localized string similar to Associate peptides with genes (or gene groups) instead of proteins, and apply parsimony options to that association.. + /// + internal static string _associate_proteins_gene_level_parsimony { + get { + return ResourceManager.GetString("_associate_proteins_gene_level_parsimony", resourceCulture); + } + } + /// /// Looks up a localized string similar to Proteins that match the same set of peptides will be combined into a single target in the document.. /// diff --git a/pwiz_tools/Skyline/CommandArgUsage.resx b/pwiz_tools/Skyline/CommandArgUsage.resx index f5897ba82f..d172415a16 100644 --- a/pwiz_tools/Skyline/CommandArgUsage.resx +++ b/pwiz_tools/Skyline/CommandArgUsage.resx @@ -1018,4 +1018,7 @@ Useful in sample mixtures including multiple species. Adds additional information to the program output in the case of an error, that may be helpful to Skyline developers. + + Associate peptides with genes (or gene groups) instead of proteins, and apply parsimony options to that association. + \ No newline at end of file diff --git a/pwiz_tools/Skyline/CommandArgs.cs b/pwiz_tools/Skyline/CommandArgs.cs index 99cb638e2e..8ddd85732a 100644 --- a/pwiz_tools/Skyline/CommandArgs.cs +++ b/pwiz_tools/Skyline/CommandArgs.cs @@ -1399,6 +1399,8 @@ public bool ImportingSearch public static readonly Argument ARG_AP_GROUP_PROTEINS = new Argument(@"associate-proteins-group-proteins", (c, p) => c.AssociateProteinsGroupProteins = p.IsNameOnly || bool.Parse(p.Value)); + public static readonly Argument ARG_AP_GENE_LEVEL = new Argument(@"associate-proteins-gene-level-parsimony", + (c, p) => c.AssociateProteinsGeneLevelParsimony = p.IsNameOnly || bool.Parse(p.Value)); public static readonly Argument ARG_AP_SHARED_PEPTIDES = DocArgument.FromEnumType(@"associate-proteins-shared-peptides", (c, p) => c.AssociateProteinsSharedPeptides = p); public static readonly Argument ARG_AP_MINIMAL_LIST = new Argument(@"associate-proteins-minimal-protein-list", @@ -1409,16 +1411,18 @@ public bool ImportingSearch (c, p) => c.AssociateProteinsMinPeptidesPerProtein = p.ValueInt) { WrapValue = true }; private static readonly ArgumentGroup GROUP_ASSOCIATE_PROTEINS = new ArgumentGroup(() => Resources.CommandLine_AssociateProteins_Associating_peptides_with_proteins, false, - ARG_AP_GROUP_PROTEINS, ARG_AP_SHARED_PEPTIDES, ARG_AP_MINIMAL_LIST, ARG_AP_MIN_PEPTIDES, ARG_AP_REMOVE_SUBSETS) + ARG_AP_GROUP_PROTEINS, ARG_AP_GENE_LEVEL, ARG_AP_SHARED_PEPTIDES, ARG_AP_MINIMAL_LIST, ARG_AP_MIN_PEPTIDES, ARG_AP_REMOVE_SUBSETS) { LeftColumnWidth = 45 }; public bool? AssociateProteinsGroupProteins { get; private set; } + public bool? AssociateProteinsGeneLevelParsimony { get; private set; } public bool? AssociateProteinsFindMinimalProteinList { get; private set; } public bool? AssociateProteinsRemoveSubsetProteins { get; private set; } public SharedPeptides? AssociateProteinsSharedPeptides { get; private set; } public int? AssociateProteinsMinPeptidesPerProtein { get; private set; } public bool AssociatingProteins => AssociateProteinsFindMinimalProteinList.HasValue || AssociateProteinsGroupProteins.HasValue || + AssociateProteinsGeneLevelParsimony.HasValue || AssociateProteinsMinPeptidesPerProtein.HasValue || AssociateProteinsRemoveSubsetProteins.HasValue || AssociateProteinsSharedPeptides.HasValue; diff --git a/pwiz_tools/Skyline/CommandLine.cs b/pwiz_tools/Skyline/CommandLine.cs index ba7446979d..93a94088f2 100644 --- a/pwiz_tools/Skyline/CommandLine.cs +++ b/pwiz_tools/Skyline/CommandLine.cs @@ -2189,6 +2189,7 @@ private bool AssociateProteins(CommandArgs commandArgs) var proteinAssociation = new ProteinAssociation(Document, progressMonitor); proteinAssociation.UseFastaFile(fastaPath, DigestProteinToPeptides, progressMonitor); proteinAssociation.ApplyParsimonyOptions(commandArgs.AssociateProteinsGroupProteins.GetValueOrDefault(), + commandArgs.AssociateProteinsGeneLevelParsimony.GetValueOrDefault(), commandArgs.AssociateProteinsFindMinimalProteinList.GetValueOrDefault(), commandArgs.AssociateProteinsRemoveSubsetProteins.GetValueOrDefault(), commandArgs.AssociateProteinsSharedPeptides.GetValueOrDefault(), diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.Designer.cs b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.Designer.cs index 955a343e3c..20bb341b7a 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.Designer.cs +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.Designer.cs @@ -42,6 +42,8 @@ private void InitializeComponent() this.flowLayoutPanel = new System.Windows.Forms.FlowLayoutPanel(); this.lblGroupProteins = new System.Windows.Forms.Label(); this.lnkHelpProteinGroups = new System.Windows.Forms.LinkLabel(); + this.cbGeneLevel = new System.Windows.Forms.CheckBox(); + this.lblGroupAtGeneLevel = new System.Windows.Forms.Label(); this.lblSharedPeptides = new System.Windows.Forms.Label(); this.lnkHelpSharedPeptides = new System.Windows.Forms.LinkLabel(); this.cbMinimalProteinList = new System.Windows.Forms.CheckBox(); @@ -126,6 +128,8 @@ private void InitializeComponent() this.flowLayoutPanel.Controls.Add(this.cbGroupProteins); this.flowLayoutPanel.Controls.Add(this.lblGroupProteins); this.flowLayoutPanel.Controls.Add(this.lnkHelpProteinGroups); + this.flowLayoutPanel.Controls.Add(this.cbGeneLevel); + this.flowLayoutPanel.Controls.Add(this.lblGroupAtGeneLevel); this.flowLayoutPanel.Controls.Add(this.lblSharedPeptides); this.flowLayoutPanel.Controls.Add(this.lnkHelpSharedPeptides); this.flowLayoutPanel.Controls.Add(this.comboSharedPeptides); @@ -155,6 +159,21 @@ private void InitializeComponent() this.lnkHelpProteinGroups.TabStop = true; this.lnkHelpProteinGroups.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.lnkHelp_LinkClicked); // + // cbGeneLevel + // + resources.ApplyResources(this.cbGeneLevel, "cbGeneLevel"); + this.cbGeneLevel.Name = "cbGeneLevel"; + this.cbGeneLevel.UseVisualStyleBackColor = true; + this.cbGeneLevel.CheckedChanged += new System.EventHandler(this.cbGeneLevel_CheckedChanged); + // + // lblGroupAtGeneLevel + // + resources.ApplyResources(this.lblGroupAtGeneLevel, "lblGroupAtGeneLevel"); + this.flowLayoutPanel.SetFlowBreak(this.lblGroupAtGeneLevel, true); + this.lblGroupAtGeneLevel.Name = "lblGroupAtGeneLevel"; + this.helpTip.SetToolTip(this.lblGroupAtGeneLevel, resources.GetString("lblGroupAtGeneLevel.ToolTip")); + this.lblGroupAtGeneLevel.Click += new System.EventHandler(this.lblGroupAtGeneLevel_Click); + // // lblSharedPeptides // resources.ApplyResources(this.lblSharedPeptides, "lblSharedPeptides"); @@ -474,5 +493,7 @@ private void InitializeComponent() private System.Windows.Forms.Label lblRemoveSubsetProteins; private System.Windows.Forms.ToolTip helpTip; private System.Windows.Forms.Panel proteinSourcePanel; + private System.Windows.Forms.CheckBox cbGeneLevel; + private System.Windows.Forms.Label lblGroupAtGeneLevel; } } \ No newline at end of file diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs index 7f60cfa95c..ef01459c1e 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs @@ -93,6 +93,7 @@ public AssociateProteinsDlg(SrmDocument document, bool reuseLastFasta = true) comboSharedPeptides.Items.Add(EnumNames.ResourceManager.GetString(@"SharedPeptides_" + sharedPeptides) ?? throw new InvalidOperationException(sharedPeptides)); GroupProteins = peptideSettings.ProteinAssociationSettings?.GroupProteins ?? false; + GeneLevelParsimony = peptideSettings.ProteinAssociationSettings?.GeneLevelParsimony ?? false; FindMinimalProteinList = peptideSettings.ProteinAssociationSettings?.FindMinimalProteinList ?? false; RemoveSubsetProteins = peptideSettings.ProteinAssociationSettings?.RemoveSubsetProteins ?? false; SelectedSharedPeptides = peptideSettings.ProteinAssociationSettings?.SharedPeptides ?? ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins; @@ -103,6 +104,7 @@ public AssociateProteinsDlg(SrmDocument document, bool reuseLastFasta = true) _driverBackgroundProteome.LoadList(peptideSettings.BackgroundProteome.Name); helpTip.SetToolTip(cbGroupProteins, helpTip.GetToolTip(lblGroupProteins)); + helpTip.SetToolTip(cbGeneLevel, helpTip.GetToolTip(lblGroupAtGeneLevel)); helpTip.SetToolTip(cbMinimalProteinList, helpTip.GetToolTip(lblMinimalProteinList)); helpTip.SetToolTip(cbRemoveSubsetProteins, helpTip.GetToolTip(lblRemoveSubsetProteins)); helpTip.SetToolTip(comboSharedPeptides, helpTip.GetToolTip(lblSharedPeptides)); @@ -213,6 +215,11 @@ public bool GroupProteins get => cbGroupProteins.Checked; set => cbGroupProteins.Checked = value; } + public bool GeneLevelParsimony + { + get => cbGeneLevel.Checked; + set => cbGeneLevel.Checked = value; + } public bool FindMinimalProteinList { @@ -254,6 +261,7 @@ private void UpdateParsimonyResults() return; var groupProteins = GroupProteins; + var geneLevel = GeneLevelParsimony; var findMinimalProteinList = FindMinimalProteinList; var removeSubsetProteins = RemoveSubsetProteins; var selectedSharedPeptides = SelectedSharedPeptides; @@ -262,7 +270,7 @@ private void UpdateParsimonyResults() using (var longWaitDlg = new LongWaitDlg()) { longWaitDlg.PerformWork(this, 1000, - broker => _proteinAssociation.ApplyParsimonyOptions(groupProteins, findMinimalProteinList, removeSubsetProteins, selectedSharedPeptides, minPeptidesPerProtein, broker)); + broker => _proteinAssociation.ApplyParsimonyOptions(groupProteins, geneLevel, findMinimalProteinList, removeSubsetProteins, selectedSharedPeptides, minPeptidesPerProtein, broker)); if (longWaitDlg.IsCanceled) return; } @@ -287,6 +295,10 @@ private void checkBoxParsimony_CheckedChanged(object sender, EventArgs e) private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) { + // setting gene level parsimony forces protein grouping on, so do nothing in that case + if (GeneLevelParsimony) + return; + comboSharedPeptides.SelectedIndexChanged -= comboParsimony_SelectedIndexChanged; // adjust labels to reflect whether proteins or protein groups are used for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) @@ -313,6 +325,37 @@ private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) UpdateParsimonyResults(); } + private void cbGeneLevel_CheckedChanged(object sender, EventArgs e) + { + comboSharedPeptides.SelectedIndexChanged -= comboParsimony_SelectedIndexChanged; + // adjust labels to reflect whether genes or protein groups are used + for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) + comboSharedPeptides.Items[i] = EnumNames.ResourceManager.GetString( + (GeneLevelParsimony ? @"SharedPeptidesGene_" : @"SharedPeptidesGroup_") + + _sharedPeptideOptionNames[i]) ?? + throw new InvalidOperationException(_sharedPeptideOptionNames[i]); + comboSharedPeptides.SelectedIndexChanged += comboParsimony_SelectedIndexChanged; + + // gene level parsimony implies grouping, so force the checkbox on and disable it + if (GeneLevelParsimony) + { + cbGroupProteins.Checked = true; + cbGroupProteins.Enabled = false; + lblMinimalProteinList.Text = Resources.AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides; + lblRemoveSubsetProteins.Text = Resources.AssociateProteinsDlg_Remove_subset_genes; + lblMinPeptides.Text = Resources.AssociateProteinsDlg_Min_peptides_per_gene; + } + else + { + cbGroupProteins.Enabled = true; + lblMinimalProteinList.Text = Resources.AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides; + lblRemoveSubsetProteins.Text = Resources.AssociateProteinsDlg_Remove_subset_protein_groups; + lblMinPeptides.Text = Resources.AssociateProteinsDlg_Min_peptides_per_protein_group; + } + + UpdateParsimonyResults(); + } + private void comboParsimony_SelectedIndexChanged(object sender, EventArgs e) { UpdateParsimonyResults(); @@ -462,6 +505,10 @@ private SrmDocument CreateDocTree(SrmDocument current) if (longWaitDlg.IsCanceled) return null; } + + if (cbGeneLevel.Checked) + Settings.Default.ShowPeptidesDisplayMode = ProteinMetadataManager.ProteinDisplayMode.ByGene.ToString(); + return result; } @@ -621,6 +668,11 @@ private void lblGroupProtein_Click(object sender, EventArgs e) cbGroupProteins.Checked = !cbGroupProteins.Checked; } + private void lblGroupAtGeneLevel_Click(object sender, EventArgs e) + { + cbGeneLevel.Checked = !cbGeneLevel.Checked; + } + private void lblMinimalProteinList_Click(object sender, EventArgs e) { cbMinimalProteinList.Checked = !cbMinimalProteinList.Checked; diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.resx b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.resx index 7a82ffdffc..7b13e99083 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.resx +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.resx @@ -129,7 +129,7 @@ - 420, 507 + 420, 523 75, 23 @@ -190,7 +190,7 @@ NoControl - 339, 507 + 339, 523 75, 23 @@ -316,6 +316,75 @@ 2 + + True + + + NoControl + + + 3, 23 + + + 3, 3, 0, 3 + + + 15, 14 + + + 15 + + + MiddleRight + + + cbGeneLevel + + + System.Windows.Forms.CheckBox, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + flowLayoutPanel + + + 3 + + + True + + + NoControl + + + 18, 23 + + + 0, 3, 0, 3 + + + 100, 13 + + + 14 + + + Group at g&ene level + + + Peptides will be associated and grouped by gene instead of by protein. + + + lblGroupAtGeneLevel + + + System.Windows.Forms.Label, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + flowLayoutPanel + + + 4 + True @@ -323,7 +392,7 @@ NoControl - 0, 26 + 0, 46 0, 6, 0, 3 @@ -353,7 +422,7 @@ flowLayoutPanel - 3 + 5 True @@ -365,7 +434,7 @@ NoControl - 105, 20 + 105, 40 0, 0, 3, 0 @@ -389,7 +458,7 @@ flowLayoutPanel - 4 + 6 True @@ -398,7 +467,7 @@ NoControl - 3, 75 + 3, 95 3, 6, 0, 3 @@ -422,13 +491,13 @@ flowLayoutPanel - 6 + 8 True - 18, 75 + 18, 95 0, 6, 0, 3 @@ -456,7 +525,7 @@ flowLayoutPanel - 7 + 9 True @@ -468,7 +537,7 @@ NoControl - 250, 69 + 250, 89 0, 0, 3, 0 @@ -492,7 +561,7 @@ flowLayoutPanel - 8 + 10 True @@ -501,7 +570,7 @@ NoControl - 3, 95 + 3, 115 3, 3, 0, 3 @@ -525,7 +594,7 @@ flowLayoutPanel - 9 + 11 True @@ -534,7 +603,7 @@ NoControl - 18, 95 + 18, 115 0, 3, 0, 3 @@ -561,7 +630,7 @@ flowLayoutPanel - 10 + 12 True @@ -573,7 +642,7 @@ NoControl - 139, 92 + 139, 112 0, 0, 3, 0 @@ -597,7 +666,7 @@ flowLayoutPanel - 11 + 13 True @@ -606,7 +675,7 @@ NoControl - 0, 118 + 0, 138 0, 6, 3, 3 @@ -636,10 +705,10 @@ flowLayoutPanel - 12 + 14 - 3, 135 + 3, 155 3, 0, 3, 3 @@ -660,7 +729,7 @@ flowLayoutPanel - 13 + 15 Fill @@ -669,7 +738,7 @@ 3, 16 - 477, 164 + 477, 182 0 @@ -687,7 +756,7 @@ 0 - 3, 45 + 3, 65 341, 21 @@ -705,7 +774,7 @@ flowLayoutPanel - 5 + 7 Top, Left, Right @@ -714,7 +783,7 @@ 12, 177 - 483, 183 + 483, 201 2 @@ -906,7 +975,7 @@ Targets - 12, 387 + 12, 403 483, 89 @@ -936,7 +1005,7 @@ NoControl - 12, 369 + 12, 385 132, 13 @@ -1026,7 +1095,7 @@ RightToLeft - 12, 481 + 12, 497 483, 20 @@ -1077,7 +1146,7 @@ 6, 13 - 507, 542 + 507, 558 468, 554 @@ -1092,7 +1161,7 @@ modeUIHandler - pwiz.Skyline.Util.Helpers+ModeUIExtender, Skyline, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null + pwiz.Skyline.Util.Helpers+ModeUIExtender, Skyline-daily, Version=23.1.1.282, Culture=neutral, PublicKeyToken=null headerColumn @@ -1152,6 +1221,6 @@ AssociateProteinsDlg - pwiz.Skyline.Util.ModeUIInvariantFormEx, Skyline, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null + pwiz.Skyline.Util.ModeUIInvariantFormEx, Skyline-daily, Version=23.1.1.282, Culture=neutral, PublicKeyToken=null \ No newline at end of file diff --git a/pwiz_tools/Skyline/Jamfile.jam b/pwiz_tools/Skyline/Jamfile.jam index ea692e83ee..8217f60c6f 100644 --- a/pwiz_tools/Skyline/Jamfile.jam +++ b/pwiz_tools/Skyline/Jamfile.jam @@ -395,6 +395,10 @@ if [ modules.peek : NT ] && --i-agree-to-the-vendor-licenses in [ modules.peek : return $(result) ; } + rule install-vendor-api-dependencies-to-debug-and-release ( properties * ) + { + return [ install-vendor-api-dependencies-to-locations $(SKYLINE_PATH)/bin/x64/Debug $(SKYLINE_PATH)/bin/x64/Release : $(properties) ] ; + } notfile NugetRestore : @do_nuget_restore ; @@ -412,7 +416,8 @@ if [ modules.peek : NT ] && --i-agree-to-the-vendor-licenses in [ modules.peek : ../../pwiz_tools/commandline//msconvert/$(PWIZ_WRAPPER_PATH)/obj/$(PLATFORM) TestDiagnostics//TestDiagnostics @build-location - @install-vendor-api-dependencies + #@install-vendor-api-dependencies + @install-vendor-api-dependencies-to-debug-and-release NugetRestore ../../pwiz/utility/bindings/CLI//pwiz_data_cli/$(PWIZ_WRAPPER_PATH)/obj/$(PLATFORM) ../../pwiz/utility/bindings/CLI//pwiz_data_cli.xml/$(PWIZ_WRAPPER_PATH)/obj/$(PLATFORM) diff --git a/pwiz_tools/Skyline/Model/AuditLog/EnumNames.Designer.cs b/pwiz_tools/Skyline/Model/AuditLog/EnumNames.Designer.cs index 6ed5a877a1..42352f2695 100644 --- a/pwiz_tools/Skyline/Model/AuditLog/EnumNames.Designer.cs +++ b/pwiz_tools/Skyline/Model/AuditLog/EnumNames.Designer.cs @@ -1536,6 +1536,42 @@ public static string SharedPeptides_Removed { } } + /// + /// Looks up a localized string similar to Assigned to the gene with the most peptides. + /// + public static string SharedPeptidesGene_AssignedToBestProtein { + get { + return ResourceManager.GetString("SharedPeptidesGene_AssignedToBestProtein", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Assigned to the first gene. + /// + public static string SharedPeptidesGene_AssignedToFirstProtein { + get { + return ResourceManager.GetString("SharedPeptidesGene_AssignedToFirstProtein", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Duplicated between genes. + /// + public static string SharedPeptidesGene_DuplicatedBetweenProteins { + get { + return ResourceManager.GetString("SharedPeptidesGene_DuplicatedBetweenProteins", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Removed (peptides must be unique to a single gene). + /// + public static string SharedPeptidesGene_Removed { + get { + return ResourceManager.GetString("SharedPeptidesGene_Removed", resourceCulture); + } + } + /// /// Looks up a localized string similar to Assigned to the protein group with the most peptides. /// diff --git a/pwiz_tools/Skyline/Model/AuditLog/EnumNames.resx b/pwiz_tools/Skyline/Model/AuditLog/EnumNames.resx index a50b1e0f0b..68f0cb30e6 100644 --- a/pwiz_tools/Skyline/Model/AuditLog/EnumNames.resx +++ b/pwiz_tools/Skyline/Model/AuditLog/EnumNames.resx @@ -714,4 +714,16 @@ Ratio to Light + + Assigned to the gene with the most peptides + + + Assigned to the first gene + + + Duplicated between genes + + + Removed (peptides must be unique to a single gene) + \ No newline at end of file diff --git a/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.Designer.cs b/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.Designer.cs index 8e28e67ae4..ced2966b2b 100644 --- a/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.Designer.cs +++ b/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.Designer.cs @@ -2643,6 +2643,15 @@ public static string ParsimonySettings_FindMinimalProteinList { } } + /// + /// Looks up a localized string similar to Group at gene level. + /// + public static string ParsimonySettings_GeneLevelParsimony { + get { + return ResourceManager.GetString("ParsimonySettings_GeneLevelParsimony", resourceCulture); + } + } + /// /// Looks up a localized string similar to Group proteins. /// diff --git a/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.resx b/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.resx index 5c652e16e8..e10528b68d 100644 --- a/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.resx +++ b/pwiz_tools/Skyline/Model/AuditLog/PropertyNames.resx @@ -1763,4 +1763,7 @@ Min m/z + + Group at gene level + \ No newline at end of file diff --git a/pwiz_tools/Skyline/Model/PeptideGroup.cs b/pwiz_tools/Skyline/Model/PeptideGroup.cs index 701805c61e..b442d22b54 100644 --- a/pwiz_tools/Skyline/Model/PeptideGroup.cs +++ b/pwiz_tools/Skyline/Model/PeptideGroup.cs @@ -481,10 +481,11 @@ public bool Equals(FastaSequenceGroup obj) { if (ReferenceEquals(null, obj)) return false; if (ReferenceEquals(this, obj)) return true; - return base.Equals(obj) && + var equals = base.Equals(obj) && Equals(obj._name, _name) && ArrayUtil.EqualsDeep(obj.FastaSequenceList, FastaSequenceList) && obj.IsDecoy == IsDecoy; + return equals; } public override bool Equals(object obj) diff --git a/pwiz_tools/Skyline/Model/PeptideGroupDocNode.cs b/pwiz_tools/Skyline/Model/PeptideGroupDocNode.cs index 2c111dbe5e..d4f57787ab 100644 --- a/pwiz_tools/Skyline/Model/PeptideGroupDocNode.cs +++ b/pwiz_tools/Skyline/Model/PeptideGroupDocNode.cs @@ -154,15 +154,18 @@ public PeptideGroupDocNode ChangeDescription(string desc) public PeptideGroupDocNode ChangeProteinMetadata(ProteinMetadata proteinMetadata) { var newMetadata = proteinMetadata; - if (Equals(PeptideGroup.Name, newMetadata.Name)) - newMetadata = newMetadata.ChangeName(null); // no actual override - if (Equals(PeptideGroup.Description, newMetadata.Description)) - newMetadata = newMetadata.ChangeDescription(null); // no actual override var group = PeptideGroup as FastaSequenceGroup; if (group != null) { Assume.AreEqual(group.FastaSequenceList.Count, proteinMetadata.ProteinMetadataList.Count); } + else + { + if (Equals(PeptideGroup.Name, newMetadata.Name)) + newMetadata = newMetadata.ChangeName(null); // no actual override + if (Equals(PeptideGroup.Description, newMetadata.Description)) + newMetadata = newMetadata.ChangeDescription(null); // no actual override + } return ChangeProp(ImClone(this), im => im._proteinMetadata = newMetadata); } diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs index e0436927e9..713bad4fac 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs @@ -25,11 +25,15 @@ using System.Xml; using System.Xml.Schema; using System.Xml.Serialization; +using pwiz.Common.Collections; using pwiz.Common.SystemUtil; using pwiz.ProteomeDatabase.API; +using pwiz.ProteomeDatabase.DataModel; +using pwiz.ProteomeDatabase.Fasta; using pwiz.Skyline.Model.AuditLog; using pwiz.Skyline.Properties; using pwiz.Skyline.Util; +using pwiz.Skyline.Util.Extensions; namespace pwiz.Skyline.Model.Proteome { @@ -38,10 +42,19 @@ public class ProteinAssociation private SrmDocument _document; private StringSearch _peptideTrie; private Dictionary> _peptideToPath; - private Dictionary> _peptideToProteins, _peptideToProteinGroups; - private MappingResultsInternal _results, _finalResults, _proteinGroupResults; - private IDictionary _proteinGroupAssociations; + private Dictionary> _peptideToProteins; + private MappingResultsInternal _results, _finalResults; private HashSet _peptidesRemovedByFilters; + public IDictionary _proteinToMetadata { get; private set; } + + internal class ProteinOrGeneGroupResultCache + { + public MappingResultsInternal Results { get; set; } + public Dictionary PeptideGroupByProteinOrGeneGroup { get; set; } + public Dictionary> PeptideToProteinOrGeneGroup { get; set; } + } + + private Dictionary _proteinOrGeneGroupResultCacheByGeneLevel; public IDictionary AssociatedProteins { get; private set; } public IDictionary ParsimoniousProteins { get; private set; } @@ -74,12 +87,15 @@ public ProteinAssociation(SrmDocument document, ILongWaitBroker broker) private void ResetMapping() { - _proteinGroupAssociations = null; - _proteinGroupResults = null; + _proteinOrGeneGroupResultCacheByGeneLevel = new Dictionary() + { + { false, null }, + { true, null } + }; _finalResults = null; - _peptideToProteinGroups = null; _peptideToProteins = null; _peptidesRemovedByFilters = null; + _proteinToMetadata = null; } public void UseFastaFile(string file, Func> digestProteinToPeptides, ILongWaitBroker broker) @@ -189,6 +205,10 @@ private Dictionary FindProteinMatches(I _results.PeptidesUnmapped = distinctTargetPeptideCount - _results.PeptidesMapped; _results.FinalProteinCount = proteinAssociations.Count; + _proteinToMetadata = new Dictionary(); + foreach (var kvp in proteinAssociations) + _proteinToMetadata[kvp.Key.Sequence.Name] = kvp.Key.Metadata; + return proteinAssociations; } @@ -197,9 +217,10 @@ public class ParsimonySettings : Immutable, IXmlSerializable, IValidating { public static ParsimonySettings DEFAULT = new ParsimonySettings() { MinPeptidesPerProtein = 1 }; - public ParsimonySettings(bool groupProteins, bool findMinimalProteinList, bool removeSubsetProteins, SharedPeptides sharedPeptides, int minPeptidesPerProtein) + public ParsimonySettings(bool groupProteins, bool geneLevel, bool findMinimalProteinList, bool removeSubsetProteins, SharedPeptides sharedPeptides, int minPeptidesPerProtein) { GroupProteins = groupProteins; + GeneLevelParsimony = geneLevel; FindMinimalProteinList = findMinimalProteinList; RemoveSubsetProteins = removeSubsetProteins; SharedPeptides = sharedPeptides; @@ -209,6 +230,9 @@ public ParsimonySettings(bool groupProteins, bool findMinimalProteinList, bool r [Track(ignoreDefaultParent:true)] public bool GroupProteins { get; private set; } + [Track(defaultValues: typeof(DefaultValuesFalse))] + public bool GeneLevelParsimony { get; private set; } + [Track(ignoreDefaultParent: true)] public bool FindMinimalProteinList { get; private set; } @@ -227,6 +251,7 @@ public bool Equals(ParsimonySettings obj) if (ReferenceEquals(null, obj)) return false; if (ReferenceEquals(this, obj)) return true; return obj.GroupProteins == GroupProteins && + obj.GeneLevelParsimony == GeneLevelParsimony && obj.FindMinimalProteinList == FindMinimalProteinList && obj.RemoveSubsetProteins == RemoveSubsetProteins && obj.SharedPeptides == SharedPeptides && @@ -247,6 +272,7 @@ public override int GetHashCode() { int result = MinPeptidesPerProtein; result = (result * 397) ^ GroupProteins.GetHashCode(); + result = (result * 397) ^ GeneLevelParsimony.GetHashCode(); result = (result * 397) ^ FindMinimalProteinList.GetHashCode(); result = (result * 397) ^ RemoveSubsetProteins.GetHashCode(); result = (result * 397) ^ SharedPeptides.GetHashCode(); @@ -269,6 +295,7 @@ private enum Attr { min_peptides_per_protein, group_proteins, + gene_level_parsimony, find_minimal_protein_list, remove_subset_proteins, shared_peptides @@ -283,6 +310,7 @@ public void ReadXml(XmlReader reader) { MinPeptidesPerProtein = reader.GetIntAttribute(Attr.min_peptides_per_protein, 1); GroupProteins = reader.GetBoolAttribute(Attr.group_proteins); + GeneLevelParsimony = reader.GetBoolAttribute(Attr.gene_level_parsimony); FindMinimalProteinList = reader.GetBoolAttribute(Attr.find_minimal_protein_list); RemoveSubsetProteins = reader.GetBoolAttribute(Attr.remove_subset_proteins); SharedPeptides = reader.GetEnumAttribute(Attr.shared_peptides, SharedPeptides.DuplicatedBetweenProteins); @@ -301,6 +329,7 @@ public void WriteXml(XmlWriter writer) { writer.WriteAttribute(Attr.min_peptides_per_protein, MinPeptidesPerProtein, 1); writer.WriteAttribute(Attr.group_proteins, GroupProteins, false); + writer.WriteAttribute(Attr.gene_level_parsimony, GeneLevelParsimony, false); writer.WriteAttribute(Attr.find_minimal_protein_list, FindMinimalProteinList, false); writer.WriteAttribute(Attr.remove_subset_proteins, RemoveSubsetProteins, false); writer.WriteAttribute(Attr.shared_peptides, SharedPeptides, SharedPeptides.DuplicatedBetweenProteins); @@ -324,6 +353,7 @@ public interface IMappingResults int PeptidesUnmapped { get; } bool GroupProteins { get; } + bool GeneLevelParsimony { get; } bool FindMinimalProteinList { get; } bool RemoveSubsetProteins { get; } SharedPeptides SharedPeptides { get; } @@ -355,6 +385,7 @@ public MappingResultsInternal Clone() RemoveSubsetProteins = RemoveSubsetProteins, SharedPeptides = SharedPeptides, GroupProteins = GroupProteins, + GeneLevelParsimony = GeneLevelParsimony, MinPeptidesPerProtein = MinPeptidesPerProtein }; } @@ -365,12 +396,13 @@ public MappingResultsInternal Clone() public int PeptidesUnmapped { get; set; } public bool GroupProteins { get; set; } + public bool GeneLevelParsimony { get; set; } public bool FindMinimalProteinList { get; set; } public bool RemoveSubsetProteins { get; set; } public SharedPeptides SharedPeptides { get; set; } public int MinPeptidesPerProtein { get; set; } - public ParsimonySettings ParsimonySettings => new ParsimonySettings(GroupProteins, FindMinimalProteinList, + public ParsimonySettings ParsimonySettings => new ParsimonySettings(GroupProteins, GeneLevelParsimony, FindMinimalProteinList, RemoveSubsetProteins, SharedPeptides, MinPeptidesPerProtein); public int FinalProteinCount { get; set; } @@ -408,9 +440,14 @@ public override int GetHashCode() { return _hash; } + + public override string ToString() + { + return string.Join(TextUtil.SEPARATOR_CSV.ToString(), Peptides.Select(p => p.ModifiedSequenceDisplay)); + } } - public void ApplyParsimonyOptions(bool groupProteins, bool findMinimalProteinList, bool removeSubsetProteins, SharedPeptides sharedPeptides, int minPeptidesPerProtein, ILongWaitBroker broker) + public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findMinimalProteinList, bool removeSubsetProteins, SharedPeptides sharedPeptides, int minPeptidesPerProtein, ILongWaitBroker broker) { Dictionary> peptideToProteinGroups = _peptideToProteins; @@ -418,25 +455,28 @@ public void ApplyParsimonyOptions(bool groupProteins, bool findMinimalProteinLis _peptidesRemovedByFilters = new HashSet(); - if (groupProteins) + if (groupProteins || geneLevel) { - if (_proteinGroupAssociations == null) + if (_proteinOrGeneGroupResultCacheByGeneLevel[geneLevel] == null) { - _proteinGroupResults = _results.Clone(); - _proteinGroupResults.GroupProteins = true; - _proteinGroupAssociations = CalculateProteinGroups(_proteinGroupResults, broker); + var cache = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel] = new ProteinOrGeneGroupResultCache(); + cache.Results = _results.Clone(); + cache.Results.GroupProteins = true; + cache.Results.GeneLevelParsimony = geneLevel; + cache.PeptideGroupByProteinOrGeneGroup = CalculateProteinOrGeneGroups(cache.Results, geneLevel, broker); - if (_proteinGroupAssociations == null) + if (cache.PeptideGroupByProteinOrGeneGroup == null) return; } - _finalResults = _proteinGroupResults.Clone(); - ParsimoniousProteins = _proteinGroupAssociations; - peptideToProteinGroups = _peptideToProteinGroups; + _finalResults = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel].Results.Clone(); + ParsimoniousProteins = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel].PeptideGroupByProteinOrGeneGroup; + peptideToProteinGroups = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel].PeptideToProteinOrGeneGroup; } else { _finalResults = _results; + _finalResults.GeneLevelParsimony = false; ParsimoniousProteins = AssociatedProteins; } @@ -561,11 +601,65 @@ public void ApplyParsimonyOptions(bool groupProteins, bool findMinimalProteinLis _finalResults.FinalSharedPeptideCount = sharedPeptidesRemaining.Values.Sum(); } - private Dictionary CalculateProteinGroups(MappingResultsInternal results, ILongWaitBroker broker) + public class GeneLevelEqualityComparer : EqualityComparer + { + public override bool Equals(IProteinRecord x, IProteinRecord y) + { + if (ReferenceEquals(x, y)) return true; + if (ReferenceEquals(null, x)) return false; + if (ReferenceEquals(null, y)) return false; + if (x.Metadata.Gene.IsNullOrEmpty() && y.Metadata.Gene.IsNullOrEmpty()) return x.Sequence.Equals(y.Sequence); + if (x.Metadata.Gene.IsNullOrEmpty() != y.Metadata.Gene.IsNullOrEmpty()) return false; + + return x.Metadata.Gene.Equals(y.Metadata.Gene); + } + + public override int GetHashCode(IProteinRecord obj) + { + if (obj.Metadata.Gene == null) + return obj.Sequence.GetHashCode(); + return obj.Metadata.Gene.GetHashCode(); + } + } + + private Dictionary CalculateProteinOrGeneGroups(MappingResultsInternal results, bool geneLevel, ILongWaitBroker broker) { var _peptideGroupToProteins = new Dictionary>(); - foreach(var kvp in AssociatedProteins) + var proteinOrGeneToPeptideGroup = AssociatedProteins; + if (geneLevel) + { + // gene to protein to peptides; the top level dictionary uses the GeneLevelEqualityComparer + var geneToPeptides = new Dictionary>>(new GeneLevelEqualityComparer()); + foreach (var kvp in AssociatedProteins) + if (!geneToPeptides.ContainsKey(kvp.Key)) + geneToPeptides.Add(kvp.Key, new Dictionary> { { kvp.Key, kvp.Value.Peptides } }); + else + geneToPeptides[kvp.Key][kvp.Key] = kvp.Value.Peptides; + // now pick the protein with the longest sequence if it contains all the peptides, or a concatenation of all of the sequences if not + IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary> proteinToPeptides) + { + if (proteinToPeptides.Count == 1) + return proteinToPeptides.Keys.First(); + + var longestProtein = proteinToPeptides.OrderByDescending(kvp2 => kvp2.Key.Sequence.Sequence.Length).First().Key; + var allPeptides = proteinToPeptides.Values.SelectMany(o => o).Distinct().ToList(); + if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Peptide.Sequence))) + return longestProtein; + + // each protein's individual metadata is kept, but all protein sequences are replaced by the concatenated sequence + var concatenatedSequence = string.Join(string.Empty, proteinToPeptides.Keys.Select(p => p.Sequence.Sequence)); + return new FastaRecord(longestProtein.RecordIndex, 0, + new FastaSequenceGroup(longestProtein.Sequence.Name, + proteinToPeptides.Keys.Select(p => new FastaSequence(p.Sequence.Name, + p.Sequence.Description, p.Sequence.Alternatives, concatenatedSequence)).ToList()), + new ProteinGroupMetadata(proteinToPeptides.Keys.Select(p => p.Metadata).ToList())); + } + proteinOrGeneToPeptideGroup = geneToPeptides.ToDictionary(kvp => GenerateConcatenatedSequenceIfNecessary(kvp.Value), + kvp => new PeptideAssociationGroup(kvp.Value.Values.SelectMany(o => o).Distinct().ToList())); + } + + foreach(var kvp in proteinOrGeneToPeptideGroup) if (!_peptideGroupToProteins.ContainsKey(kvp.Value)) _peptideGroupToProteins.Add(kvp.Value, new List { kvp.Key }); else @@ -574,19 +668,22 @@ private Dictionary CalculateProteinGrou results.FinalPeptideCount = 0; results.FinalProteinCount = _peptideGroupToProteins.Count; - broker.Message = ProteomeResources.ProteinAssociation_CalculateProteinGroups_Calculating_protein_groups; - var proteinGroupAssociations = new Dictionary(); + if (geneLevel) + broker.Message = Resources.ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups; + else + broker.Message = ProteomeResources.ProteinAssociation_CalculateProteinGroups_Calculating_protein_groups; + var proteinGroupAssociations = new Dictionary(geneLevel ? new GeneLevelEqualityComparer() : EqualityComparer.Default); - _peptideToProteinGroups = new Dictionary>(); + var peptideToProteinOrGeneGroups = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel].PeptideToProteinOrGeneGroup = new Dictionary>(); Action addPeptideAssociations = (protein, peptides) => { foreach (var peptide in peptides.Peptides) { - if (!_peptideToProteinGroups.ContainsKey(peptide)) - _peptideToProteinGroups.Add(peptide, new List {protein}); + if (!peptideToProteinOrGeneGroups.ContainsKey(peptide)) + peptideToProteinOrGeneGroups.Add(peptide, new List {protein}); else - _peptideToProteinGroups[peptide].Add(protein); + peptideToProteinOrGeneGroups[peptide].Add(protein); } }; @@ -607,10 +704,16 @@ private Dictionary CalculateProteinGrou continue; } + string ProteinOrGeneGroupName(IProteinRecord p) + { + if (geneLevel && !p.Metadata.Gene.IsNullOrEmpty()) return p.Metadata.Gene; + return p.Sequence.Name; + } + var proteinsByRecordIndex = kvp.Value.OrderBy(p => p.RecordIndex).ToList(); - var proteinGroupName = string.Join(ProteinGroupMetadata.GROUP_SEPARATOR, proteinsByRecordIndex.Select(p => p.Sequence.Name)); + var proteinGroupName = string.Join(ProteinGroupMetadata.GROUP_SEPARATOR, proteinsByRecordIndex.Select(ProteinOrGeneGroupName).Distinct()); var proteinFastaSequence = new FastaSequenceGroup(proteinGroupName, proteinsByRecordIndex.Select(r => r.Sequence).ToList()); - var proteinGroup = new FastaRecord(kvp.Value[0].RecordIndex, 0, proteinFastaSequence); + var proteinGroup = new FastaRecord(kvp.Value[0].RecordIndex, 0, proteinFastaSequence, kvp.Value[0].Metadata); proteinGroupAssociations[proteinGroup] = kvp.Key; addPeptideAssociations(proteinGroup, kvp.Key); } @@ -911,6 +1014,7 @@ public SrmDocument CreateDocTree(SrmDocument current, IProgressMonitor monitor) foreach (var keyValuePair in proteinAssociationsList) { var protein = keyValuePair.Key.Sequence; + var metadata = keyValuePair.Key.Metadata; var children = new List(); foreach (PeptideDocNode peptideDocNode in keyValuePair.Value.Peptides) { @@ -919,10 +1023,17 @@ public SrmDocument CreateDocTree(SrmDocument current, IProgressMonitor monitor) children.Add(peptideDocNode.ChangeFastaSequence(protein)); } - var proteinOrGroupMetadata = protein is FastaSequenceGroup - ? new ProteinGroupMetadata((protein as FastaSequenceGroup).FastaSequenceList.Select(s => new ProteinMetadata(s.Name, s.Description)).ToList()) - : new ProteinMetadata(protein.Name, protein.Description); + var proteinOrGroupMetadata = protein is FastaSequenceGroup group + ? new ProteinGroupMetadata(group.FastaSequenceList.Select(s => + { + var proteinMetadata = _proteinToMetadata[s.Name]; + return new ProteinMetadata(s.Name, s.Description, proteinMetadata.PreferredName, + proteinMetadata.Accession, proteinMetadata.Gene, proteinMetadata.Species); + }).ToList()) + : new ProteinMetadata(protein.Name, protein.Description, metadata.PreferredName, metadata.Accession, + metadata.Gene, metadata.Species); var peptideGroupDocNode = new PeptideGroupDocNode(protein, proteinOrGroupMetadata, children.ToArray()); + //peptideGroupDocNode = peptideGroupDocNode.ChangeName(protein.Name).ChangeDescription(protein.Description); newPeptideGroups.Add(peptideGroupDocNode); if (monitor.IsCanceled) @@ -950,7 +1061,8 @@ private static IEnumerable ParseFastaWithFilePositions(Stream strea { int index = 0; long streamLength = stream.Length; - foreach (var fastaData in FastaData.ParseFastaFile(new StreamReader(stream))) + var wefi = new WebEnabledFastaImporter(); + foreach (var fastaData in wefi.Import(new StreamReader(stream))) { yield return new FastaRecord(index, (int) (stream.Position * 100 / streamLength), fastaData); index++; @@ -961,6 +1073,7 @@ public interface IProteinRecord { int RecordIndex { get; } FastaSequence Sequence { get; } + ProteinMetadata Metadata { get; } int Progress { get; } } @@ -971,15 +1084,17 @@ public interface IProteinSource private class BackgroundProteomeRecord : IProteinRecord { - public BackgroundProteomeRecord(int index, FastaSequence sequence, int progress) + public BackgroundProteomeRecord(int index, FastaSequence sequence, ProteinMetadata metadata, int progress) { RecordIndex = index; Sequence = sequence; Progress = progress; + Metadata = metadata; } public int RecordIndex { get; } public FastaSequence Sequence { get; } + public ProteinMetadata Metadata { get; } public int Progress { get; } } @@ -1000,7 +1115,7 @@ public IEnumerable Proteins get { for (int i = 0; i < DbProteins.Count; ++i) - yield return new BackgroundProteomeRecord(i, Proteome.MakeFastaSequence(DbProteins[i]), i * 100 / DbProteins.Count); + yield return new BackgroundProteomeRecord(i, Proteome.MakeFastaSequence(DbProteins[i]), DbProteins[i].ProteinMetadata, i * 100 / DbProteins.Count); } } @@ -1017,16 +1132,36 @@ public IEnumerable Proteins /// private class FastaRecord : IProteinRecord { - public FastaRecord(int recordIndex, int progress, FastaSequence fastaSequence) + public FastaRecord(int recordIndex, int progress, FastaSequence fastaSequence, ProteinMetadata metadata) { RecordIndex = recordIndex; Progress = progress; Sequence = fastaSequence; + Metadata = metadata; + } + + public FastaRecord(int recordIndex, int progress, DbProtein protein) + { + RecordIndex = recordIndex; + Progress = progress; + + var firstName = protein.Names.First(); + var alternatives = protein.Names.Skip(1).Select(o => o.GetProteinMetadata()).ToList(); + Sequence = new FastaSequence(firstName.Name, firstName.Description, alternatives, protein.Sequence); + Metadata = firstName.GetProteinMetadata(); } public int RecordIndex { get; } public int Progress { get; } public FastaSequence Sequence { get; } + public ProteinMetadata Metadata { get; } + + public override string ToString() + { + if (Metadata.Gene.IsNullOrEmpty()) + return $@"{Sequence.Name}:{Helpers.TruncateString(Sequence.Sequence, 30)}"; + return $@"{Sequence.Name} ({Metadata.Gene}):{Helpers.TruncateString(Sequence.Sequence, 30)}"; + } } public class FastaSource : IProteinSource diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinGroupMetadata.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinGroupMetadata.cs index 1d49e20c35..6834cf12de 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinGroupMetadata.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinGroupMetadata.cs @@ -36,9 +36,9 @@ private ProteinGroupMetadata() : base(null, null) { } - private ProteinGroupMetadata(ProteinGroupMetadata other, WebSearchInfo webSearchInfo = null) : base(other.Name, other.Description) + private ProteinGroupMetadata(ProteinGroupMetadata other, WebSearchInfo webSearchInfo = null) : base(other.Name, + other.Description, other.PreferredName, other.Accession, other.Gene, other.Species, (webSearchInfo ?? other.WebSearchInfo).ToString()) { - webSearchInfo ??= other.ProteinMetadataList.First().WebSearchInfo; ProteinMetadataList = ImmutableList.ValueOf(other.ProteinMetadataList); } @@ -171,7 +171,7 @@ public override ProteinMetadata Merge(ProteinMetadata source) return this; if (source is ProteinGroupMetadata sourceGroup) { - return new ProteinGroupMetadata(sourceGroup.ProteinMetadataList); + return new ProteinGroupMetadata(sourceGroup, WebSearchInfo); } Assume.Fail(@"cannot merge ProteinMetadata into ProteinGroupMetadata"); @@ -190,13 +190,12 @@ public override ProteinMetadata Merge(string name, string description) public bool Equals(ProteinGroupMetadata other) { - if (other == null) - return false; - if (!string.Equals(Name, other.Name)) - return false; - if (!ArrayUtil.EqualsDeep(ProteinMetadataList, other.ProteinMetadataList)) - return false; - return true; + if (ReferenceEquals(null, other)) return false; + if (ReferenceEquals(this, other)) return true; + + bool equals = string.Equals(Name, other.Name) && + ArrayUtil.EqualsDeep(ProteinMetadataList, other.ProteinMetadataList); + return equals; } public override bool Equals(object obj) diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinMetadataManager.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinMetadataManager.cs index 2b896a0eb5..57f68294a8 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinMetadataManager.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinMetadataManager.cs @@ -164,17 +164,30 @@ private SrmDocument LookupProteinMetadata(SrmDocument docOrig, IProgressMonitor } else if (nodePepGroup.ProteinMetadata.NeedsSearch()) { - var proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.Name); - if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName)) - proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit - if ((proteinMetadata == null) && !String.IsNullOrEmpty(nodePepGroup.ProteinMetadata.Accession)) - proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.ProteinMetadata.Accession); // Parsed accession might hit - if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch()) + void CheckBackgroundProteome(FastaSequence seq, ProteinMetadata proteinMetadataOrGroup, int i) { - // Background proteome has already resolved this - _processedNodes.Add(nodePepGroup.Id.GlobalIndex, proteinMetadata); - nResolved++; + var currentProteinMetadata = proteinMetadataOrGroup.ProteinMetadataList[i]; + var proteinMetadata = proteomeDb.GetProteinMetadataByName(seq.Name); + if ((proteinMetadata == null) && !Equals(nodePepGroup.Name, nodePepGroup.OriginalName)) + proteinMetadata = proteomeDb.GetProteinMetadataByName(nodePepGroup.OriginalName); // Original name might hit + if ((proteinMetadata == null) && !String.IsNullOrEmpty(currentProteinMetadata.Accession)) + proteinMetadata = proteomeDb.GetProteinMetadataByName(currentProteinMetadata.Accession); // Parsed accession might hit + if ((proteinMetadata != null) && !proteinMetadata.NeedsSearch()) + { + // Background proteome has already resolved this + if (_processedNodes.TryGetValue(nodePepGroup.Id.GlobalIndex, out var processedProteinGroupMetadata)) + _processedNodes[nodePepGroup.Id.GlobalIndex] = processedProteinGroupMetadata.ChangeSingleProteinMetadata(proteinMetadata); + else + _processedNodes.Add(nodePepGroup.Id.GlobalIndex, nodePepGroup.ProteinMetadata.ChangeSingleProteinMetadata(proteinMetadata)); + } } + + for (var i = 0; i < nodePepGroup.ProteinMetadata.ProteinMetadataList.Count; i++) + { + var fastaSequenceOrGroup = nodePepGroup.PeptideGroup as FastaSequence; + CheckBackgroundProteome(fastaSequenceOrGroup?.FastaSequenceList[i], nodePepGroup.ProteinMetadata, i); + } + nResolved++; } if (!UpdatePrecentComplete(progressMonitor, 100 * nResolved / nUnresolved, ref progressStatus)) diff --git a/pwiz_tools/Skyline/Model/Serialization/DocumentFormat.cs b/pwiz_tools/Skyline/Model/Serialization/DocumentFormat.cs index 893cf5f2c4..ff1f7f6688 100644 --- a/pwiz_tools/Skyline/Model/Serialization/DocumentFormat.cs +++ b/pwiz_tools/Skyline/Model/Serialization/DocumentFormat.cs @@ -115,7 +115,8 @@ public struct DocumentFormat : IComparable public static readonly DocumentFormat VERSION_22_25 = new DocumentFormat(22.25); // spectrum class filters and peptide_integration minOccurs public static readonly DocumentFormat VERSION_23_1 = new DocumentFormat(23.1); // Release format public static readonly DocumentFormat VERSION_23_11 = new DocumentFormat(23.11); // Surrogate calibration curve - public static readonly DocumentFormat CURRENT = VERSION_23_11; + public static readonly DocumentFormat VERSION_23_12 = new DocumentFormat(23.12); // gene level parsimony + public static readonly DocumentFormat CURRENT = VERSION_23_12; private readonly double _versionNumber; public DocumentFormat(double versionNumber) diff --git a/pwiz_tools/Skyline/Properties/Resources.Designer.cs b/pwiz_tools/Skyline/Properties/Resources.Designer.cs index a3ec6aafc6..5d811574b0 100644 --- a/pwiz_tools/Skyline/Properties/Resources.Designer.cs +++ b/pwiz_tools/Skyline/Properties/Resources.Designer.cs @@ -633,6 +633,116 @@ public static string AssociateProteinsDlg_ApplyChanges_Associated_proteins { } } + /// + /// Looks up a localized string similar to Shared Peptides. + /// + public static string AssociateProteinsDlg_CellValueNeeded_Shared_Peptides { + get { + return ResourceManager.GetString("AssociateProteinsDlg_CellValueNeeded_Shared_Peptides", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Find &minimal gene group list that explains all peptides. + /// + public static string AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Find &minimal protein group list that explains all peptides. + /// + public static string AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Finding peptides in FASTA file. + /// + public static string AssociateProteinsDlg_FindProteinMatchesWithFasta_Finding_peptides_in_FASTA_file { + get { + return ResourceManager.GetString("AssociateProteinsDlg_FindProteinMatchesWithFasta_Finding_peptides_in_FASTA_file", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to No matches were found using the imported fasta file.. + /// + public static string AssociateProteinsDlg_FindProteinMatchesWithFasta_No_matches_were_found_using_the_imported_fasta_file_ { + get { + return ResourceManager.GetString("AssociateProteinsDlg_FindProteinMatchesWithFasta_No_matches_were_found_using_the_" + + "imported_fasta_file_", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Mi&n peptides per gene. + /// + public static string AssociateProteinsDlg_Min_peptides_per_gene { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Min_peptides_per_gene", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Mi&n peptides per protein group. + /// + public static string AssociateProteinsDlg_Min_peptides_per_protein_group { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Min_peptides_per_protein_group", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Existing protein associations will be discarded.. + /// + public static string AssociateProteinsDlg_OnShown_Existing_protein_associations_will_be_discarded_ { + get { + return ResourceManager.GetString("AssociateProteinsDlg_OnShown_Existing_protein_associations_will_be_discarded_", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Organize all document peptides into associated proteins or protein groups.. + /// + public static string AssociateProteinsDlg_OnShown_Organize_all_document_peptides_into_associated_proteins_or_protein_groups { + get { + return ResourceManager.GetString("AssociateProteinsDlg_OnShown_Organize_all_document_peptides_into_associated_prote" + + "ins_or_protein_groups", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to &Remove subset genes. + /// + public static string AssociateProteinsDlg_Remove_subset_genes { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Remove_subset_genes", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to &Remove subset protein groups. + /// + public static string AssociateProteinsDlg_Remove_subset_protein_groups { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Remove_subset_protein_groups", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Applying parsimony options. + /// + public static string AssociateProteinsDlg_UpdateParsimonyResults_Applying_parsimony_options { + get { + return ResourceManager.GetString("AssociateProteinsDlg_UpdateParsimonyResults_Applying_parsimony_options", resourceCulture); + } + } + /// /// Looks up a localized string similar to No background proteome defined, see the Digestion tab in Peptide Settings for more information.. /// @@ -7754,6 +7864,91 @@ public static System.Drawing.Bitmap Protein { } } + /// + /// Looks up a localized string similar to Are you sure you want to delete the molecule list '{0}'?. + /// + public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_molecule_list___0___ { + get { + return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_molecule_list__" + + "_0___", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Are you sure you want to delete the protein '{0}'?. + /// + public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_protein___0___ { + get { + return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_protein___0___", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Are you sure you want to delete these {0} molecule lists?. + /// + public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__molecule_lists_ { + get { + return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__molecule_" + + "lists_", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Are you sure you want to delete these {0} proteins?. + /// + public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__proteins_ { + get { + return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__proteins_" + + "", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Calculating protein groups. + /// + public static string ProteinAssociation_CalculateProteinGroups_Calculating_protein_groups { + get { + return ResourceManager.GetString("ProteinAssociation_CalculateProteinGroups_Calculating_protein_groups", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Group of {0} proteins. + /// + public static string ProteinAssociation_CalculateProteinGroups_Group_of__0__proteins { + get { + return ResourceManager.GetString("ProteinAssociation_CalculateProteinGroups_Group_of__0__proteins", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Calculating gene groups. + /// + public static string ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups { + get { + return ResourceManager.GetString("ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Calculating protein clusters. + /// + public static string ProteinAssociation_Calculating_protein_clusters { + get { + return ResourceManager.GetString("ProteinAssociation_Calculating_protein_clusters", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Creating protein targets and assigning their peptides. + /// + public static string ProteinAssociation_CreateDocTree_Creating_protein_targets_and_assigning_their_peptides { + get { + return ResourceManager.GetString("ProteinAssociation_CreateDocTree_Creating_protein_targets_and_assigning_their_pep" + + "tides", resourceCulture); + } + } + /// /// Looks up a localized string similar to Unmapped Peptides. /// diff --git a/pwiz_tools/Skyline/Properties/Resources.resx b/pwiz_tools/Skyline/Properties/Resources.resx index e7b3917e86..1d20a8d5f1 100644 --- a/pwiz_tools/Skyline/Properties/Resources.resx +++ b/pwiz_tools/Skyline/Properties/Resources.resx @@ -4357,4 +4357,16 @@ If you choose Disable, you can enable Auto-select later with the "Refine > Ad Error: Could not find background proteome file {0} + + Calculating gene groups + + + Find &minimal gene group list that explains all peptides + + + &Remove subset genes + + + Mi&n peptides per gene + \ No newline at end of file diff --git a/pwiz_tools/Skyline/Test/SrmSettingsTest.cs b/pwiz_tools/Skyline/Test/SrmSettingsTest.cs index ba5a8077b5..8602632674 100644 --- a/pwiz_tools/Skyline/Test/SrmSettingsTest.cs +++ b/pwiz_tools/Skyline/Test/SrmSettingsTest.cs @@ -1150,18 +1150,20 @@ public void SerializeProteinAssociationSettingsTest() " \n" + " \n" + " \n" + " \n" + ""; - AssertEx.DeserializeNoError(proteinAssociationSerialized, DocumentFormat.PROTEIN_GROUPS); + AssertEx.DeserializeNoError(proteinAssociationSerialized, DocumentFormat.CURRENT); var doc = AssertEx.Deserialize(proteinAssociationSerialized); var parsimonySettings = doc.Settings.PeptideSettings.ProteinAssociationSettings; Assert.AreEqual(0, parsimonySettings.MinPeptidesPerProtein); Assert.AreEqual(true, parsimonySettings.GroupProteins); + Assert.AreEqual(true, parsimonySettings.GeneLevelParsimony); Assert.AreEqual(true, parsimonySettings.FindMinimalProteinList); Assert.AreEqual(true, parsimonySettings.RemoveSubsetProteins); Assert.AreEqual(ProteinAssociation.SharedPeptides.AssignedToBestProtein, parsimonySettings.SharedPeptides); diff --git a/pwiz_tools/Skyline/TestFunctional/AssociateProteinsDlgTest.cs b/pwiz_tools/Skyline/TestFunctional/AssociateProteinsDlgTest.cs index c5634e731e..127a70780f 100644 --- a/pwiz_tools/Skyline/TestFunctional/AssociateProteinsDlgTest.cs +++ b/pwiz_tools/Skyline/TestFunctional/AssociateProteinsDlgTest.cs @@ -33,6 +33,7 @@ using pwiz.Skyline.Model.Irt; using pwiz.Skyline.Model.Proteome; using pwiz.Skyline.Properties; +using pwiz.Skyline.Util; using pwiz.SkylineTestUtil; namespace pwiz.SkylineTestFunctional @@ -57,10 +58,14 @@ protected override void DoTest() TestUseFasta(); TestInvalidFasta(); TestUseBackgroundProteome(); - TestParsimonyOptions(); TestFastaOverride(); + TestParsimonyOptions(); + + Assert.IsFalse(IsRecordMode); } + public bool IsRecordMode => false; + private void TestInvalidFasta() { RunUI(() => SkylineWindow.OpenFile(TestFilesDir.GetTestPath("AssociateProteinsTest.sky"))); @@ -71,11 +76,10 @@ private void TestInvalidFasta() // ReSharper disable LocalizableElement File.WriteAllLines(invalidFastaFilepath, new[] { - ">FOOBAR\tThe first header line\x01", - "The second header line that I've never seen in a protein FASTA", + ">FOOBAR\tThe first header line", "ELVISLIVES", - ">BAZ|Another header. Where did it g\x02 wrong?", - "PEPTIDEK" + ">BAZ|Another header. Where did it go wrong?", + "PEPTID\u0002EK" }); // ReSharper restore LocalizableElement var errorDlg = ShowDialog(() => associateProteinsDlg.FastaFileName = invalidFastaFilepath); @@ -192,6 +196,7 @@ private void OkAssociateProteinsDialog(AssociateProteinsDlg dlg) private class ParsimonyTestCase { public string[] Proteins; + public string[] ProteinDescriptions; public string[] Peptides; public int ExpectedPeptidesMapped, ExpectedPeptidesUnmapped; public int ExpectedProteinsMapped, ExpectedProteinsUnmapped; @@ -200,6 +205,7 @@ private class ParsimonyTestCase public class OptionsAndResult { public bool GroupProteins; + public bool GeneLevelParsimony; public bool FindMinimalProteinList; public ProteinAssociation.SharedPeptides SharedPeptides; public int MinPeptidesPerProtein = 1; @@ -218,6 +224,185 @@ public bool RemoveSubsetProteins private static ParsimonyTestCase[] _parsimonyTestCases = new[] { + new ParsimonyTestCase + { + Proteins = new[] + { + // two homolog proteins from different genes + "SAMESAME", + "SAMESAME", + + // two partial homolog proteins from different genes, each with a distinct peptide + "AAAACCCC", + "AAADCCCD", + + // two partial homolog proteins from different genes, no distinct peptide to distinguish them + "DDDDEEEE", + "DDDFEEEF", + + // isoforms of the same gene, each with a distinct peptide + "FFFFGGGGHHHH", + "FFFFGGHHHH", + + // isoforms of the same gene, no distinct peptide to distinguish them + "IIIIKKKK", + "IIIGKKKG", + + // isoforms of the same gene, some with distinct peptides and some without + "LLLLNNNN", + "LLLGNNNG", + "LLGGNNNG", + }, + ProteinDescriptions = new [] + { + "GN=Same", + "GN=Same", + + "GN=PartialHomologsDistinctA", + "GN=PartialHomologsDistinctB", + + "GN=PartialHomologsNonDistinctA", + "GN=PartialHomologsNonDistinctB", + + "GN=IsoformsDistinct", + "GN=IsoformsDistinct", + + "GN=IsoformsNonDistinct", + "GN=IsoformsNonDistinct", + + "accession:ISD1 GN=IsoformsSomeDistinct", + "accession:ISD2 GN=IsoformsSomeDistinct", + "accession:ISD3_NonDistinct GN=IsoformsSomeDistinct", + }, + Peptides = new[] + { + "SAM", "SAMES", "AMES", // M will be generated with and without +16 + + "AAA", "CCC", + "AACC", "AAACCC", + "AAD", "CCCD", + + "DD", "EE", "DDD", "EEE", + + "FFFFGG", "FFGG", "GGHHHH", + "FGGGGH", "GGGGH", + "FGGH", "FGGHHHH", + + "II", "KK", "III", "KKK", + + "NNN", // all 3 + "LLLL", // only ISD1 + "LLLG", // only ISD2 + "GNNNG", // ISD2 and ISD3 + }, + ExpectedPeptidesMapped = 28, + ExpectedPeptidesUnmapped = 0, + ExpectedProteinsMapped = 13, + ExpectedProteinsUnmapped = 0, + OptionsAndResults = new [] + { + new ParsimonyTestCase.OptionsAndResult + { + GeneLevelParsimony = true, + SharedPeptides = ProteinAssociation.SharedPeptides.Removed, + ExpectedFinalPeptides = 29, + ExpectedFinalProteins = 7, + ExpectedMappedSharedPeptides = 4, + ExpectedFinalSharedPeptides = 0, + }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + SharedPeptides = ProteinAssociation.SharedPeptides.Removed, + ExpectedFinalPeptides = 24, + ExpectedFinalProteins = 9, + ExpectedMappedSharedPeptides = 15, + ExpectedFinalSharedPeptides = 0, + }, + new ParsimonyTestCase.OptionsAndResult + { + GeneLevelParsimony = true, + SharedPeptides = ProteinAssociation.SharedPeptides.AssignedToBestProtein, + ExpectedFinalPeptides = 33, + ExpectedFinalProteins = 7, + ExpectedMappedSharedPeptides = 4, + ExpectedFinalSharedPeptides = 4, + }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + SharedPeptides = ProteinAssociation.SharedPeptides.AssignedToBestProtein, + ExpectedFinalPeptides = 36, + ExpectedFinalProteins = 9, + ExpectedMappedSharedPeptides = 15, + ExpectedFinalSharedPeptides = 10, + }, + new ParsimonyTestCase.OptionsAndResult + { + GeneLevelParsimony = true, + SharedPeptides = ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins, + ExpectedFinalPeptides = 33, + ExpectedFinalProteins = 7, + ExpectedMappedSharedPeptides = 4, + ExpectedFinalSharedPeptides = 4, + }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + SharedPeptides = ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins, + ExpectedFinalPeptides = 39, + ExpectedFinalProteins = 10, + ExpectedMappedSharedPeptides = 15, + ExpectedFinalSharedPeptides = 15, + }, + } + }, + new ParsimonyTestCase + { + Proteins = new[] + { + // histone isoforms + "KSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLMREIAQDFKTDLRFQSSAVMALQEACE", + "KSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEACE", + "KSAPATGGVKKPHRYRPGTVALREIRRYQKSTELLIRKLPFQRLVREIAQDFKTDLRFQSSAVMALQEASE" + }, + ProteinDescriptions = new [] + { + "accession:Q16695 GN=HIST3H3", + "accession:P68431 GN=HIST1H3A", + "accession:Q71DI3 GN=HIST2H3A", + }, + Peptides = new[] + { + "SAPATGGVK", + "YRPGTVALR", + }, + ExpectedPeptidesMapped = 2, + ExpectedPeptidesUnmapped = 0, + ExpectedProteinsMapped = 3, + ExpectedProteinsUnmapped = 0, + OptionsAndResults = new [] + { + new ParsimonyTestCase.OptionsAndResult + { + GeneLevelParsimony = true, + SharedPeptides = ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins, + ExpectedFinalPeptides = 2, + ExpectedFinalProteins = 1, + ExpectedMappedSharedPeptides = 0, + ExpectedFinalSharedPeptides = 0, + }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + SharedPeptides = ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins, + ExpectedFinalPeptides = 2, + ExpectedFinalProteins = 1, + ExpectedMappedSharedPeptides = 0, + ExpectedFinalSharedPeptides = 0, + }, + } + }, new ParsimonyTestCase { Proteins = new[] {"AKAAK", "AKAAKAAAK", "ARAAR", "ARAARAAAR", "ELVISWASHERE" }, @@ -279,6 +464,7 @@ public bool RemoveSubsetProteins new ParsimonyTestCase { Proteins = new[] {"AKAAK", "AKAAKAAAK", "ARAAR", "ARAARAAAR", "ARVK", "ARVKR", "FFFFGGGGHH", "FFGG", "FFGGHH", "FFFGGGHHH", "FGGHHII", "HHHIIII", "ELVISWASHERE" }, + ProteinDescriptions = new [] {"GN=AAK", "GN=AAK", "GN=Pirate", "GN=Pirate", "GN=Aardvark", "GN=Aardvark", "GN=FGH", "GN=FG", "GN=FGH", "GN=FGH", "GN=FGHI", "GN=HI", "RockandrollNotAGene"}, Peptides = new[] {"AK", "AAK", "AAAK", "AAR", "AR", "VK", "FFFF", "FG", "GH", "HI", "HII", "HIII", "HIIII", "IIII", "PEPTIDE" }, ExpectedPeptidesMapped = 14, ExpectedPeptidesUnmapped = 1, @@ -298,6 +484,18 @@ public bool RemoveSubsetProteins ExpectedFinalSharedPeptides = 0, }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + GeneLevelParsimony = true, + FindMinimalProteinList = false, + SharedPeptides = ProteinAssociation.SharedPeptides.AssignedToFirstProtein, + ExpectedFinalPeptides = 14, + ExpectedFinalProteins = 6, + ExpectedMappedSharedPeptides = 11, + ExpectedFinalSharedPeptides = 0, + }, + new ParsimonyTestCase.OptionsAndResult { GroupProteins = true, @@ -309,6 +507,18 @@ public bool RemoveSubsetProteins ExpectedFinalSharedPeptides = 2, }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + GeneLevelParsimony = true, + FindMinimalProteinList = false, + SharedPeptides = ProteinAssociation.SharedPeptides.AssignedToBestProtein, + ExpectedFinalPeptides = 15, + ExpectedFinalProteins = 6, + ExpectedMappedSharedPeptides = 11, + ExpectedFinalSharedPeptides = 2, + }, + new ParsimonyTestCase.OptionsAndResult { GroupProteins = true, @@ -320,6 +530,18 @@ public bool RemoveSubsetProteins ExpectedFinalSharedPeptides = 0, }, + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + GeneLevelParsimony = true, + FindMinimalProteinList = false, + SharedPeptides = ProteinAssociation.SharedPeptides.Removed, + ExpectedFinalPeptides = 9, + ExpectedFinalProteins = 5, + ExpectedMappedSharedPeptides = 11, + ExpectedFinalSharedPeptides = 0, + }, + new ParsimonyTestCase.OptionsAndResult { GroupProteins = true, @@ -331,8 +553,20 @@ public bool RemoveSubsetProteins ExpectedMappedSharedPeptides = 17, ExpectedFinalSharedPeptides = 10, }, - - // DuplicatedBetweenProteins should be last option so that all peptides are kept in order to test a second round of protein association + + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + GeneLevelParsimony = true, + FindMinimalProteinList = false, + RemoveSubsetProteins = true, + SharedPeptides = ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins, + ExpectedFinalPeptides = 19, + ExpectedFinalProteins = 6, + ExpectedMappedSharedPeptides = 11, + ExpectedFinalSharedPeptides = 10, + }, + new ParsimonyTestCase.OptionsAndResult { GroupProteins = true, @@ -343,6 +577,19 @@ public bool RemoveSubsetProteins ExpectedMappedSharedPeptides = 17, ExpectedFinalSharedPeptides = 17, }, + + // DuplicatedBetweenProteins should be last option so that all peptides are kept in order to test a second round of protein association + new ParsimonyTestCase.OptionsAndResult + { + GroupProteins = true, + GeneLevelParsimony = true, + FindMinimalProteinList = false, + SharedPeptides = ProteinAssociation.SharedPeptides.DuplicatedBetweenProteins, + ExpectedFinalPeptides = 20, + ExpectedFinalProteins = 7, + ExpectedMappedSharedPeptides = 11, + ExpectedFinalSharedPeptides = 11, + }, } }, @@ -448,8 +695,17 @@ private void TestParsimonyOptions() string fastaFilePath = TestFilesDir.GetTestPath("testProteins.fasta"); using (var fastaFile = new StreamWriter(fastaFilePath)) { - for (int j = 0; j < testCase.Proteins.Length; ++j) - fastaFile.WriteLine($">Protein{j + 1}{Environment.NewLine}{testCase.Proteins[j]}"); + Assume.IsTrue(testCase.ProteinDescriptions.IsNullOrEmpty() || testCase.Proteins.Length == testCase.ProteinDescriptions.Length); + if (testCase.ProteinDescriptions != null && testCase.ProteinDescriptions.Length > 0) + { + for (int j = 0; j < testCase.Proteins.Length; ++j) + fastaFile.WriteLine($">Protein{j + 1} {testCase.ProteinDescriptions[j]}{Environment.NewLine}{testCase.Proteins[j]}"); + } + else + { + for (int j = 0; j < testCase.Proteins.Length; ++j) + fastaFile.WriteLine($">Protein{j + 1}{Environment.NewLine}{testCase.Proteins[j]}"); + } } modificationMatcher.CreateMatches(srmSettings, testCase.Peptides, Settings.Default.StaticModList, Settings.Default.HeavyModList); @@ -492,29 +748,63 @@ private void TestParsimonyOptions() RunUI(() => { dlg.GroupProteins = optionsAndResult.GroupProteins; + dlg.GeneLevelParsimony = optionsAndResult.GeneLevelParsimony; dlg.FindMinimalProteinList = optionsAndResult.FindMinimalProteinList; dlg.RemoveSubsetProteins = optionsAndResult.RemoveSubsetProteins; dlg.SelectedSharedPeptides = optionsAndResult.SharedPeptides; dlg.MinPeptidesPerProtein = optionsAndResult.MinPeptidesPerProtein; }); - //PauseTest(); + RunUI(() => { - Assert.AreEqual(optionsAndResult.ExpectedFinalProteins, dlg.FinalResults.FinalProteinCount, $"Test case {i + 1}.{j + 1} FinalProteinCount"); - Assert.AreEqual(optionsAndResult.ExpectedFinalPeptides, dlg.FinalResults.FinalPeptideCount, $"Test case {i + 1}.{j + 1} FinalPeptideCount"); - Assert.AreEqual(optionsAndResult.ExpectedMappedSharedPeptides, dlg.FinalResults.TotalSharedPeptideCount, $"Test case {i + 1}.{j + 1} TotalSharedPeptideCount"); - Assert.AreEqual(optionsAndResult.ExpectedFinalSharedPeptides, dlg.FinalResults.FinalSharedPeptideCount, $"Test case {i + 1}.{j + 1} FinalSharedPeptideCount"); - - Assert.AreEqual(testCase.ExpectedPeptidesMapped, dlg.FinalResults.PeptidesMapped, $"Test case {i + 1}.{j + 1} PeptidesMapped"); - Assert.AreEqual(testCase.ExpectedPeptidesUnmapped, dlg.FinalResults.PeptidesUnmapped, $"Test case {i + 1}.{j + 1} PeptidesUnmapped"); - Assert.AreEqual(testCase.ExpectedProteinsMapped, dlg.FinalResults.ProteinsMapped, $"Test case {i + 1}.{j + 1} ProteinsMapped"); - Assert.AreEqual(testCase.ExpectedProteinsUnmapped, dlg.FinalResults.ProteinsUnmapped, $"Test case {i + 1}.{j + 1} ProteinsUnmapped"); + if (IsRecordMode) + { + Console.WriteLine(); + Console.WriteLine(string.Join(", ", testCase.Proteins)); + Console.WriteLine(string.Join(", ", testCase.ProteinDescriptions ?? Array.Empty())); + Console.WriteLine(string.Join(", ", testCase.Peptides)); + Console.WriteLine(); + + Settings.Default.ShowPeptidesDisplayMode = optionsAndResult.GeneLevelParsimony ? "ByGene" : "ByName"; + + // print mapping of peptide group nodes to peptide nodes + foreach (var pg in dlg.DocumentFinal.PeptideGroups) + { + var webSearchCompleted = pg.ProteinMetadata.SetWebSearchCompleted(); + var peptideGroupDocNode = pg.ChangeProteinMetadata(webSearchCompleted); + var text = ProteinMetadataManager.ProteinModalDisplayText(peptideGroupDocNode); + Console.WriteLine($@"{text} -> {string.Join(", ", pg.Peptides.Select(p => p.SourceTextId))}"); + } + Console.WriteLine(); + if (optionsAndResult.GroupProteins) Console.WriteLine(@"GroupProteins = true,"); + if (optionsAndResult.GeneLevelParsimony) Console.WriteLine(@"GeneLevelParsimony = true,"); + if (optionsAndResult.FindMinimalProteinList) Console.WriteLine(@"FindMinimalProteinList = true,"); + if (optionsAndResult.RemoveSubsetProteins) Console.WriteLine(@"RemoveSubsetProteins = true,"); + Console.WriteLine($@"SharedPeptides = ProteinAssociation.SharedPeptides.{Enum.GetName(typeof(ProteinAssociation.SharedPeptides), optionsAndResult.SharedPeptides)},"); + Console.WriteLine($@"ExpectedFinalPeptides = {dlg.FinalResults.FinalPeptideCount},"); + Console.WriteLine($@"ExpectedFinalProteins = {dlg.FinalResults.FinalProteinCount},"); + Console.WriteLine($@"ExpectedMappedSharedPeptides = {dlg.FinalResults.TotalSharedPeptideCount},"); + Console.WriteLine($@"ExpectedFinalSharedPeptides = {dlg.FinalResults.FinalSharedPeptideCount},"); + } + else + { + Assert.AreEqual(optionsAndResult.ExpectedFinalProteins, dlg.FinalResults.FinalProteinCount, $"Test case {i + 1}.{j + 1} FinalProteinCount"); + Assert.AreEqual(optionsAndResult.ExpectedFinalPeptides, dlg.FinalResults.FinalPeptideCount, $"Test case {i + 1}.{j + 1} FinalPeptideCount"); + Assert.AreEqual(optionsAndResult.ExpectedMappedSharedPeptides, dlg.FinalResults.TotalSharedPeptideCount, $"Test case {i + 1}.{j + 1} TotalSharedPeptideCount"); + Assert.AreEqual(optionsAndResult.ExpectedFinalSharedPeptides, dlg.FinalResults.FinalSharedPeptideCount, $"Test case {i + 1}.{j + 1} FinalSharedPeptideCount"); + + Assert.AreEqual(testCase.ExpectedPeptidesMapped, dlg.FinalResults.PeptidesMapped, $"Test case {i + 1}.{j + 1} PeptidesMapped"); + Assert.AreEqual(testCase.ExpectedPeptidesUnmapped, dlg.FinalResults.PeptidesUnmapped, $"Test case {i + 1}.{j + 1} PeptidesUnmapped"); + Assert.AreEqual(testCase.ExpectedProteinsMapped, dlg.FinalResults.ProteinsMapped, $"Test case {i + 1}.{j + 1} ProteinsMapped"); + Assert.AreEqual(testCase.ExpectedProteinsUnmapped, dlg.FinalResults.ProteinsUnmapped, $"Test case {i + 1}.{j + 1} ProteinsUnmapped"); + } }); } OkAssociateProteinsDialog(dlg); } // test all cases again after an association has already been applied (all peptides should have been kept so the results should be the same) + if (!IsRecordMode) { var dlg = ShowDialog(SkylineWindow.ShowAssociateProteinsDlg); RunUI(() => { dlg.FastaFileName = fastaFilePath; }); @@ -525,6 +815,7 @@ private void TestParsimonyOptions() RunUI(() => { dlg.GroupProteins = optionsAndResult.GroupProteins; + dlg.GeneLevelParsimony = optionsAndResult.GeneLevelParsimony; dlg.FindMinimalProteinList = optionsAndResult.FindMinimalProteinList; dlg.RemoveSubsetProteins = optionsAndResult.RemoveSubsetProteins; dlg.SelectedSharedPeptides = optionsAndResult.SharedPeptides; @@ -553,35 +844,37 @@ private void TestParsimonyOptions() } int extraUnmappedPeptides = testCase.ExpectedPeptidesUnmapped * 2; - var findNodeDlg = ShowDialog(SkylineWindow.ShowFindNodeDlg); int expectedItems = testCase.OptionsAndResults.Last().ExpectedFinalSharedPeptides + extraUnmappedPeptides; - RunUI(() => + if (expectedItems > 0) { - findNodeDlg.AdvancedVisible = true; - findNodeDlg.FindOptions = new FindOptions().ChangeCustomFinders(Finders.ListAllFinders().Where(f => f is DuplicatedPeptideFinder)); - var duplicatePeptideNodes = new List(); - for (int k = 0; k < expectedItems; ++k) + var findNodeDlg = ShowDialog(SkylineWindow.ShowFindNodeDlg); + RunUI(() => { - findNodeDlg.FindNext(); - duplicatePeptideNodes.Add(SkylineWindow.SelectedNode); - } - Assert.AreEqual(expectedItems, duplicatePeptideNodes.Count); - - findNodeDlg.FindAll(); - }); - OkDialog(findNodeDlg, findNodeDlg.Close); + findNodeDlg.AdvancedVisible = true; + findNodeDlg.FindOptions = new FindOptions().ChangeCustomFinders(Finders.ListAllFinders().Where(f => f is DuplicatedPeptideFinder)); + var duplicatePeptideNodes = new List(); + for (int k = 0; k < expectedItems; ++k) + { + findNodeDlg.FindNext(); + duplicatePeptideNodes.Add(SkylineWindow.SelectedNode); + } + Assert.AreEqual(expectedItems, duplicatePeptideNodes.Count); + + findNodeDlg.FindAll(); + }); + OkDialog(findNodeDlg, findNodeDlg.Close); - var findView = WaitForOpenForm(); - try - { - WaitForConditionUI(1000, () => findView.ItemCount == expectedItems); - } - catch (AssertFailedException) - { - RunUI(() => Assert.AreEqual(expectedItems, findView.ItemCount)); + var findView = WaitForOpenForm(); + try + { + WaitForConditionUI(1000, () => findView.ItemCount == expectedItems); + } + catch (AssertFailedException) + { + RunUI(() => Assert.AreEqual(expectedItems, findView.ItemCount)); + } + OkDialog(findView, findView.Close); } - OkDialog(findView, findView.Close); - RunUI(() => { diff --git a/pwiz_tools/Skyline/TestPerf/PerfAssociateProteinsHugeTest.cs b/pwiz_tools/Skyline/TestPerf/PerfAssociateProteinsHugeTest.cs index a0196d9379..922fa01759 100644 --- a/pwiz_tools/Skyline/TestPerf/PerfAssociateProteinsHugeTest.cs +++ b/pwiz_tools/Skyline/TestPerf/PerfAssociateProteinsHugeTest.cs @@ -104,7 +104,10 @@ private void TestDialog(ImportType type) Assert.AreEqual(0, proteinsDlg.FinalResults.PeptidesUnmapped); Assert.AreEqual(84198, proteinsDlg.FinalResults.ProteinsMapped); Assert.AreEqual(4281, proteinsDlg.FinalResults.ProteinsUnmapped); - + }); + + RunUI(() => + { proteinsDlg.GroupProteins = true; proteinsDlg.FindMinimalProteinList = false; proteinsDlg.SelectedSharedPeptides = ProteinAssociation.SharedPeptides.AssignedToBestProtein; @@ -124,12 +127,17 @@ private void TestDialog(ImportType type) Assert.AreEqual(167189, proteinsDlg.FinalResults.FinalPeptideCount); Assert.AreEqual(41606, proteinsDlg.FinalResults.FinalProteinCount); - proteinsDlg.MinPeptidesPerProtein = 10; + proteinsDlg.GeneLevelParsimony = true; + //Console.WriteLine($"GeneLevelParsimony {proteinsDlg.FinalResults.FinalProteinCount} {proteinsDlg.FinalResults.FinalPeptideCount}"); + Assert.AreEqual(408175, proteinsDlg.FinalResults.FinalPeptideCount); + Assert.AreEqual(20649, proteinsDlg.FinalResults.FinalProteinCount); + + //proteinsDlg.MinPeptidesPerProtein = 10; //Console.WriteLine($"MinPeptidesPerProtein 10 {proteinsDlg.FinalResults.FinalProteinCount} {proteinsDlg.FinalResults.FinalPeptideCount}"); - Assert.AreEqual(88242, proteinsDlg.FinalResults.FinalPeptideCount); - Assert.AreEqual(3987, proteinsDlg.FinalResults.FinalProteinCount); + //Assert.AreEqual(88242, proteinsDlg.FinalResults.FinalPeptideCount); + //Assert.AreEqual(3987, proteinsDlg.FinalResults.FinalProteinCount); }); - // PauseTest(); + using (new WaitDocumentChange(null, true)) { OkDialog(proteinsDlg, proteinsDlg.OkDialog); @@ -147,7 +155,7 @@ private void TestDialog(ImportType type) else nonProteins.Add(docNode); } - Assert.AreEqual(3987, proteins.Count); + Assert.AreEqual(20649, proteins.Count); Assert.AreEqual(0, nonProteins.Count); // Unmapped Peptides }); } diff --git a/pwiz_tools/Skyline/TestUtil/Schemas/Skyline_Current.xsd b/pwiz_tools/Skyline/TestUtil/Schemas/Skyline_Current.xsd index 9e79ec65bf..3a4f0e6481 100644 --- a/pwiz_tools/Skyline/TestUtil/Schemas/Skyline_Current.xsd +++ b/pwiz_tools/Skyline/TestUtil/Schemas/Skyline_Current.xsd @@ -300,6 +300,7 @@ + From e72503268045f62ba47b0a2063603f7b4ca0b7f8 Mon Sep 17 00:00:00 2001 From: Matt Chambers Date: Wed, 3 Jan 2024 13:01:59 -0500 Subject: [PATCH 2/5] * resolved resx split conflicts --- .../Skyline/EditUI/AssociateProteinsDlg.cs | 12 +- .../EditUI/EditUIResources.designer.cs | 27 +++ .../Skyline/EditUI/EditUIResources.resx | 9 + .../Model/Proteome/ProteinAssociation.cs | 2 +- .../Proteome/ProteomeResources.designer.cs | 9 + .../Model/Proteome/ProteomeResources.resx | 3 + .../Skyline/Properties/Resources.Designer.cs | 195 ------------------ pwiz_tools/Skyline/Properties/Resources.resx | 12 -- 8 files changed, 55 insertions(+), 214 deletions(-) diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs index ef01459c1e..6cf854520f 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs @@ -341,16 +341,16 @@ private void cbGeneLevel_CheckedChanged(object sender, EventArgs e) { cbGroupProteins.Checked = true; cbGroupProteins.Enabled = false; - lblMinimalProteinList.Text = Resources.AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides; - lblRemoveSubsetProteins.Text = Resources.AssociateProteinsDlg_Remove_subset_genes; - lblMinPeptides.Text = Resources.AssociateProteinsDlg_Min_peptides_per_gene; + lblMinimalProteinList.Text = EditUIResources.AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides; + lblRemoveSubsetProteins.Text = EditUIResources.AssociateProteinsDlg_Remove_subset_genes; + lblMinPeptides.Text = EditUIResources.AssociateProteinsDlg_Min_peptides_per_gene; } else { cbGroupProteins.Enabled = true; - lblMinimalProteinList.Text = Resources.AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides; - lblRemoveSubsetProteins.Text = Resources.AssociateProteinsDlg_Remove_subset_protein_groups; - lblMinPeptides.Text = Resources.AssociateProteinsDlg_Min_peptides_per_protein_group; + lblMinimalProteinList.Text = EditUIResources.AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides; + lblRemoveSubsetProteins.Text = EditUIResources.AssociateProteinsDlg_Remove_subset_protein_groups; + lblMinPeptides.Text = EditUIResources.AssociateProteinsDlg_Min_peptides_per_protein_group; } UpdateParsimonyResults(); diff --git a/pwiz_tools/Skyline/EditUI/EditUIResources.designer.cs b/pwiz_tools/Skyline/EditUI/EditUIResources.designer.cs index c5fb4854fd..f2b47bd113 100644 --- a/pwiz_tools/Skyline/EditUI/EditUIResources.designer.cs +++ b/pwiz_tools/Skyline/EditUI/EditUIResources.designer.cs @@ -133,6 +133,15 @@ public static string AssociateProteinsDlg_CellValueNeeded_Shared_Peptides { } } + /// + /// Looks up a localized string similar to Find &minimal gene group list that explains all peptides. + /// + public static string AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides", resourceCulture); + } + } + /// /// Looks up a localized string similar to Find &minimal protein group list that explains all peptides. /// @@ -152,6 +161,15 @@ public static string AssociateProteinsDlg_FindProteinMatchesWithFasta_No_matches } } + /// + /// Looks up a localized string similar to Mi&n peptides per gene. + /// + public static string AssociateProteinsDlg_Min_peptides_per_gene { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Min_peptides_per_gene", resourceCulture); + } + } + /// /// Looks up a localized string similar to Mi&n peptides per protein group. /// @@ -180,6 +198,15 @@ public static string AssociateProteinsDlg_OnShown_Organize_all_document_peptides } } + /// + /// Looks up a localized string similar to &Remove subset genes. + /// + public static string AssociateProteinsDlg_Remove_subset_genes { + get { + return ResourceManager.GetString("AssociateProteinsDlg_Remove_subset_genes", resourceCulture); + } + } + /// /// Looks up a localized string similar to &Remove subset protein groups. /// diff --git a/pwiz_tools/Skyline/EditUI/EditUIResources.resx b/pwiz_tools/Skyline/EditUI/EditUIResources.resx index 4ef9d9a51d..9534707311 100644 --- a/pwiz_tools/Skyline/EditUI/EditUIResources.resx +++ b/pwiz_tools/Skyline/EditUI/EditUIResources.resx @@ -462,4 +462,13 @@ Example: Looplink: T [4] These proteins include: + + Find &minimal gene group list that explains all peptides + + + Mi&n peptides per gene + + + &Remove subset genes + \ No newline at end of file diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs index 713bad4fac..2040ffe890 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs @@ -669,7 +669,7 @@ IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary(geneLevel ? new GeneLevelEqualityComparer() : EqualityComparer.Default); diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.designer.cs b/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.designer.cs index 8007fad011..e36df9671d 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.designer.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.designer.cs @@ -136,6 +136,15 @@ public static string ProteinAssociation_CalculateProteinGroups_Calculating_prote } } + /// + /// Looks up a localized string similar to Calculating gene groups. + /// + public static string ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups { + get { + return ResourceManager.GetString("ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups", resourceCulture); + } + } + /// /// Looks up a localized string similar to Calculating protein clusters. /// diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.resx b/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.resx index 6fcc939f69..00f15771ae 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.resx +++ b/pwiz_tools/Skyline/Model/Proteome/ProteomeResources.resx @@ -160,4 +160,7 @@ Resolving protein details + + Calculating gene groups + \ No newline at end of file diff --git a/pwiz_tools/Skyline/Properties/Resources.Designer.cs b/pwiz_tools/Skyline/Properties/Resources.Designer.cs index 5d811574b0..a3ec6aafc6 100644 --- a/pwiz_tools/Skyline/Properties/Resources.Designer.cs +++ b/pwiz_tools/Skyline/Properties/Resources.Designer.cs @@ -633,116 +633,6 @@ public static string AssociateProteinsDlg_ApplyChanges_Associated_proteins { } } - /// - /// Looks up a localized string similar to Shared Peptides. - /// - public static string AssociateProteinsDlg_CellValueNeeded_Shared_Peptides { - get { - return ResourceManager.GetString("AssociateProteinsDlg_CellValueNeeded_Shared_Peptides", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Find &minimal gene group list that explains all peptides. - /// - public static string AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides { - get { - return ResourceManager.GetString("AssociateProteinsDlg_Find_minimal_gene_group_list_that_explains_all_peptides", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Find &minimal protein group list that explains all peptides. - /// - public static string AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides { - get { - return ResourceManager.GetString("AssociateProteinsDlg_Find_minimal_protein_group_list_that_explains_all_peptides", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Finding peptides in FASTA file. - /// - public static string AssociateProteinsDlg_FindProteinMatchesWithFasta_Finding_peptides_in_FASTA_file { - get { - return ResourceManager.GetString("AssociateProteinsDlg_FindProteinMatchesWithFasta_Finding_peptides_in_FASTA_file", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to No matches were found using the imported fasta file.. - /// - public static string AssociateProteinsDlg_FindProteinMatchesWithFasta_No_matches_were_found_using_the_imported_fasta_file_ { - get { - return ResourceManager.GetString("AssociateProteinsDlg_FindProteinMatchesWithFasta_No_matches_were_found_using_the_" + - "imported_fasta_file_", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Mi&n peptides per gene. - /// - public static string AssociateProteinsDlg_Min_peptides_per_gene { - get { - return ResourceManager.GetString("AssociateProteinsDlg_Min_peptides_per_gene", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Mi&n peptides per protein group. - /// - public static string AssociateProteinsDlg_Min_peptides_per_protein_group { - get { - return ResourceManager.GetString("AssociateProteinsDlg_Min_peptides_per_protein_group", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Existing protein associations will be discarded.. - /// - public static string AssociateProteinsDlg_OnShown_Existing_protein_associations_will_be_discarded_ { - get { - return ResourceManager.GetString("AssociateProteinsDlg_OnShown_Existing_protein_associations_will_be_discarded_", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Organize all document peptides into associated proteins or protein groups.. - /// - public static string AssociateProteinsDlg_OnShown_Organize_all_document_peptides_into_associated_proteins_or_protein_groups { - get { - return ResourceManager.GetString("AssociateProteinsDlg_OnShown_Organize_all_document_peptides_into_associated_prote" + - "ins_or_protein_groups", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to &Remove subset genes. - /// - public static string AssociateProteinsDlg_Remove_subset_genes { - get { - return ResourceManager.GetString("AssociateProteinsDlg_Remove_subset_genes", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to &Remove subset protein groups. - /// - public static string AssociateProteinsDlg_Remove_subset_protein_groups { - get { - return ResourceManager.GetString("AssociateProteinsDlg_Remove_subset_protein_groups", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Applying parsimony options. - /// - public static string AssociateProteinsDlg_UpdateParsimonyResults_Applying_parsimony_options { - get { - return ResourceManager.GetString("AssociateProteinsDlg_UpdateParsimonyResults_Applying_parsimony_options", resourceCulture); - } - } - /// /// Looks up a localized string similar to No background proteome defined, see the Digestion tab in Peptide Settings for more information.. /// @@ -7864,91 +7754,6 @@ public static System.Drawing.Bitmap Protein { } } - /// - /// Looks up a localized string similar to Are you sure you want to delete the molecule list '{0}'?. - /// - public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_molecule_list___0___ { - get { - return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_molecule_list__" + - "_0___", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Are you sure you want to delete the protein '{0}'?. - /// - public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_protein___0___ { - get { - return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_the_protein___0___", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Are you sure you want to delete these {0} molecule lists?. - /// - public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__molecule_lists_ { - get { - return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__molecule_" + - "lists_", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Are you sure you want to delete these {0} proteins?. - /// - public static string Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__proteins_ { - get { - return ResourceManager.GetString("Protein_GetDeleteConfirmation_Are_you_sure_you_want_to_delete_these__0__proteins_" + - "", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Calculating protein groups. - /// - public static string ProteinAssociation_CalculateProteinGroups_Calculating_protein_groups { - get { - return ResourceManager.GetString("ProteinAssociation_CalculateProteinGroups_Calculating_protein_groups", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Group of {0} proteins. - /// - public static string ProteinAssociation_CalculateProteinGroups_Group_of__0__proteins { - get { - return ResourceManager.GetString("ProteinAssociation_CalculateProteinGroups_Group_of__0__proteins", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Calculating gene groups. - /// - public static string ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups { - get { - return ResourceManager.GetString("ProteinAssociation_CalculateProteinOrGeneGroups_Calculating_gene_groups", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Calculating protein clusters. - /// - public static string ProteinAssociation_Calculating_protein_clusters { - get { - return ResourceManager.GetString("ProteinAssociation_Calculating_protein_clusters", resourceCulture); - } - } - - /// - /// Looks up a localized string similar to Creating protein targets and assigning their peptides. - /// - public static string ProteinAssociation_CreateDocTree_Creating_protein_targets_and_assigning_their_peptides { - get { - return ResourceManager.GetString("ProteinAssociation_CreateDocTree_Creating_protein_targets_and_assigning_their_pep" + - "tides", resourceCulture); - } - } - /// /// Looks up a localized string similar to Unmapped Peptides. /// diff --git a/pwiz_tools/Skyline/Properties/Resources.resx b/pwiz_tools/Skyline/Properties/Resources.resx index 1d20a8d5f1..e7b3917e86 100644 --- a/pwiz_tools/Skyline/Properties/Resources.resx +++ b/pwiz_tools/Skyline/Properties/Resources.resx @@ -4357,16 +4357,4 @@ If you choose Disable, you can enable Auto-select later with the "Refine > Ad Error: Could not find background proteome file {0} - - Calculating gene groups - - - Find &minimal gene group list that explains all peptides - - - &Remove subset genes - - - Mi&n peptides per gene - \ No newline at end of file From 0d715853715d03348b865fd4d44f10ab14d54586 Mon Sep 17 00:00:00 2001 From: Matt Chambers Date: Tue, 9 Jan 2024 16:55:29 -0500 Subject: [PATCH 3/5] * fixed duplicate calls to parsimony calculation when changing some options * greatly improved speed by using Nick's ReferenceValue for PeptideDocNodes * fixed issues with cancellation in some parsimony calculations --- .../Skyline/EditUI/AssociateProteinsDlg.cs | 29 +++-- .../Skyline/Model/Lib/BiblioSpecLite.cs | 22 +++- .../Model/Proteome/ProteinAssociation.cs | 108 +++++++++++------- 3 files changed, 106 insertions(+), 53 deletions(-) diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs index 6cf854520f..6a733780bc 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs @@ -42,7 +42,7 @@ namespace pwiz.Skyline.EditUI IAuditLogModifier { private readonly SrmDocument _document; - private bool _isFasta; + private bool? _isFasta; private ProteinAssociation _proteinAssociation; private readonly SettingsListComboDriver _driverBackgroundProteome; public SrmDocument DocumentFinal { get; private set; } @@ -54,6 +54,7 @@ namespace pwiz.Skyline.EditUI private readonly IrtStandard _irtStandard; private readonly string _decoyGenerationMethod; private readonly double _decoysPerTarget; + private bool _updatingLabels = false; private string _statusBarResultFormat; private static string[] _sharedPeptideOptionNames = Enum.GetNames(typeof(ProteinAssociation.SharedPeptides)); @@ -257,8 +258,14 @@ private void UpdateTargetCounts() private void UpdateParsimonyResults() { DocumentFinal = null; - if (Results == null) + if (AssociatedProteins == null) + { + if (_isFasta == true) + UseFastaFile(FastaFileName); + else if (_isFasta == false) + UseBackgroundProteome(); return; + } var groupProteins = GroupProteins; var geneLevel = GeneLevelParsimony; @@ -299,14 +306,14 @@ private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) if (GeneLevelParsimony) return; - comboSharedPeptides.SelectedIndexChanged -= comboParsimony_SelectedIndexChanged; + _updatingLabels = true; // adjust labels to reflect whether proteins or protein groups are used for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) comboSharedPeptides.Items[i] = EnumNames.ResourceManager.GetString( (GroupProteins ? @"SharedPeptidesGroup_" : @"SharedPeptides_") + _sharedPeptideOptionNames[i]) ?? throw new InvalidOperationException(_sharedPeptideOptionNames[i]); - comboSharedPeptides.SelectedIndexChanged += comboParsimony_SelectedIndexChanged; + _updatingLabels = false; if (GroupProteins) { @@ -327,14 +334,14 @@ private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) private void cbGeneLevel_CheckedChanged(object sender, EventArgs e) { - comboSharedPeptides.SelectedIndexChanged -= comboParsimony_SelectedIndexChanged; + _updatingLabels = true; // adjust labels to reflect whether genes or protein groups are used for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) comboSharedPeptides.Items[i] = EnumNames.ResourceManager.GetString( (GeneLevelParsimony ? @"SharedPeptidesGene_" : @"SharedPeptidesGroup_") + _sharedPeptideOptionNames[i]) ?? throw new InvalidOperationException(_sharedPeptideOptionNames[i]); - comboSharedPeptides.SelectedIndexChanged += comboParsimony_SelectedIndexChanged; + _updatingLabels = false; // gene level parsimony implies grouping, so force the checkbox on and disable it if (GeneLevelParsimony) @@ -358,6 +365,9 @@ private void cbGeneLevel_CheckedChanged(object sender, EventArgs e) private void comboParsimony_SelectedIndexChanged(object sender, EventArgs e) { + if (_updatingLabels) + return; + UpdateParsimonyResults(); } @@ -499,7 +509,8 @@ private SrmDocument CreateDocTree(SrmDocument current) longWaitDlg.PerformWork(this, 1000, monitor => { result = _proteinAssociation.CreateDocTree(current, monitor); - result = AddIrtAndDecoys(result); + if (result != null) + result = AddIrtAndDecoys(result); }); if (longWaitDlg.IsCanceled) @@ -547,7 +558,9 @@ public AssociateProteinsSettings FormSettings get { var fileName = FastaFileName; - return new AssociateProteinsSettings(_proteinAssociation, _isFasta && _overrideFastaPath == null ? fileName : null, _isFasta ? null : fileName); + return new AssociateProteinsSettings(_proteinAssociation, + _isFasta == true && _overrideFastaPath == null ? fileName : null, + _isFasta == true ? null : fileName); } } diff --git a/pwiz_tools/Skyline/Model/Lib/BiblioSpecLite.cs b/pwiz_tools/Skyline/Model/Lib/BiblioSpecLite.cs index da7c45044b..7d7f4abd32 100644 --- a/pwiz_tools/Skyline/Model/Lib/BiblioSpecLite.cs +++ b/pwiz_tools/Skyline/Model/Lib/BiblioSpecLite.cs @@ -142,6 +142,7 @@ public sealed class BiblioSpecLiteLibrary : CachedLibrary msDataFileUriLookup; public static string GetLibraryCachePath(string libraryPath) { @@ -1133,6 +1134,7 @@ private bool Load(ILoadMonitor loader, IProgressStatus status, bool cached) } _librarySourceFiles = librarySourceFiles.ToArray(); + msDataFileUriLookup = new Dictionary(); var scoreTypes = new Dictionary(); if (locationScoreTypes != 0) @@ -1825,16 +1827,27 @@ private IDictionary ReadRetentionTimes(SQLiteConnection connec private int FindSource(MsDataFileUri filePath) { - if (filePath == null) + if (filePath == null || _librarySourceFiles.Length == 0) { return -1; } + Assume.IsNotNull(msDataFileUriLookup); + lock (msDataFileUriLookup) + { + if (msDataFileUriLookup.TryGetValue(filePath, out int index)) + { + return index; + } + } string filePathToString = filePath.ToString(); // First look for an exact path match int i = _librarySourceFiles.IndexOf(info => Equals(filePathToString, info.FilePath)); // filePath.ToString may include decorators e.g. "C:\\data\\mydata.raw?centroid_ms1=true", try unadorned name ("mydata.raw") if (i == -1) - i = _librarySourceFiles.IndexOf(info => Equals(filePath.GetFileName(), info.FilePath)); + { + string fileName = filePath.GetFileName(); + i = _librarySourceFiles.IndexOf(info => Equals(fileName, info.FilePath)); + } // Or a straight basename match, which we sometimes use internally if (i == -1) i = _librarySourceFiles.IndexOf(info => Equals(filePathToString, info.BaseName)); @@ -1852,6 +1865,11 @@ private int FindSource(MsDataFileUri filePath) // Handle: Illegal characters in path } } + + lock (msDataFileUriLookup) + { + msDataFileUriLookup[filePath] = i; + } return i; } diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs index 2040ffe890..7f3a56aa92 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs @@ -41,17 +41,17 @@ public class ProteinAssociation { private SrmDocument _document; private StringSearch _peptideTrie; - private Dictionary> _peptideToPath; - private Dictionary> _peptideToProteins; + private Dictionary>> _peptideToPath; + private Dictionary, List> _peptideToProteins; private MappingResultsInternal _results, _finalResults; - private HashSet _peptidesRemovedByFilters; + private HashSet> _peptidesRemovedByFilters; public IDictionary _proteinToMetadata { get; private set; } internal class ProteinOrGeneGroupResultCache { public MappingResultsInternal Results { get; set; } public Dictionary PeptideGroupByProteinOrGeneGroup { get; set; } - public Dictionary> PeptideToProteinOrGeneGroup { get; set; } + public Dictionary, List> PeptideToProteinOrGeneGroup { get; set; } } private Dictionary _proteinOrGeneGroupResultCacheByGeneLevel; @@ -96,6 +96,8 @@ private void ResetMapping() _peptideToProteins = null; _peptidesRemovedByFilters = null; _proteinToMetadata = null; + AssociatedProteins = null; + ParsimoniousProteins = null; } public void UseFastaFile(string file, Func> digestProteinToPeptides, ILongWaitBroker broker) @@ -132,7 +134,7 @@ public void UseBackgroundProteome(BackgroundProteome backgroundProteome, Func FindProteinMatches(IProteinSource proteinSource, Func> digestProteinToPeptides, ILongWaitBroker broker) { var localResults = new MappingResultsInternal(); - var peptideToProteins = new Dictionary>(); + var peptideToProteins = new Dictionary, List>(); var proteinAssociations = new Dictionary(); int maxProgressValue = 0; @@ -143,7 +145,7 @@ private Dictionary FindProteinMatches(I int progressValue = fastaRecord.Progress; var fasta = fastaRecord.Sequence; var trieResults = _peptideTrie.FindAll(fasta.Sequence); - var matches = new List(); + var matches = new List>(); // don't count the same peptide twice in a protein var peptidesMatched = new HashSet(); @@ -194,14 +196,17 @@ private Dictionary FindProteinMatches(I ++localResults.ProteinsUnmapped; } }); - + + if (broker.IsCanceled) + return null; + Assume.IsTrue(localResults.ProteinsMapped + localResults.ProteinsUnmapped > 0); var distinctPeptideDocNodes = _peptideToPath.SelectMany(kvp => kvp.Value); - int distinctTargetPeptideCount = distinctPeptideDocNodes.Where(p => !p.IsDecoy).Select(p => p.Peptide.Target).Distinct().Count(); + int distinctTargetPeptideCount = distinctPeptideDocNodes.Where(p => !p.Value.IsDecoy).Select(p => p.Value.Peptide.Target).Distinct().Count(); _peptideToProteins = peptideToProteins; _results = localResults; - _results.PeptidesMapped = peptideToProteins.Keys.Select(p => p.Peptide.Target).Distinct().Count(); + _results.PeptidesMapped = peptideToProteins.Keys.Select(p => p.Value.Peptide.Target).Distinct().Count(); _results.PeptidesUnmapped = distinctTargetPeptideCount - _results.PeptidesMapped; _results.FinalProteinCount = proteinAssociations.Count; @@ -414,17 +419,17 @@ public MappingResultsInternal Clone() public class PeptideAssociationGroup { - public List Peptides { get; } + public List> Peptides { get; } private int _hash; - public PeptideAssociationGroup(List peptides) + public PeptideAssociationGroup(List> peptides) { Peptides = peptides; _hash = 397; foreach(var peptide in peptides) - _hash = (_hash * 397) ^ peptide.Peptide.Sequence.GetHashCode(); + _hash = (_hash * 397) ^ peptide.Value.Peptide.Sequence.GetHashCode(); } public override bool Equals(object x) @@ -443,17 +448,17 @@ public override int GetHashCode() public override string ToString() { - return string.Join(TextUtil.SEPARATOR_CSV.ToString(), Peptides.Select(p => p.ModifiedSequenceDisplay)); + return string.Join(TextUtil.SEPARATOR_CSV.ToString(), Peptides.Select(p => p.Value.ModifiedSequenceDisplay)); } } public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findMinimalProteinList, bool removeSubsetProteins, SharedPeptides sharedPeptides, int minPeptidesPerProtein, ILongWaitBroker broker) { - Dictionary> peptideToProteinGroups = _peptideToProteins; + Dictionary, List> peptideToProteinGroups = _peptideToProteins; broker.Message = ProteomeResources.AssociateProteinsDlg_UpdateParsimonyResults_Applying_parsimony_options; - _peptidesRemovedByFilters = new HashSet(); + _peptidesRemovedByFilters = new HashSet>(); if (groupProteins || geneLevel) { @@ -487,7 +492,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM var allPeptidesRemaining = new HashSet(); var sharedPeptidesRemaining = new Dictionary(); foreach (var kvp in ParsimoniousProteins) - foreach (var peptide in kvp.Value.Peptides.GroupBy(p => p.ModifiedSequence)) + foreach (var peptide in kvp.Value.Peptides.GroupBy(p => p.Value.ModifiedSequence)) if (!allPeptidesRemaining.Add(peptide.Key)) { if (!sharedPeptidesRemaining.ContainsKey(peptide.Key)) @@ -500,7 +505,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM // FindProteinMatches already duplicates results between proteins if (sharedPeptides != SharedPeptides.DuplicatedBetweenProteins) { - var filteredProteinAssociations = new Dictionary>(); + var filteredProteinAssociations = new Dictionary>>(); _finalResults = _finalResults.Clone(); _finalResults.FinalPeptideCount = 0; foreach (var kvp in peptideToProteinGroups) @@ -541,7 +546,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM { ++_finalResults.FinalPeptideCount; if (!filteredProteinAssociations.ContainsKey(protein)) - filteredProteinAssociations.Add(protein, new List {kvp.Key}); + filteredProteinAssociations.Add(protein, new List> {kvp.Key}); else filteredProteinAssociations[protein].Add(kvp.Key); } @@ -590,7 +595,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM allPeptidesRemaining.Clear(); sharedPeptidesRemaining.Clear(); foreach(var kvp in ParsimoniousProteins) - foreach(var peptide in kvp.Value.Peptides.GroupBy(p => p.ModifiedSequence)) + foreach(var peptide in kvp.Value.Peptides.GroupBy(p => p.Value.ModifiedSequence)) if (!allPeptidesRemaining.Add(peptide.Key)) { if (!sharedPeptidesRemaining.ContainsKey(peptide.Key)) @@ -630,21 +635,21 @@ private Dictionary CalculateProteinOrGe if (geneLevel) { // gene to protein to peptides; the top level dictionary uses the GeneLevelEqualityComparer - var geneToPeptides = new Dictionary>>(new GeneLevelEqualityComparer()); + var geneToPeptides = new Dictionary>>>(new GeneLevelEqualityComparer()); foreach (var kvp in AssociatedProteins) if (!geneToPeptides.ContainsKey(kvp.Key)) - geneToPeptides.Add(kvp.Key, new Dictionary> { { kvp.Key, kvp.Value.Peptides } }); + geneToPeptides.Add(kvp.Key, new Dictionary>> { { kvp.Key, kvp.Value.Peptides } }); else geneToPeptides[kvp.Key][kvp.Key] = kvp.Value.Peptides; // now pick the protein with the longest sequence if it contains all the peptides, or a concatenation of all of the sequences if not - IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary> proteinToPeptides) + IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary>> proteinToPeptides) { if (proteinToPeptides.Count == 1) return proteinToPeptides.Keys.First(); var longestProtein = proteinToPeptides.OrderByDescending(kvp2 => kvp2.Key.Sequence.Sequence.Length).First().Key; var allPeptides = proteinToPeptides.Values.SelectMany(o => o).Distinct().ToList(); - if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Peptide.Sequence))) + if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Value.Peptide.Sequence))) return longestProtein; // each protein's individual metadata is kept, but all protein sequences are replaced by the concatenated sequence @@ -674,7 +679,7 @@ IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary(geneLevel ? new GeneLevelEqualityComparer() : EqualityComparer.Default); - var peptideToProteinOrGeneGroups = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel].PeptideToProteinOrGeneGroup = new Dictionary>(); + var peptideToProteinOrGeneGroups = _proteinOrGeneGroupResultCacheByGeneLevel[geneLevel].PeptideToProteinOrGeneGroup = new Dictionary, List>(); Action addPeptideAssociations = (protein, peptides) => { @@ -724,7 +729,7 @@ string ProteinOrGeneGroupName(IProteinRecord p) /// /// Calculate clusters (connected components) for protein/peptide associations /// - private Dictionary> CalculateClusters(Dictionary> peptideToProteinGroups, ILongWaitBroker broker) + private Dictionary> CalculateClusters(Dictionary, List> peptideToProteinGroups, ILongWaitBroker broker) { var clusterByProteinGroup = new Dictionary(); int clusterId = 0; @@ -770,7 +775,7 @@ private Dictionary> CalculateClusters(Dictionar return clusterByProteinGroup.GroupBy(kvp => kvp.Value, kvp => kvp.Key).ToDictionary(kvp => kvp.Key, kvp => (IEnumerable) kvp); } - private ISet FindMinimalProteinSet(Dictionary> peptideToProteinGroups, ILongWaitBroker broker) + private ISet FindMinimalProteinSet(Dictionary, List> peptideToProteinGroups, ILongWaitBroker broker) { var proteinsByCluster = CalculateClusters(peptideToProteinGroups, broker); @@ -786,20 +791,20 @@ private ISet FindMinimalProteinSet(Dictionary ParsimoniousProteins[p].Peptides.Select(p2 => p2.Peptide.Sequence)).ToHashSet(); + var unexplainedPeptideSetByCluster = clusterProteins.SelectMany(p => ParsimoniousProteins[p].Peptides.Select(p2 => p2.Value.Peptide.Sequence)).ToHashSet(); var peptidesExplainedByProtein = new Dictionary(); while (unexplainedPeptideSetByCluster.Count > 0) // stop once all peptides are explained { // find cluster protein(s) with most unexplained peptides foreach (var protein in clusterProteins) - peptidesExplainedByProtein[protein] = ParsimoniousProteins[protein].Peptides.Count(p => unexplainedPeptideSetByCluster.Contains(p.Peptide.Sequence)); + peptidesExplainedByProtein[protein] = ParsimoniousProteins[protein].Peptides.Count(p => unexplainedPeptideSetByCluster.Contains(p.Value.Peptide.Sequence)); var proteinsWithMostUnexplainedPeptides = peptidesExplainedByProtein.Where(kvp => kvp.Value == peptidesExplainedByProtein.Values.Max()).Select(kvp => kvp.Key); // add this protein(s) to the minimal set foreach (var protein in proteinsWithMostUnexplainedPeptides) { - unexplainedPeptideSetByCluster.ExceptWith(ParsimoniousProteins[protein].Peptides.Select(p2 => p2.Peptide.Sequence)); + unexplainedPeptideSetByCluster.ExceptWith(ParsimoniousProteins[protein].Peptides.Select(p2 => p2.Value.Peptide.Sequence)); minimalProteinList[protein] = true; } } @@ -808,7 +813,7 @@ private ISet FindMinimalProteinSet(Dictionary FindSubsetProteins(Dictionary> peptideToProteinGroups, ILongWaitBroker broker) + private ISet FindSubsetProteins(Dictionary, List> peptideToProteinGroups, ILongWaitBroker broker) { var proteinsByCluster = CalculateClusters(peptideToProteinGroups, broker); @@ -870,7 +875,7 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) if (_peptideToPath == null) { - var peptidesForMatching = new HashSet(new PeptideComparer()); + var peptidesForMatching = new HashSet>(new PeptideComparer()); var doc = _document; foreach (var nodePepGroup in doc.PeptideGroups) @@ -881,7 +886,7 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) continue; }*/ - peptidesForMatching.UnionWith(nodePepGroup.Peptides); + peptidesForMatching.UnionWith(nodePepGroup.Peptides.Select(ReferenceValue.Of)); } if (peptidesForMatching.Count == 0) @@ -891,7 +896,7 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) throw new InvalidOperationException(Resources.ImportFastaControl_ImportFasta_The_document_does_not_contain_any_peptides_); } - _peptideToPath = peptidesForMatching.GroupBy(node => GetPeptideSequence(node.Peptide)).ToDictionary(k => k.Key, g => g.ToList()); + _peptideToPath = peptidesForMatching.GroupBy(node => GetPeptideSequence(node.Value.Peptide)).ToDictionary(k => k.Key, g => g.ToList()); } _peptideTrie = new StringSearch(_peptideToPath.Keys, broker.CancellationToken); @@ -899,16 +904,16 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) _peptideTrie = null; } - public class PeptideComparer : IEqualityComparer + public class PeptideComparer : IEqualityComparer> { - public bool Equals(PeptideDocNode x, PeptideDocNode y) + public bool Equals(ReferenceValue x, ReferenceValue y) { - return Equals(x?.SequenceKey, y?.SequenceKey); + return Equals(x.Value.SequenceKey, y.Value.SequenceKey); } - public int GetHashCode(PeptideDocNode obj) + public int GetHashCode(ReferenceValue obj) { - return obj.SequenceKey.GetHashCode(); + return obj.Value.SequenceKey.GetHashCode(); } } @@ -927,13 +932,16 @@ public SrmDocument CreateDocTree(SrmDocument current, IProgressMonitor monitor) var appendPeptideLists = new List(); // Move unmapped peptides from FastaSequence node to "Unmapped Peptides" list - var unmappedPeptideNodes = _peptideToPath.SelectMany(kvp => kvp.Value).Where(p => !p.IsDecoy).ToHashSet(); + var unmappedPeptideNodes = _peptideToPath.Values.SelectMany(list => list).Where(p => !p.Value.IsDecoy).ToHashSet(); unmappedPeptideNodes.ExceptWith(ParsimoniousProteins.Values.SelectMany(pag => pag.Peptides)); unmappedPeptideNodes.ExceptWith(_peptidesRemovedByFilters); // Modifies and adds old groups that still contain unmatched peptides to newPeptideGroups foreach (var nodePepGroup in current.MoleculeGroups) { + if (monitor.IsCanceled) + return null; + var peptideDocNodes = nodePepGroup.Children.Where(node => node is PeptideDocNode).Cast().ToList(); // Drop empty peptide lists @@ -954,21 +962,35 @@ public SrmDocument CreateDocTree(SrmDocument current, IProgressMonitor monitor) if (peptideDocNodes.All(node => node.GlobalStandardType == StandardType.IRT)) { newPeptideGroups.Add(nodePepGroup); - unmappedPeptideNodes.ExceptWith(peptideDocNodes); // do not count iRT peptides as unmapped + unmappedPeptideNodes.ExceptWith(peptideDocNodes.Select(ReferenceValue.Of)); // do not count iRT peptides as unmapped continue; } // Keep peptide lists that contain unmapped peptides if (nodePepGroup.IsProteomic && nodePepGroup.IsPeptideList) { + var mappedTargets = _peptideToProteins.Select(node => node.Key.Value.Target).ToHashSet(); + // If a peptide list peptide is unmapped, leave it in the peptide list but remove it from the global unmapped list - var peptidesByMappedStatus = peptideDocNodes.ToLookup(node => _peptideToProteins.Contains(kvp => kvp.Key.Target == node.Target), node => node); + var peptidesByMappedStatus = new Dictionary>> + { + { false, new List>() }, + { true, new List>() } + }; + + foreach (var node in peptideDocNodes) + { + if (monitor.IsCanceled) + return null; + + peptidesByMappedStatus[mappedTargets.Contains(node.Target)].Add(node); + } var unmappedPeptides = peptidesByMappedStatus[false].ToHashSet(); unmappedPeptideNodes.ExceptWith(unmappedPeptides); // If it was mapped, remove it from the peptide list var mappedPeptides = peptidesByMappedStatus[true]; - var mappedPeptideIndexes = mappedPeptides.Select(node => node.Peptide.GlobalIndex); + var mappedPeptideIndexes = mappedPeptides.Select(node => node.Value.Peptide.GlobalIndex); var newPeptideList = (PeptideGroupDocNode) nodePepGroup.RemoveAll(mappedPeptideIndexes.ToList()); // Only keep the list if it still has peptides @@ -1002,7 +1024,7 @@ public SrmDocument CreateDocTree(SrmDocument current, IProgressMonitor monitor) if (unmappedPeptideNodes.Count > 0) { - var unmappedNakedPeptides = unmappedPeptideNodes.Select(node => node.RemoveFastaSequence()); + var unmappedNakedPeptides = unmappedPeptideNodes.Select(node => node.Value.RemoveFastaSequence()); var unmappedPeptideList = new PeptideGroupDocNode(new PeptideGroup(), Annotations.EMPTY, Resources.ProteinAssociation_CreateDocTree_Unmapped_Peptides, string.Empty, unmappedNakedPeptides.ToArray()); appendPeptideLists.Add(unmappedPeptideList); From d50fc2149dbaf30ee980b4b189ca23bff6804539 Mon Sep 17 00:00:00 2001 From: Nicholas Shulman Date: Tue, 9 Jan 2024 16:42:17 -0800 Subject: [PATCH 4/5] Change a couple of "ReferenceValue" back to "PeptideDocNode" because reference equality was not actually being used. Check for cancellation in loop which digests every protein sequence. --- .../Skyline/EditUI/AssociateProteinsDlg.cs | 2 +- .../Model/Proteome/ProteinAssociation.cs | 62 ++++++++++--------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs index 6a733780bc..86f32f95bd 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs @@ -54,7 +54,7 @@ namespace pwiz.Skyline.EditUI private readonly IrtStandard _irtStandard; private readonly string _decoyGenerationMethod; private readonly double _decoysPerTarget; - private bool _updatingLabels = false; + private bool _updatingLabels; private string _statusBarResultFormat; private static string[] _sharedPeptideOptionNames = Enum.GetNames(typeof(ProteinAssociation.SharedPeptides)); diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs index 7f3a56aa92..63b356b333 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs @@ -41,7 +41,7 @@ public class ProteinAssociation { private SrmDocument _document; private StringSearch _peptideTrie; - private Dictionary>> _peptideToPath; + private Dictionary> _peptideToPath; private Dictionary, List> _peptideToProteins; private MappingResultsInternal _results, _finalResults; private HashSet> _peptidesRemovedByFilters; @@ -145,7 +145,7 @@ private Dictionary FindProteinMatches(I int progressValue = fastaRecord.Progress; var fasta = fastaRecord.Sequence; var trieResults = _peptideTrie.FindAll(fasta.Sequence); - var matches = new List>(); + var matches = new List(); // don't count the same peptide twice in a protein var peptidesMatched = new HashSet(); @@ -154,6 +154,10 @@ private Dictionary FindProteinMatches(I foreach (var result in trieResults) { + if (broker.IsCanceled) + { + break; + } if (!peptidesMatched.Add(result.Keyword)) continue; @@ -203,7 +207,7 @@ private Dictionary FindProteinMatches(I Assume.IsTrue(localResults.ProteinsMapped + localResults.ProteinsUnmapped > 0); var distinctPeptideDocNodes = _peptideToPath.SelectMany(kvp => kvp.Value); - int distinctTargetPeptideCount = distinctPeptideDocNodes.Where(p => !p.Value.IsDecoy).Select(p => p.Value.Peptide.Target).Distinct().Count(); + int distinctTargetPeptideCount = distinctPeptideDocNodes.Where(p => !p.IsDecoy).Select(p => p.Peptide.Target).Distinct().Count(); _peptideToProteins = peptideToProteins; _results = localResults; _results.PeptidesMapped = peptideToProteins.Keys.Select(p => p.Value.Peptide.Target).Distinct().Count(); @@ -419,17 +423,17 @@ public MappingResultsInternal Clone() public class PeptideAssociationGroup { - public List> Peptides { get; } + public List Peptides { get; } private int _hash; - public PeptideAssociationGroup(List> peptides) + public PeptideAssociationGroup(List peptides) { Peptides = peptides; _hash = 397; foreach(var peptide in peptides) - _hash = (_hash * 397) ^ peptide.Value.Peptide.Sequence.GetHashCode(); + _hash = (_hash * 397) ^ peptide.Peptide.Sequence.GetHashCode(); } public override bool Equals(object x) @@ -448,7 +452,7 @@ public override int GetHashCode() public override string ToString() { - return string.Join(TextUtil.SEPARATOR_CSV.ToString(), Peptides.Select(p => p.Value.ModifiedSequenceDisplay)); + return string.Join(TextUtil.SEPARATOR_CSV.ToString(), Peptides.Select(p => p.ModifiedSequenceDisplay)); } } @@ -492,7 +496,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM var allPeptidesRemaining = new HashSet(); var sharedPeptidesRemaining = new Dictionary(); foreach (var kvp in ParsimoniousProteins) - foreach (var peptide in kvp.Value.Peptides.GroupBy(p => p.Value.ModifiedSequence)) + foreach (var peptide in kvp.Value.Peptides.GroupBy(p => p.ModifiedSequence)) if (!allPeptidesRemaining.Add(peptide.Key)) { if (!sharedPeptidesRemaining.ContainsKey(peptide.Key)) @@ -505,7 +509,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM // FindProteinMatches already duplicates results between proteins if (sharedPeptides != SharedPeptides.DuplicatedBetweenProteins) { - var filteredProteinAssociations = new Dictionary>>(); + var filteredProteinAssociations = new Dictionary>(); _finalResults = _finalResults.Clone(); _finalResults.FinalPeptideCount = 0; foreach (var kvp in peptideToProteinGroups) @@ -546,7 +550,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM { ++_finalResults.FinalPeptideCount; if (!filteredProteinAssociations.ContainsKey(protein)) - filteredProteinAssociations.Add(protein, new List> {kvp.Key}); + filteredProteinAssociations.Add(protein, new List {kvp.Key}); else filteredProteinAssociations[protein].Add(kvp.Key); } @@ -595,7 +599,7 @@ public void ApplyParsimonyOptions(bool groupProteins, bool geneLevel, bool findM allPeptidesRemaining.Clear(); sharedPeptidesRemaining.Clear(); foreach(var kvp in ParsimoniousProteins) - foreach(var peptide in kvp.Value.Peptides.GroupBy(p => p.Value.ModifiedSequence)) + foreach(var peptide in kvp.Value.Peptides.GroupBy(p => p.ModifiedSequence)) if (!allPeptidesRemaining.Add(peptide.Key)) { if (!sharedPeptidesRemaining.ContainsKey(peptide.Key)) @@ -635,21 +639,21 @@ private Dictionary CalculateProteinOrGe if (geneLevel) { // gene to protein to peptides; the top level dictionary uses the GeneLevelEqualityComparer - var geneToPeptides = new Dictionary>>>(new GeneLevelEqualityComparer()); + var geneToPeptides = new Dictionary>>(new GeneLevelEqualityComparer()); foreach (var kvp in AssociatedProteins) if (!geneToPeptides.ContainsKey(kvp.Key)) - geneToPeptides.Add(kvp.Key, new Dictionary>> { { kvp.Key, kvp.Value.Peptides } }); + geneToPeptides.Add(kvp.Key, new Dictionary> { { kvp.Key, kvp.Value.Peptides } }); else geneToPeptides[kvp.Key][kvp.Key] = kvp.Value.Peptides; // now pick the protein with the longest sequence if it contains all the peptides, or a concatenation of all of the sequences if not - IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary>> proteinToPeptides) + IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary> proteinToPeptides) { if (proteinToPeptides.Count == 1) return proteinToPeptides.Keys.First(); var longestProtein = proteinToPeptides.OrderByDescending(kvp2 => kvp2.Key.Sequence.Sequence.Length).First().Key; var allPeptides = proteinToPeptides.Values.SelectMany(o => o).Distinct().ToList(); - if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Value.Peptide.Sequence))) + if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Peptide.Sequence))) return longestProtein; // each protein's individual metadata is kept, but all protein sequences are replaced by the concatenated sequence @@ -791,20 +795,20 @@ private ISet FindMinimalProteinSet(Dictionary ParsimoniousProteins[p].Peptides.Select(p2 => p2.Value.Peptide.Sequence)).ToHashSet(); + var unexplainedPeptideSetByCluster = clusterProteins.SelectMany(p => ParsimoniousProteins[p].Peptides.Select(p2 => p2.Peptide.Sequence)).ToHashSet(); var peptidesExplainedByProtein = new Dictionary(); while (unexplainedPeptideSetByCluster.Count > 0) // stop once all peptides are explained { // find cluster protein(s) with most unexplained peptides foreach (var protein in clusterProteins) - peptidesExplainedByProtein[protein] = ParsimoniousProteins[protein].Peptides.Count(p => unexplainedPeptideSetByCluster.Contains(p.Value.Peptide.Sequence)); + peptidesExplainedByProtein[protein] = ParsimoniousProteins[protein].Peptides.Count(p => unexplainedPeptideSetByCluster.Contains(p.Peptide.Sequence)); var proteinsWithMostUnexplainedPeptides = peptidesExplainedByProtein.Where(kvp => kvp.Value == peptidesExplainedByProtein.Values.Max()).Select(kvp => kvp.Key); // add this protein(s) to the minimal set foreach (var protein in proteinsWithMostUnexplainedPeptides) { - unexplainedPeptideSetByCluster.ExceptWith(ParsimoniousProteins[protein].Peptides.Select(p2 => p2.Value.Peptide.Sequence)); + unexplainedPeptideSetByCluster.ExceptWith(ParsimoniousProteins[protein].Peptides.Select(p2 => p2.Peptide.Sequence)); minimalProteinList[protein] = true; } } @@ -875,7 +879,7 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) if (_peptideToPath == null) { - var peptidesForMatching = new HashSet>(new PeptideComparer()); + var peptidesForMatching = new HashSet(new PeptideComparer()); var doc = _document; foreach (var nodePepGroup in doc.PeptideGroups) @@ -886,7 +890,7 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) continue; }*/ - peptidesForMatching.UnionWith(nodePepGroup.Peptides.Select(ReferenceValue.Of)); + peptidesForMatching.UnionWith(nodePepGroup.Peptides); } if (peptidesForMatching.Count == 0) @@ -896,7 +900,8 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) throw new InvalidOperationException(Resources.ImportFastaControl_ImportFasta_The_document_does_not_contain_any_peptides_); } - _peptideToPath = peptidesForMatching.GroupBy(node => GetPeptideSequence(node.Value.Peptide)).ToDictionary(k => k.Key, g => g.ToList()); + _peptideToPath = peptidesForMatching.GroupBy(node => GetPeptideSequence(node.Peptide)) + .ToDictionary(grouping => grouping.Key, grouping => grouping.ToList()); } _peptideTrie = new StringSearch(_peptideToPath.Keys, broker.CancellationToken); @@ -904,16 +909,16 @@ private void ListPeptidesForMatching(ILongWaitBroker broker) _peptideTrie = null; } - public class PeptideComparer : IEqualityComparer> + public class PeptideComparer : IEqualityComparer { - public bool Equals(ReferenceValue x, ReferenceValue y) + public bool Equals(PeptideDocNode x, PeptideDocNode y) { - return Equals(x.Value.SequenceKey, y.Value.SequenceKey); + return Equals(x?.SequenceKey, y?.SequenceKey); } - public int GetHashCode(ReferenceValue obj) + public int GetHashCode(PeptideDocNode obj) { - return obj.Value.SequenceKey.GetHashCode(); + return obj.SequenceKey.GetHashCode(); } } @@ -932,8 +937,9 @@ public SrmDocument CreateDocTree(SrmDocument current, IProgressMonitor monitor) var appendPeptideLists = new List(); // Move unmapped peptides from FastaSequence node to "Unmapped Peptides" list - var unmappedPeptideNodes = _peptideToPath.Values.SelectMany(list => list).Where(p => !p.Value.IsDecoy).ToHashSet(); - unmappedPeptideNodes.ExceptWith(ParsimoniousProteins.Values.SelectMany(pag => pag.Peptides)); + var unmappedPeptideNodes = _peptideToPath.Values.SelectMany(list => list).Where(p => !p.IsDecoy) + .Select(ReferenceValue.Of).ToHashSet(); + unmappedPeptideNodes.ExceptWith(ParsimoniousProteins.Values.SelectMany(pag => pag.Peptides.Select(ReferenceValue.Of))); unmappedPeptideNodes.ExceptWith(_peptidesRemovedByFilters); // Modifies and adds old groups that still contain unmatched peptides to newPeptideGroups From 1bd5be06dd48856df13723ebaadaa253509b94da Mon Sep 17 00:00:00 2001 From: Matt Chambers Date: Wed, 17 Jan 2024 16:51:36 -0500 Subject: [PATCH 5/5] * incorporated Nick's review recommendations --- .../Skyline/EditUI/AssociateProteinsDlg.cs | 41 ++++++++++----- .../Model/Proteome/ProteinAssociation.cs | 52 ++++++++++--------- 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs index 86f32f95bd..325a9b1be7 100644 --- a/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs +++ b/pwiz_tools/Skyline/EditUI/AssociateProteinsDlg.cs @@ -299,6 +299,21 @@ private void checkBoxParsimony_CheckedChanged(object sender, EventArgs e) UpdateParsimonyResults(); } + private void IfNotUpdatingLabels(Action action) + { + if (!_updatingLabels) + { + try + { + _updatingLabels = true; + action(); + } + finally + { + _updatingLabels = false; + } + } + } private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) { @@ -306,14 +321,15 @@ private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) if (GeneLevelParsimony) return; - _updatingLabels = true; - // adjust labels to reflect whether proteins or protein groups are used - for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) - comboSharedPeptides.Items[i] = EnumNames.ResourceManager.GetString( - (GroupProteins ? @"SharedPeptidesGroup_" : @"SharedPeptides_") + - _sharedPeptideOptionNames[i]) ?? - throw new InvalidOperationException(_sharedPeptideOptionNames[i]); - _updatingLabels = false; + IfNotUpdatingLabels(() => + { + // adjust labels to reflect whether proteins or protein groups are used + for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) + comboSharedPeptides.Items[i] = EnumNames.ResourceManager.GetString( + (GroupProteins ? @"SharedPeptidesGroup_" : @"SharedPeptides_") + + _sharedPeptideOptionNames[i]) ?? + throw new InvalidOperationException(_sharedPeptideOptionNames[i]); + }); if (GroupProteins) { @@ -334,14 +350,15 @@ private void cbGroupProteins_CheckedChanged(object sender, EventArgs e) private void cbGeneLevel_CheckedChanged(object sender, EventArgs e) { - _updatingLabels = true; - // adjust labels to reflect whether genes or protein groups are used - for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) + IfNotUpdatingLabels(() => + { + // adjust labels to reflect whether genes or protein groups are used + for (int i = 0; i < _sharedPeptideOptionNames.Length; ++i) comboSharedPeptides.Items[i] = EnumNames.ResourceManager.GetString( (GeneLevelParsimony ? @"SharedPeptidesGene_" : @"SharedPeptidesGroup_") + _sharedPeptideOptionNames[i]) ?? throw new InvalidOperationException(_sharedPeptideOptionNames[i]); - _updatingLabels = false; + }); // gene level parsimony implies grouping, so force the checkbox on and disable it if (GeneLevelParsimony) diff --git a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs index 63b356b333..fa46317d9a 100644 --- a/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs +++ b/pwiz_tools/Skyline/Model/Proteome/ProteinAssociation.cs @@ -45,7 +45,9 @@ public class ProteinAssociation private Dictionary, List> _peptideToProteins; private MappingResultsInternal _results, _finalResults; private HashSet> _peptidesRemovedByFilters; - public IDictionary _proteinToMetadata { get; private set; } + + private static IEqualityComparer ReferenceEqualityComparer = ReferenceValue.EQUALITY_COMPARER; + private IDictionary _proteinToMetadata { get; set; } internal class ProteinOrGeneGroupResultCache { @@ -630,6 +632,25 @@ public override int GetHashCode(IProteinRecord obj) return obj.Metadata.Gene.GetHashCode(); } } + + public static IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary> proteinToPeptides) + { + if (proteinToPeptides.Count == 1) + return proteinToPeptides.Keys.First(); + + var longestProtein = proteinToPeptides.OrderByDescending(kvp2 => kvp2.Key.Sequence.Sequence.Length).First().Key; + var allPeptides = proteinToPeptides.Values.SelectMany(o => o).Distinct(ReferenceEqualityComparer).ToList(); + if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Peptide.Sequence))) + return longestProtein; + + // each protein's individual metadata is kept, but all protein sequences are replaced by the concatenated sequence + var concatenatedSequence = string.Concat(proteinToPeptides.Keys.Select(p => p.Sequence.Sequence)); + return new FastaRecord(longestProtein.RecordIndex, 0, + new FastaSequenceGroup(longestProtein.Sequence.Name, + proteinToPeptides.Keys.Select(p => new FastaSequence(p.Sequence.Name, + p.Sequence.Description, p.Sequence.Alternatives, concatenatedSequence)).ToList()), + new ProteinGroupMetadata(proteinToPeptides.Keys.Select(p => p.Metadata).ToList())); + } private Dictionary CalculateProteinOrGeneGroups(MappingResultsInternal results, bool geneLevel, ILongWaitBroker broker) { @@ -639,33 +660,14 @@ private Dictionary CalculateProteinOrGe if (geneLevel) { // gene to protein to peptides; the top level dictionary uses the GeneLevelEqualityComparer + var proteinsByGene = AssociatedProteins.GroupBy(kvp => kvp.Key, new GeneLevelEqualityComparer()); var geneToPeptides = new Dictionary>>(new GeneLevelEqualityComparer()); - foreach (var kvp in AssociatedProteins) - if (!geneToPeptides.ContainsKey(kvp.Key)) - geneToPeptides.Add(kvp.Key, new Dictionary> { { kvp.Key, kvp.Value.Peptides } }); - else - geneToPeptides[kvp.Key][kvp.Key] = kvp.Value.Peptides; + foreach (var group in proteinsByGene) + geneToPeptides.Add(group.Key, group.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.Peptides)); + // now pick the protein with the longest sequence if it contains all the peptides, or a concatenation of all of the sequences if not - IProteinRecord GenerateConcatenatedSequenceIfNecessary(Dictionary> proteinToPeptides) - { - if (proteinToPeptides.Count == 1) - return proteinToPeptides.Keys.First(); - - var longestProtein = proteinToPeptides.OrderByDescending(kvp2 => kvp2.Key.Sequence.Sequence.Length).First().Key; - var allPeptides = proteinToPeptides.Values.SelectMany(o => o).Distinct().ToList(); - if (allPeptides.All(node => longestProtein.Sequence.Sequence.Contains(node.Peptide.Sequence))) - return longestProtein; - - // each protein's individual metadata is kept, but all protein sequences are replaced by the concatenated sequence - var concatenatedSequence = string.Join(string.Empty, proteinToPeptides.Keys.Select(p => p.Sequence.Sequence)); - return new FastaRecord(longestProtein.RecordIndex, 0, - new FastaSequenceGroup(longestProtein.Sequence.Name, - proteinToPeptides.Keys.Select(p => new FastaSequence(p.Sequence.Name, - p.Sequence.Description, p.Sequence.Alternatives, concatenatedSequence)).ToList()), - new ProteinGroupMetadata(proteinToPeptides.Keys.Select(p => p.Metadata).ToList())); - } proteinOrGeneToPeptideGroup = geneToPeptides.ToDictionary(kvp => GenerateConcatenatedSequenceIfNecessary(kvp.Value), - kvp => new PeptideAssociationGroup(kvp.Value.Values.SelectMany(o => o).Distinct().ToList())); + kvp => new PeptideAssociationGroup(kvp.Value.Values.SelectMany(o => o).Distinct(ReferenceEqualityComparer).ToList())); } foreach(var kvp in proteinOrGeneToPeptideGroup)