From 139b2305024360e0b01c9205506032a87cdacdd3 Mon Sep 17 00:00:00 2001 From: TJ Lane Date: Mon, 28 Oct 2024 17:41:47 +0000 Subject: [PATCH] =?UTF-8?q?`phaseboost`=20=F0=9F=9A=80=20=E2=98=84?= =?UTF-8?q?=EF=B8=8F=20=20(#47)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * phaseboost in, need to work on tests * dont forget the CLI * progress on metadata * testing in place * pool settings * clear warnings * cleanup * typos, last cleaning * pass through convergence params * last cleanup * ruff * better -h (#49) * reduce iterations * stronger test * lint --- meteor/diffmaps.py | 4 +- meteor/io.py | 32 +--- meteor/iterative.py | 44 ++++- meteor/scripts/common.py | 156 ++++++++++++++---- meteor/scripts/compute_difference_map.py | 88 ++-------- meteor/scripts/compute_iterative_tv_map.py | 113 +++++++++++++ meteor/settings.py | 44 ++++- meteor/tv.py | 6 +- meteor/utils.py | 2 +- meteor/validate.py | 2 +- pyproject.toml | 1 + .../test_compute_iterative_tv_map.py | 63 +++++++ test/unit/conftest.py | 13 ++ test/unit/scripts/conftest.py | 2 +- test/unit/scripts/test_common.py | 51 +++++- .../scripts/test_compute_difference_map.py | 51 ------ .../scripts/test_compute_iterative_tv_map.py | 121 ++++++++++++++ test/unit/test_iterative.py | 2 + test/unit/test_tv.py | 15 +- 19 files changed, 590 insertions(+), 220 deletions(-) create mode 100644 meteor/scripts/compute_iterative_tv_map.py create mode 100644 test/functional/test_compute_iterative_tv_map.py create mode 100644 test/unit/scripts/test_compute_iterative_tv_map.py diff --git a/meteor/diffmaps.py b/meteor/diffmaps.py index d905940..bfd3f85 100644 --- a/meteor/diffmaps.py +++ b/meteor/diffmaps.py @@ -6,12 +6,10 @@ import reciprocalspaceship as rs from .rsmap import Map, _assert_is_map -from .settings import MAP_SAMPLING +from .settings import DEFAULT_KPARAMS_TO_SCAN, MAP_SAMPLING from .utils import filter_common_indices from .validate import ScalarMaximizer, negentropy -DEFAULT_KPARAMS_TO_SCAN = np.linspace(0.0, 1.0, 101) - def set_common_crystallographic_metadata(map1: Map, map2: Map, *, output: Map) -> None: if hasattr(map1, "cell"): diff --git a/meteor/io.py b/meteor/io.py index 8a12a62..f89aa54 100644 --- a/meteor/io.py +++ b/meteor/io.py @@ -5,30 +5,14 @@ from __future__ import annotations import re -from typing import Final - -OBSERVED_INTENSITY_COLUMNS: Final[list[str]] = [ - "I", # generic - "IMEAN", # CCP4 - "I-obs", # phenix -] - -OBSERVED_AMPLITUDE_COLUMNS: Final[list[str]] = [ - "F", # generic - "FP", # CCP4 & GLPh native - r"FPH\d", # CCP4 derivative - "F-obs", # phenix -] - -OBSERVED_UNCERTAINTY_COLUMNS: Final[list[str]] = [ - "SIGF", # generic - "SIGFP", # CCP4 & GLPh native - r"SIGFPH\d", # CCP4 -] - -COMPUTED_AMPLITUDE_COLUMNS: Final[list[str]] = ["FC"] - -COMPUTED_PHASE_COLUMNS: Final[list[str]] = ["PHIC"] + +from .settings import ( + COMPUTED_AMPLITUDE_COLUMNS, + COMPUTED_PHASE_COLUMNS, + OBSERVED_AMPLITUDE_COLUMNS, + OBSERVED_INTENSITY_COLUMNS, + OBSERVED_UNCERTAINTY_COLUMNS, +) class AmbiguousMtzColumnError(ValueError): ... diff --git a/meteor/iterative.py b/meteor/iterative.py index 5e6bab7..82f2a67 100644 --- a/meteor/iterative.py +++ b/meteor/iterative.py @@ -4,15 +4,21 @@ import numpy as np import pandas as pd +import structlog from .rsmap import Map +from .settings import ( + DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION, + ITERATIVE_TV_CONVERGENCE_TOLERANCE, + ITERATIVE_TV_MAX_ITERATIONS, +) from .tv import TvDenoiseResult, tv_denoise_difference_map from .utils import ( average_phase_diff_in_degrees, complex_array_to_rs_dataseries, ) -DEFAULT_TV_WEIGHTS_TO_SCAN = [0.001, 0.01, 0.1, 1.0] +log = structlog.get_logger() def _project_derivative_on_experimental_set( @@ -53,13 +59,14 @@ def _project_derivative_on_experimental_set( return projected_derivative -def _complex_derivative_from_iterative_tv( +def _complex_derivative_from_iterative_tv( # noqa: PLR0913 *, native: np.ndarray, initial_derivative: np.ndarray, tv_denoise_function: Callable[[np.ndarray], tuple[np.ndarray, TvDenoiseResult]], - convergence_tolerance: float = 1e-4, - max_iterations: int = 1000, + convergence_tolerance: float = ITERATIVE_TV_CONVERGENCE_TOLERANCE, + max_iterations: int = ITERATIVE_TV_MAX_ITERATIONS, + verbose: bool = False, ) -> tuple[np.ndarray, pd.DataFrame]: """ Estimate the derivative phases using the iterative TV algorithm. @@ -87,6 +94,9 @@ def _complex_derivative_from_iterative_tv( max_iterations: int If this number of iterations is reached, stop early. Default 1000. + verbose: bool + Log or not. + Returns ------- estimated_complex_derivative: np.ndarray @@ -127,6 +137,13 @@ def _complex_derivative_from_iterative_tv( "average_phase_change": phase_change, }, ) + if verbose: + log.info( + f" iteration {num_iterations:04d}", # noqa: G004 + phase_change=round(phase_change, 4), + negentropy=round(tv_metadata.optimal_negentropy, 4), + tv_weight=tv_metadata.optimal_tv_weight, + ) if num_iterations > max_iterations: break @@ -134,13 +151,14 @@ def _complex_derivative_from_iterative_tv( return derivative, pd.DataFrame(metadata) -def iterative_tv_phase_retrieval( +def iterative_tv_phase_retrieval( # noqa: PLR0913 initial_derivative: Map, native: Map, *, - convergence_tolerance: float = 1e-4, - max_iterations: int = 1000, - tv_weights_to_scan: list[float] = DEFAULT_TV_WEIGHTS_TO_SCAN, + convergence_tolerance: float = ITERATIVE_TV_CONVERGENCE_TOLERANCE, + max_iterations: int = ITERATIVE_TV_MAX_ITERATIONS, + tv_weights_to_scan: list[float] = DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION, + verbose: bool = False, ) -> tuple[Map, pd.DataFrame]: """ Here is a brief pseudocode sketch of the alogrithm. Structure factors F below are complex unless @@ -182,6 +200,9 @@ def iterative_tv_phase_retrieval( A list of TV regularization weights (λ values) to be scanned for optimal results, by default [0.001, 0.01, 0.1, 1.0]. + verbose: bool + Log or not. + Returns ------- output_map: Map @@ -209,12 +230,19 @@ def tv_denoise_closure(difference: np.ndarray) -> tuple[np.ndarray, TvDenoiseRes return denoised_map.complex_amplitudes, tv_metadata # estimate the derivative phases using the iterative TV algorithm + if verbose: + log.info( + "convergence criteria:", + phase_tolerance=convergence_tolerance, + max_iterations=max_iterations, + ) it_tv_complex_derivative, metadata = _complex_derivative_from_iterative_tv( native=native.complex_amplitudes, initial_derivative=initial_derivative.complex_amplitudes, tv_denoise_function=tv_denoise_closure, convergence_tolerance=convergence_tolerance, max_iterations=max_iterations, + verbose=verbose, ) _, derivative_phases = complex_array_to_rs_dataseries( it_tv_complex_derivative, diff --git a/meteor/scripts/common.py b/meteor/scripts/common.py index ce1877a..3b60d6d 100644 --- a/meteor/scripts/common.py +++ b/meteor/scripts/common.py @@ -1,18 +1,30 @@ +from __future__ import annotations + import argparse +import json import re from dataclasses import dataclass from enum import StrEnum, auto +from io import StringIO from pathlib import Path from typing import Any +import numpy as np +import pandas as pd import reciprocalspaceship as rs import structlog +from meteor.diffmaps import ( + compute_difference_map, + compute_kweighted_difference_map, + max_negentropy_kweighted_difference_map, +) from meteor.io import find_observed_amplitude_column, find_observed_uncertainty_column from meteor.rsmap import Map from meteor.scale import scale_maps from meteor.settings import COMPUTED_MAP_RESOLUTION_LIMIT, KWEIGHT_PARAMETER_DEFAULT from meteor.sfcalc import structure_file_to_calculated_map +from meteor.tv import TvDenoiseResult log = structlog.get_logger() @@ -64,41 +76,48 @@ class DiffmapArgParser(argparse.ArgumentParser): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) - derivative_group = self.add_argument_group( - "derivative", - description=( - "The 'derivative' diffraction data, typically: light-triggered, ligand-bound, etc. " - ), + required_group = self.add_argument_group("required") + required_group.add_argument( + "derivative_mtz", + type=Path, + help="Path to MTZ containing the `derivative` data; positional arg (order matters).", + ) + required_group.add_argument( + "native_mtz", + type=Path, + help="Path to MTZ containing the `native` data; positional arg (order matters)", ) - derivative_group.add_argument("derivative_mtz", type=Path) - derivative_group.add_argument( + required_group.add_argument( + "-s", + "--structure", + type=Path, + required=True, + help="Specify CIF or PDB file path, for phases (usually a native model)", + ) + + labels_group = self.add_argument_group("mtz column labels (input)") + labels_group.add_argument( "-da", "--derivative-amplitude-column", type=str, default=INFER_COLUMN_NAME, help="specify the MTZ column for the amplitudes; will try to guess if not provided", ) - derivative_group.add_argument( + labels_group.add_argument( "-du", "--derivative-uncertainty-column", type=str, default=INFER_COLUMN_NAME, help="specify the MTZ column for the uncertainties; will try to guess if not provided", ) - - native_group = self.add_argument_group( - "native", - description=("The 'native' diffraction data, typically: dark, apo, etc."), - ) - native_group.add_argument("native_mtz", type=Path) - native_group.add_argument( + labels_group.add_argument( "-na", "--native-amplitude-column", type=str, default=INFER_COLUMN_NAME, help="specify the MTZ column for the amplitudes; will try to guess if not provided", ) - native_group.add_argument( + labels_group.add_argument( "-nu", "--native-uncertainty-column", type=str, @@ -106,23 +125,15 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: help="specify the MTZ column for the uncertainties; will try to guess if not provided", ) - self.add_argument( - "-s", - "--structure", - type=Path, - required=True, - help="Specify CIF or PDB file path, for phases (usually a native model). Required.", - ) - - self.add_argument( + output_group = self.add_argument_group("output") + output_group.add_argument( "-o", "--mtzout", type=Path, default=DEFAULT_OUTPUT_MTZ, help=f"Specify output MTZ file path. Default: {DEFAULT_OUTPUT_MTZ}.", ) - - self.add_argument( + output_group.add_argument( "-m", "--metadataout", type=Path, @@ -130,7 +141,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: help=f"Specify output metadata file path. Default: {DEFAULT_OUTPUT_METADATA_FILE}.", ) - self.add_argument( + kweight_group = self.add_argument_group("k weighting settings") + kweight_group.add_argument( "-k", "--kweight-mode", type=WeightMode, @@ -138,8 +150,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: choices=list(WeightMode), help="How to pick the k-parameter. Optimize means max negentropy. Default: `optimize`.", ) - - self.add_argument( + kweight_group.add_argument( "-w", "--kweight-parameter", type=float, @@ -235,3 +246,88 @@ def load_difference_maps(args: argparse.Namespace) -> DiffMapSet: mapset.scale() return mapset + + +def kweight_diffmap_according_to_mode( + *, mapset: DiffMapSet, kweight_mode: WeightMode, kweight_parameter: float | None = None +) -> tuple[Map, float | None]: + """ + Make and k-weight a difference map using a specified `WeightMode`. + + Three modes are possible to pick the k-parameter: + * `WeightMode.optimize`, max-negentropy value will and picked, this may take some time + * `WeightMode.fixed`, `kweight_parameter` is used + * `WeightMode.none`, then no k-weighting is done (note this is NOT equivalent to + kweight_parameter=0.0) + + Parameters + ---------- + mapset: DiffMapSet + The set of `derivative`, `native`, `computed` maps to use to compute the diffmap. + + kweight_mode: WeightMode + How to set the k-parameter: {optimize, fixed, none}. See above. If `fixed`, then + `kweight_parameter` is required. + + kweight_parameter: float | None + If kweight_mode == WeightMode.fixed, then this must be a float that specifies the + k-parameter to use. + + Returns + ------- + diffmap: meteor.rsmap.Map + The difference map, k-weighted if requested. + + kweight_parameter: float | None + The `kweight_parameter` used. Only really interesting if WeightMode.optimize. + """ + log.info("Computing difference map.") + + if kweight_mode == WeightMode.optimize: + diffmap, kweight_parameter = max_negentropy_kweighted_difference_map( + mapset.derivative, mapset.native + ) + log.info(" using negentropy optimized", kparameter=kweight_parameter) + if kweight_parameter is np.nan: + msg = "determined `k-parameter` is NaN, something went wrong..." + raise RuntimeError(msg) + + elif kweight_mode == WeightMode.fixed: + if not isinstance(kweight_parameter, float): + msg = f"`kweight_parameter` is type `{type(kweight_parameter)}`, must be `float`" + raise TypeError(msg) + + diffmap = compute_kweighted_difference_map( + mapset.derivative, mapset.native, k_parameter=kweight_parameter + ) + + log.info(" using fixed", kparameter=kweight_parameter) + + elif kweight_mode == WeightMode.none: + diffmap = compute_difference_map(mapset.derivative, mapset.native) + kweight_parameter = None + log.info(" requested no k-weighting") + + else: + raise InvalidWeightModeError(kweight_mode) + + return diffmap, kweight_parameter + + +def write_combined_metadata( + *, filename: Path, it_tv_metadata: pd.DataFrame, final_tv_metadata: TvDenoiseResult +) -> None: + combined_metadata = { + "iterative_tv": it_tv_metadata.to_json(orient="records", indent=4), + "final_tv_pass": final_tv_metadata.json(), + } + with filename.open("w") as f: + json.dump(combined_metadata, f, indent=4) + + +def read_combined_metadata(*, filename: Path) -> tuple[pd.DataFrame, TvDenoiseResult]: + with filename.open("r") as f: + combined_metadata = json.load(f) + it_tv_metadata = pd.read_json(StringIO(combined_metadata["iterative_tv"])) + final_tv_metadata = TvDenoiseResult.from_json(combined_metadata["final_tv_pass"]) + return it_tv_metadata, final_tv_metadata diff --git a/meteor/scripts/compute_difference_map.py b/meteor/scripts/compute_difference_map.py index e3df432..fd81cab 100644 --- a/meteor/scripts/compute_difference_map.py +++ b/meteor/scripts/compute_difference_map.py @@ -5,17 +5,17 @@ import numpy as np import structlog -from meteor.diffmaps import ( - compute_difference_map, - compute_kweighted_difference_map, - max_negentropy_kweighted_difference_map, -) from meteor.rsmap import Map from meteor.settings import MAP_SAMPLING, TV_WEIGHT_DEFAULT from meteor.tv import TvDenoiseResult, tv_denoise_difference_map from meteor.validate import negentropy -from .common import DiffmapArgParser, DiffMapSet, InvalidWeightModeError, WeightMode +from .common import ( + DiffmapArgParser, + InvalidWeightModeError, + WeightMode, + kweight_diffmap_according_to_mode, +) log = structlog.get_logger() @@ -23,7 +23,9 @@ class TvDiffmapArgParser(DiffmapArgParser): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) - self.add_argument( + + tv_group = self.add_argument_group("TV denoising settings") + tv_group.add_argument( "-tv", "--tv-denoise-mode", type=WeightMode, @@ -34,7 +36,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: "negentropy. Default: `optimize`." ), ) - self.add_argument( + tv_group.add_argument( "-l", "--tv-weight", type=float, @@ -46,72 +48,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: ) -def kweight_diffmap_according_to_mode( - *, mapset: DiffMapSet, kweight_mode: WeightMode, kweight_parameter: float | None = None -) -> tuple[Map, float | None]: - """ - Make and k-weight a difference map using a specified `WeightMode`. - - Three modes are possible to pick the k-parameter: - * `WeightMode.optimize`, max-negentropy value will and picked, this may take some time - * `WeightMode.fixed`, `kweight_parameter` is used - * `WeightMode.none`, then no k-weighting is done (note this is NOT equivalent to - kweight_parameter=0.0) - - Parameters - ---------- - mapset: DiffMapSet - The set of `derivative`, `native`, `computed` maps to use to compute the diffmap. - - kweight_mode: WeightMode - How to set the k-parameter: {optimize, fixed, none}. See above. If `fixed`, then - `kweight_parameter` is required. - - kweight_parameter: float | None - If kweight_mode == WeightMode.fixed, then this must be a float that specifies the - k-parameter to use. - - Returns - ------- - diffmap: meteor.rsmap.Map - The difference map, k-weighted if requested. - - kweight_parameter: float | None - The `kweight_parameter` used. Only really interesting if WeightMode.optimize. - """ - log.info("Computing difference map.") - - if kweight_mode == WeightMode.optimize: - diffmap, kweight_parameter = max_negentropy_kweighted_difference_map( - mapset.derivative, mapset.native - ) - log.info(" using negentropy optimized", kparameter=kweight_parameter) - if kweight_parameter is np.nan: - msg = "determined `k-parameter` is NaN, something went wrong..." - raise RuntimeError(msg) - - elif kweight_mode == WeightMode.fixed: - if not isinstance(kweight_parameter, float): - msg = f"`kweight_parameter` is type `{type(kweight_parameter)}`, must be `float`" - raise TypeError(msg) - - diffmap = compute_kweighted_difference_map( - mapset.derivative, mapset.native, k_parameter=kweight_parameter - ) - - log.info(" using fixed", kparameter=kweight_parameter) - - elif kweight_mode == WeightMode.none: - diffmap = compute_difference_map(mapset.derivative, mapset.native) - kweight_parameter = None - log.info(" requested no k-weighting") - - else: - raise InvalidWeightModeError(kweight_mode) - - return diffmap, kweight_parameter - - def denoise_diffmap_according_to_mode( *, diffmap: Map, @@ -157,7 +93,7 @@ def denoise_diffmap_according_to_mode( "Optimal TV weight found", weight=metadata.optimal_tv_weight, initial_negentropy=f"{metadata.initial_negentropy:.2e}", - final_negetropy=f"{metadata.optimal_negentropy:.2e}", + final_negentropy=f"{metadata.optimal_negentropy:.2e}", ) elif tv_denoise_mode == WeightMode.fixed: @@ -174,7 +110,7 @@ def denoise_diffmap_according_to_mode( "Map TV-denoised with fixed weight", weight=tv_weight, initial_negentropy=f"{metadata.initial_negentropy:.2e}", - final_negetropy=f"{metadata.optimal_negentropy:.2e}", + final_negentropy=f"{metadata.optimal_negentropy:.2e}", ) elif tv_denoise_mode == WeightMode.none: diff --git a/meteor/scripts/compute_iterative_tv_map.py b/meteor/scripts/compute_iterative_tv_map.py new file mode 100644 index 0000000..83ad72f --- /dev/null +++ b/meteor/scripts/compute_iterative_tv_map.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from typing import Any + +import structlog + +from meteor.iterative import iterative_tv_phase_retrieval +from meteor.settings import ( + DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION, + ITERATIVE_TV_CONVERGENCE_TOLERANCE, + ITERATIVE_TV_MAX_ITERATIONS, +) +from meteor.tv import tv_denoise_difference_map + +from .common import DiffmapArgParser, kweight_diffmap_according_to_mode, write_combined_metadata + +log = structlog.get_logger() + + +class IterativeTvArgParser(DiffmapArgParser): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + it_tv_group = self.add_argument_group("iterative TV settings") + it_tv_group.add_argument( + "-x", + "--tv-weights-to-scan", + nargs="+", + type=float, + default=DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION, + help=( + "Choose what TV weights to evaluate at every iteration. Can be a single float." + f"Default: {DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION}." + ), + ) + it_tv_group.add_argument( + "--convergence-tolerance", + type=float, + default=ITERATIVE_TV_CONVERGENCE_TOLERANCE, + help=( + "If the average phase change drops below this value at each iteration, stop." + f"Default: {ITERATIVE_TV_CONVERGENCE_TOLERANCE}." + ), + ) + it_tv_group.add_argument( + "--max-iterations", + type=float, + default=ITERATIVE_TV_MAX_ITERATIONS, + help=( + "If the number of iterations exceeds this value, stop." + f"Default: {ITERATIVE_TV_MAX_ITERATIONS}." + ), + ) + + +def main(command_line_arguments: list[str] | None = None) -> None: + parser = IterativeTvArgParser( + description=( + "Compute an difference map, where the phases of the derivative structure are estimated " + "using the assumption that the resulting map should have a low total variation. Phases " + "are estimated using a crystallographic analog of the Gerchberg-Saxton algorithm, with " + "TV denoising as the real-space constraint.\n\n K-weighting can optionally be used to " + "weight the algorithm input. \n\n In the terminology adopted, this script computes a " + "`derivative` minus a `native` map, modifying the derivative phases. Native phases," + "typically from a model of the `native` data, are computed from a CIF/PDB model you " + "must provide." + ) + ) + args = parser.parse_args(command_line_arguments) + parser.check_output_filepaths(args) + mapset = parser.load_difference_maps(args) + + log.info("Launching iterative TV phase retrieval", tv_weights_to_scan=args.tv_weights_to_scan) + log.info("This will take time, typically minutes...") + new_derivative_map, it_tv_metadata = iterative_tv_phase_retrieval( + mapset.derivative, + mapset.native, + tv_weights_to_scan=args.tv_weights_to_scan, + convergence_tolerance=args.convergence_tolerance, + max_iterations=args.max_iterations, + verbose=True, + ) + mapset.derivative = new_derivative_map + log.info("Convergence.") + + diffmap, kparameter_used = kweight_diffmap_according_to_mode( + kweight_mode=args.kweight_mode, kweight_parameter=args.kweight_parameter, mapset=mapset + ) + + log.info("Final real-space TV denoising pass...", method="golden-section search") + log.info("This may take some time (up to a few minutes)...") + final_map, final_tv_metadata = tv_denoise_difference_map(diffmap, full_output=True) + + log.info( + "Optimal TV weight found", + weight=final_tv_metadata.optimal_tv_weight, + final_negentropy=round(final_tv_metadata.optimal_negentropy, 4), + ) + + log.info("Writing output.", file=str(args.mtzout)) + final_map.write_mtz(args.mtzout) + + log.info("Writing metadata.", file=str(args.metadataout)) + final_tv_metadata.k_parameter_used = kparameter_used + write_combined_metadata( + filename=args.metadataout, + it_tv_metadata=it_tv_metadata, + final_tv_metadata=final_tv_metadata, + ) + + +if __name__ == "__main__": + main() diff --git a/meteor/settings.py b/meteor/settings.py index 279b4e6..533b86c 100644 --- a/meteor/settings.py +++ b/meteor/settings.py @@ -1,12 +1,46 @@ from __future__ import annotations -BRACKET_FOR_GOLDEN_OPTIMIZATION: tuple[float, float] = (0.0, 0.05) -TV_STOP_TOLERANCE: float = 0.00000005 -TV_MAX_NUM_ITER: int = 50 +import numpy as np + +# map computation +COMPUTED_MAP_RESOLUTION_LIMIT: float = 1.0 +GEMMI_HIGH_RESOLUTION_BUFFER: float = 1e-6 MAP_SAMPLING: int = 3 + +# known map labels +OBSERVED_INTENSITY_COLUMNS: list[str] = [ + "I", # generic + "IMEAN", # CCP4 + "I-obs", # phenix +] +OBSERVED_AMPLITUDE_COLUMNS: list[str] = [ + "F", # generic + "FP", # CCP4 & GLPh native + r"FPH\d", # CCP4 derivative + "F-obs", # phenix +] +OBSERVED_UNCERTAINTY_COLUMNS: list[str] = [ + "SIGF", # generic + "SIGFP", # CCP4 & GLPh native + r"SIGFPH\d", # CCP4 +] +COMPUTED_AMPLITUDE_COLUMNS: list[str] = ["FC"] +COMPUTED_PHASE_COLUMNS: list[str] = ["PHIC"] + + +# k-weighting KWEIGHT_PARAMETER_DEFAULT: float = 0.05 +DEFAULT_KPARAMS_TO_SCAN = np.linspace(0.0, 1.0, 101) + + +# tv denoising TV_WEIGHT_DEFAULT: float = 0.01 +BRACKET_FOR_GOLDEN_OPTIMIZATION: tuple[float, float] = (0.0, 0.05) +TV_STOP_TOLERANCE: float = 0.00000005 # inner loop; not for iterative-tv phase retrieval +TV_MAX_NUM_ITER: int = 50 # inner loop; not for iterative-tv phase retrieval -COMPUTED_MAP_RESOLUTION_LIMIT: float = 1.0 -GEMMI_HIGH_RESOLUTION_BUFFER: float = 1e-6 +# iterative tv +ITERATIVE_TV_CONVERGENCE_TOLERANCE: float = 0.001 +ITERATIVE_TV_MAX_ITERATIONS: int = 100 +DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION: list[float] = [0.001, 0.01, 0.1] diff --git a/meteor/tv.py b/meteor/tv.py index 06f0aae..6909a85 100644 --- a/meteor/tv.py +++ b/meteor/tv.py @@ -200,9 +200,9 @@ def negentropy_objective(tv_weight: float) -> float: if full_output: initial_negentropy = negentropy(realspace_map_array) tv_result = TvDenoiseResult( - initial_negentropy=initial_negentropy, - optimal_tv_weight=maximizer.argument_optimum, - optimal_negentropy=maximizer.objective_maximum, + initial_negentropy=float(initial_negentropy), + optimal_tv_weight=float(maximizer.argument_optimum), + optimal_negentropy=float(maximizer.objective_maximum), map_sampling_used_for_tv=MAP_SAMPLING, tv_weights_scanned=maximizer.values_evaluated, negentropy_at_weights=maximizer.objective_at_values, diff --git a/meteor/utils.py b/meteor/utils.py index ccc9eb2..11055b5 100644 --- a/meteor/utils.py +++ b/meteor/utils.py @@ -85,7 +85,7 @@ def average_phase_diff_in_degrees(array1: np.ndarray, array2: np.ndarray) -> flo phase2 = np.rad2deg(np.angle(array2)) diff = phase2 - phase1 diff = (diff + 180) % 360 - 180 - return np.sum(np.abs(diff)) / float(np.prod(array1.shape)) + return float(np.sum(np.abs(diff)) / float(np.prod(array1.shape))) def complex_array_to_rs_dataseries( diff --git a/meteor/validate.py b/meteor/validate.py index 74678d1..36a946e 100644 --- a/meteor/validate.py +++ b/meteor/validate.py @@ -99,7 +99,7 @@ def __init__(self, *, objective: Callable[[float], float]) -> None: self.objective_at_values: list[float] = [] def _update_optima(self, argument_test_value: float) -> float: - objective_value = self.objective(argument_test_value) + objective_value = float(self.objective(argument_test_value)) if objective_value > self.objective_maximum: self.argument_optimum = argument_test_value self.objective_maximum = objective_value diff --git a/pyproject.toml b/pyproject.toml index 737e61a..1ae7a35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ build-backend = "setuptools.build_meta" [project.scripts] "meteor.diffmap" = "meteor.scripts.compute_difference_map:main" +"meteor.phaseboost" = "meteor.scripts.compute_iterative_tv_map:main" [tool.mypy] disallow_untyped_calls = true diff --git a/test/functional/test_compute_iterative_tv_map.py b/test/functional/test_compute_iterative_tv_map.py new file mode 100644 index 0000000..efff9d0 --- /dev/null +++ b/test/functional/test_compute_iterative_tv_map.py @@ -0,0 +1,63 @@ +from pathlib import Path + +import numpy as np +import reciprocalspaceship as rs + +from meteor.rsmap import Map +from meteor.scripts import compute_iterative_tv_map +from meteor.scripts.common import read_combined_metadata +from meteor.utils import filter_common_indices + + +def test_script_produces_consistent_results( + testing_pdb_file: Path, + testing_mtz_file: Path, + tmp_path: Path, +) -> None: + output_mtz = tmp_path / "test-output.mtz" + output_metadata = tmp_path / "test-output-metadata.csv" + + cli_args = [ + str(testing_mtz_file), # derivative + "--derivative-amplitude-column", + "F_on", + "--derivative-uncertainty-column", + "SIGF_on", + str(testing_mtz_file), # native + "--native-amplitude-column", + "F_off", + "--native-uncertainty-column", + "SIGF_off", + "--structure", + str(testing_pdb_file), + "-o", + str(output_mtz), + "-m", + str(output_metadata), + "-x", + "0.01", + "--max-iterations", + "5", + ] + + compute_iterative_tv_map.main(cli_args) + + iterative_tv_metadata, final_tv_metadata = read_combined_metadata(filename=output_metadata) + result_map = Map.read_mtz_file(output_mtz) + + # 1. make sure the negentropy increased during iterative TV + negentropy_over_iterations = iterative_tv_metadata["negentropy_after_tv"].to_numpy() + assert negentropy_over_iterations[-1] > negentropy_over_iterations[0] + + # 2. make sure negentropy increased in the final TV pass + assert final_tv_metadata.optimal_negentropy >= final_tv_metadata.initial_negentropy + + # 3. make sure computed DF are close to those stored on disk + reference_dataset = rs.read_mtz(str(testing_mtz_file)) + reference_amplitudes = reference_dataset["F_itTV"] + + result_amplitudes, reference_amplitudes = filter_common_indices( + result_map.amplitudes, reference_amplitudes + ) + rho = np.corrcoef(result_amplitudes.to_numpy(), reference_amplitudes.to_numpy())[0, 1] + assert rho > 0.95 diff --git a/test/unit/conftest.py b/test/unit/conftest.py index 2571bd5..9322ced 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -17,6 +17,19 @@ NP_RNG = np.random.default_rng() +@pytest.fixture +def tv_denoise_result_source_data() -> dict: + return { + "initial_negentropy": 0.0, + "optimal_tv_weight": 1.0, + "optimal_negentropy": 5.0, + "map_sampling_used_for_tv": 5, + "tv_weights_scanned": [0.0, 1.0], + "negentropy_at_weights": [0.0, 5.0], + "k_parameter_used": 0.0, + } + + @pytest.fixture def test_map_columns() -> MapColumns: return MapColumns( diff --git a/test/unit/scripts/conftest.py b/test/unit/scripts/conftest.py index cc080db..b7017ee 100644 --- a/test/unit/scripts/conftest.py +++ b/test/unit/scripts/conftest.py @@ -17,7 +17,7 @@ def diffmap_set(random_difference_map: Map) -> DiffMapSet: @pytest.fixture def fixed_kparameter() -> float: - return 0.75 + return 0.05 @pytest.fixture diff --git a/test/unit/scripts/test_common.py b/test/unit/scripts/test_common.py index 21b2ad4..7fb047b 100644 --- a/test/unit/scripts/test_common.py +++ b/test/unit/scripts/test_common.py @@ -10,7 +10,15 @@ import reciprocalspaceship as rs from meteor.rsmap import Map -from meteor.scripts.common import DiffmapArgParser, DiffMapSet, WeightMode +from meteor.scripts.common import ( + DiffmapArgParser, + DiffMapSet, + WeightMode, + kweight_diffmap_according_to_mode, + read_combined_metadata, + write_combined_metadata, +) +from meteor.tv import TvDenoiseResult def mocked_read_mtz(dummy_filename: str) -> rs.DataSet: @@ -43,7 +51,9 @@ def test_diffmap_set_scale(diffmap_set: DiffMapSet, use_uncertainties: bool) -> assert np.all(derivative_amps_before * 2 == diffmap_set.derivative["F"].to_numpy()) -def test_diffmap_argparser_parse_args(base_cli_arguments: list[str]) -> None: +def test_diffmap_argparser_parse_args( + base_cli_arguments: list[str], fixed_kparameter: float +) -> None: parser = DiffmapArgParser() args = parser.parse_args(base_cli_arguments) @@ -57,7 +67,7 @@ def test_diffmap_argparser_parse_args(base_cli_arguments: list[str]) -> None: assert args.mtzout == Path("fake-output.mtz") assert args.metadataout == Path("fake-output-metadata.csv") assert args.kweight_mode == WeightMode.fixed - assert args.kweight_parameter == 0.75 + assert args.kweight_parameter == fixed_kparameter def test_diffmap_argparser_check_output_filepaths( @@ -129,3 +139,38 @@ def return_a_map(*args: Any, **kwargs: Any) -> Map: assert isinstance(mapset.native, Map) assert isinstance(mapset.derivative, Map) assert isinstance(mapset.calculated, Map) + + +@pytest.mark.parametrize("mode", list(WeightMode)) +def test_kweight_diffmap_according_to_mode( + mode: WeightMode, diffmap_set: DiffMapSet, fixed_kparameter: float +) -> None: + # ensure the two maps aren't exactly the same to prevent numerical issues + diffmap_set.derivative.loc[0, diffmap_set.derivative._amplitude_column] += 1.0 + + diffmap, _ = kweight_diffmap_according_to_mode( + mapset=diffmap_set, kweight_mode=mode, kweight_parameter=fixed_kparameter + ) + assert len(diffmap) > 0 + assert isinstance(diffmap, Map) + + if mode == WeightMode.fixed: + with pytest.raises(TypeError): + _ = kweight_diffmap_according_to_mode( + mapset=diffmap_set, kweight_mode=mode, kweight_parameter=None + ) + + +def test_read_write_combined_metadata(tmp_path: Path, tv_denoise_result_source_data: dict) -> None: + filename = tmp_path / "tmp.json" + + fake_ittv_metadata = pd.DataFrame([1, 2, 3]) + fake_tv_metadata = TvDenoiseResult(**tv_denoise_result_source_data) + + write_combined_metadata( + filename=filename, it_tv_metadata=fake_ittv_metadata, final_tv_metadata=fake_tv_metadata + ) + obtained_ittv_metadata, obtained_tv_metadata = read_combined_metadata(filename=filename) + + pd.testing.assert_frame_equal(fake_ittv_metadata, obtained_ittv_metadata) + assert fake_tv_metadata == obtained_tv_metadata diff --git a/test/unit/scripts/test_compute_difference_map.py b/test/unit/scripts/test_compute_difference_map.py index 70452ad..ca0ad2c 100644 --- a/test/unit/scripts/test_compute_difference_map.py +++ b/test/unit/scripts/test_compute_difference_map.py @@ -7,15 +7,11 @@ import pytest -from meteor.rsmap import Map from meteor.scripts import compute_difference_map from meteor.scripts.common import DiffMapSet, WeightMode from meteor.scripts.compute_difference_map import ( TvDiffmapArgParser, - denoise_diffmap_according_to_mode, - kweight_diffmap_according_to_mode, ) -from meteor.tv import TvDenoiseResult TV_WEIGHT = 0.1 @@ -42,53 +38,6 @@ def test_tv_diffmap_parser(parsed_tv_cli_args: argparse.Namespace) -> None: assert parsed_tv_cli_args.tv_weight == TV_WEIGHT -@pytest.mark.parametrize("mode", list(WeightMode)) -def test_kweight_diffmap_according_to_mode( - mode: WeightMode, diffmap_set: DiffMapSet, fixed_kparameter: float -) -> None: - # ensure the two maps aren't exactly the same to prevent numerical issues - diffmap_set.derivative.amplitudes.iloc[0] += 1.0 - - diffmap, _ = kweight_diffmap_according_to_mode( - mapset=diffmap_set, kweight_mode=mode, kweight_parameter=fixed_kparameter - ) - assert len(diffmap) > 0 - assert isinstance(diffmap, Map) - - if mode == WeightMode.fixed: - with pytest.raises(TypeError): - _ = kweight_diffmap_according_to_mode( - mapset=diffmap_set, kweight_mode=mode, kweight_parameter=None - ) - - -@pytest.mark.parametrize("mode", list(WeightMode)) -def test_denoise_diffmap_according_to_mode(mode: WeightMode, random_difference_map: Map) -> None: - diffmap, metadata = denoise_diffmap_according_to_mode( - diffmap=random_difference_map, - tv_denoise_mode=mode, - tv_weight=TV_WEIGHT, - ) - - assert len(diffmap) > 0 - assert isinstance(diffmap, Map) - assert isinstance(metadata, TvDenoiseResult) - - if mode == WeightMode.optimize: - # random test; changes - assert 0.04 < metadata.optimal_tv_weight < 0.06 - - elif mode == WeightMode.fixed: - assert metadata.optimal_tv_weight == TV_WEIGHT - with pytest.raises(TypeError): - _, _ = denoise_diffmap_according_to_mode( - diffmap=random_difference_map, tv_denoise_mode=mode, tv_weight=None - ) - - elif mode == WeightMode.none: - assert metadata.optimal_tv_weight == 0.0 - - def test_main(diffmap_set: DiffMapSet, tmp_path: Path, fixed_kparameter: float) -> None: def mock_load_difference_maps(self: Any, args: argparse.Namespace) -> DiffMapSet: return diffmap_set diff --git a/test/unit/scripts/test_compute_iterative_tv_map.py b/test/unit/scripts/test_compute_iterative_tv_map.py new file mode 100644 index 0000000..94f229e --- /dev/null +++ b/test/unit/scripts/test_compute_iterative_tv_map.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +from typing import Any, Sequence +from unittest import mock + +import numpy as np +import pandas as pd +import pytest + +from meteor.rsmap import Map +from meteor.scripts import compute_iterative_tv_map +from meteor.scripts.common import DiffMapSet +from meteor.scripts.compute_iterative_tv_map import ( + IterativeTvArgParser, +) +from meteor.tv import TvDenoiseResult + +TV_WEIGHTS_TO_SCAN = [0.01, 0.05] + + +def mock_compute_it_tv( + derivative: Map, native: Map, *, tv_weights_to_scan: list[float], verbose: bool +) -> tuple[Map, pd.DataFrame]: + assert isinstance(derivative, Map) + assert isinstance(native, Map) + fake_map = derivative + fake_metadata = pd.DataFrame.from_dict( + { + "iteration": [0], + "tv_weight": [tv_weights_to_scan[0]], + "negentropy_after_tv": [0.1], + "average_phase_change": [0.0001], + } + ) + return fake_map, fake_metadata + + +def mock_tv_denoise_difference_map( + diffmap: Map, *, full_output: bool, weights_to_scan: Sequence[float] | np.ndarray | None = None +) -> tuple[Map, TvDenoiseResult]: + fake_metadata = TvDenoiseResult( + initial_negentropy=0.001, + optimal_tv_weight=0.1, + optimal_negentropy=1.0, + map_sampling_used_for_tv=3, + tv_weights_scanned=[0.1], + negentropy_at_weights=[ + 1.0, + ], + k_parameter_used=None, + ) + return diffmap, fake_metadata + + +@pytest.fixture +def tv_cli_arguments(base_cli_arguments: list[str]) -> list[str]: + new_cli_arguments = [ + "--tv-weights-to-scan", + *[str(weight) for weight in TV_WEIGHTS_TO_SCAN], + "--convergence-tolerance", + "0.1", + "--max-iterations", + "3", + ] + return [*base_cli_arguments, *new_cli_arguments] + + +@pytest.fixture +def parsed_tv_cli_args(tv_cli_arguments: list[str]) -> argparse.Namespace: + parser = IterativeTvArgParser() + return parser.parse_args(tv_cli_arguments) + + +def test_tv_diffmap_parser(parsed_tv_cli_args: argparse.Namespace) -> None: + assert parsed_tv_cli_args.tv_weights_to_scan == TV_WEIGHTS_TO_SCAN + + +def test_main(diffmap_set: DiffMapSet, tmp_path: Path, fixed_kparameter: float) -> None: + def mock_load_maps(self: Any, args: argparse.Namespace) -> DiffMapSet: + return diffmap_set + + output_mtz_path = tmp_path / "out.mtz" + output_metadata_path = tmp_path / "metadata.csv" + + cli_arguments = [ + "fake-derivative.mtz", + "fake-native.mtz", + "--structure", + "fake.pdb", + "-o", + str(output_mtz_path), + "-m", + str(output_metadata_path), + "--kweight-mode", + "fixed", + "--kweight-parameter", + str(fixed_kparameter), + "-x", + *[str(weight) for weight in TV_WEIGHTS_TO_SCAN], + "--convergence-tolerance", + "0.1", + "--max-iterations", + "3", + ] + + patch1 = mock.patch( + "meteor.scripts.compute_iterative_tv_map.IterativeTvArgParser.load_difference_maps", + mock_load_maps, + ) + patch2 = mock.patch( + "meteor.scripts.compute_iterative_tv_map.tv_denoise_difference_map", + mock_tv_denoise_difference_map, + ) + + with patch1, patch2: + compute_iterative_tv_map.main(cli_arguments) + + assert output_mtz_path.exists() + assert output_metadata_path.exists() diff --git a/test/unit/test_iterative.py b/test/unit/test_iterative.py index 44b19f6..b711253 100644 --- a/test/unit/test_iterative.py +++ b/test/unit/test_iterative.py @@ -76,6 +76,8 @@ def test_complex_derivative_from_iterative_tv() -> None: native=constant_image_ft, initial_derivative=test_image_noisy_ft, tv_denoise_function=simple_tv_function, + convergence_tolerance=0.001, + max_iterations=1000, ) denoised_test_image = np.fft.ifftn(denoised_derivative).real diff --git a/test/unit/test_tv.py b/test/unit/test_tv.py index d85a955..183ee25 100644 --- a/test/unit/test_tv.py +++ b/test/unit/test_tv.py @@ -26,19 +26,6 @@ def rms_between_coefficients(map1: Map, map2: Map) -> float: return float(np.linalg.norm(map2_array - map1_array)) -@pytest.fixture -def tv_denoise_result_source_data() -> dict: - return { - "initial_negentropy": 0.0, - "optimal_tv_weight": 1.0, - "optimal_negentropy": 5.0, - "map_sampling_used_for_tv": 5, - "tv_weights_scanned": [0.0, 1.0], - "negentropy_at_weights": [0.0, 5.0], - "k_parameter_used": 0.0, - } - - def test_tv_denoise_result(tv_denoise_result_source_data: dict) -> None: tdr_obj = tv.TvDenoiseResult(**tv_denoise_result_source_data) assert tv_denoise_result_source_data == asdict(tdr_obj) @@ -91,7 +78,7 @@ def test_tv_denoise_zero_weight(random_difference_map: Map) -> None: ) random_difference_map.canonicalize_amplitudes() output.canonicalize_amplitudes() - pd.testing.assert_frame_equal(random_difference_map, output, atol=1e-3, rtol=1e-2) + pd.testing.assert_frame_equal(random_difference_map, output, atol=1e-2, rtol=1e-2) @pytest.mark.parametrize("weights_to_scan", [None, DEFAULT_WEIGHTS_TO_SCAN])