Skip to content

Commit

Permalink
uses plate object to translate luigi filenames to positions
Browse files Browse the repository at this point in the history
  • Loading branch information
David Erb committed May 2, 2023
1 parent f6df723 commit bd12530
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 29 deletions.
34 changes: 18 additions & 16 deletions src/rockingester_lib/collectors/direct_poll.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
from dls_utilpack.visit import VisitNotFound, get_xchem_directory
from PIL import Image

# Crystal plate object interface.
from xchembku_api.crystal_plate_objects.interface import (
Interface as CrystalPlateInterface,
)

# Dataface client context.
from xchembku_api.datafaces.context import Context as XchembkuDatafaceClientContext
from xchembku_api.models.crystal_plate_filter_model import CrystalPlateFilterModel
Expand Down Expand Up @@ -290,7 +295,7 @@ async def scrape_plate_directory_when_complete(
)

# Get all the well images in the plate directory.
well_names = [
subwell_names = [
entry.name for entry in os.scandir(plate_directory) if entry.is_file()
]

Expand All @@ -301,19 +306,20 @@ async def scrape_plate_directory_when_complete(

# Don't handle the plate directory until all images have arrived.
# TODO: Put in some kind of failsafe in direct_poll.py to handle case where all the well images never arrive.
if len(well_names) < crystal_plate_object.get_well_count():
if len(subwell_names) < crystal_plate_object.get_well_count():
return

# Sort wells by name so that tests are deterministic.
well_names.sort()
subwell_names.sort()

crystal_well_models: List[CrystalWellModel] = []
for well_name in well_names:
for subwell_name in subwell_names:
# Make the well model, including image width/height.
crystal_well_model = await self.ingest_well(
plate_directory,
well_name,
subwell_name,
crystal_plate_model,
crystal_plate_object,
target,
)

Expand All @@ -331,7 +337,7 @@ async def scrape_plate_directory_when_complete(
)

logger.info(
f"copied {len(well_names)} well images from plate {plate_directory.name} to {target}"
f"copied {len(subwell_names)} well images from plate {plate_directory.name} to {target}"
)

# Remember we "handled" this one.
Expand All @@ -341,8 +347,9 @@ async def scrape_plate_directory_when_complete(
async def ingest_well(
self,
plate_directory: Path,
well_name: str,
subwell_name: str,
crystal_plate_model: CrystalPlateModel,
crystal_plate_object: CrystalPlateInterface,
target: Path,
) -> CrystalWellModel:
"""
Expand All @@ -351,17 +358,12 @@ async def ingest_well(
Move the well image file to the ingested area.
"""

input_well_filename = plate_directory / well_name
ingested_well_filename = target / well_name
input_well_filename = plate_directory / subwell_name
ingested_well_filename = target / subwell_name

# Stems are like "9acx_01A_1".
# TODO: Improve safety by ignoring wrongly formatted and non-jpg well filenames.
parts = Path(well_name).stem.split("_")
if len(parts) > 1:
# Strip off the leading 4-letter barcode and underscore.
position = "".join(parts[1:])
else:
position = parts[0]
# Convert the stem into a position as shown in soakdb3.
position = crystal_plate_object.normalize_subwell_name(subwell_name)

error = None
try:
Expand Down
44 changes: 31 additions & 13 deletions tests/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,12 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
# Make the plate on which the wells reside.
visit = "cm00001-1_otherstuff"
created_crystal_plate_models = []

scrabable_barcode = "98ab"
created_crystal_plate_models.append(
CrystalPlateModel(
formulatrix__plate__id=10,
barcode="98ab",
barcode=scrabable_barcode,
visit=visit,
thing_type=CrystalPlateObjectThingTypes.SWISS3,
)
Expand Down Expand Up @@ -183,8 +185,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
# This one gets scraped as normal.
plate_directory1 = plates_directory / "98ab_2023-04-06_RI1000-0276-3drop"
plate_directory1.mkdir(parents=True)
for i in range(10, 10 + scrapable_image_count):
filename = plate_directory1 / ("98ab_%03dA_1.jpg" % (i))
for i in range(scrapable_image_count):
filename = plate_directory1 / self.__subwell_filename(scrabable_barcode, i)
with open(filename, "w") as stream:
stream.write("")

Expand All @@ -195,8 +197,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
)
plate_directory2.mkdir(parents=True)
nobarcode_image_count = 3
for i in range(10, 10 + nobarcode_image_count):
filename = plate_directory2 / ("%s_%03dA_1.jpg" % (nobarcode_barcode, i))
for i in range(nobarcode_image_count):
filename = plate_directory2 / self.__subwell_filename(nobarcode_barcode, i)
with open(filename, "w") as stream:
stream.write("")

Expand All @@ -207,8 +209,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
)
plate_directory3.mkdir(parents=True)
novisit_image_count = 6
for i in range(10, 10 + novisit_image_count):
filename = plate_directory3 / ("%s_%03dA_1.jpg" % (novisit_barcode, i))
for i in range(novisit_image_count):
filename = plate_directory3 / self.__subwell_filename(novisit_barcode, i)
with open(filename, "w") as stream:
stream.write("")

Expand All @@ -219,8 +221,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
)
plate_directory4.mkdir(parents=True)
excluded_image_count = 2
for i in range(10, 10 + excluded_image_count):
filename = plate_directory4 / ("%s_%03dA_1.jpg" % (excluded_barcode, i))
for i in range(excluded_image_count):
filename = plate_directory4 / self.__subwell_filename(excluded_barcode, i)
with open(filename, "w") as stream:
stream.write("")

Expand Down Expand Up @@ -259,10 +261,8 @@ async def __run_part1(self, scrapable_image_count, constants, output_directory):
), "images after scraping"

# Make sure the positions got recorded right in the wells.
i = 10
for crystal_well_model in crystal_well_models:
assert crystal_well_model.position == "%03dA1" % (i)
i += 1
assert crystal_well_models[0].position == "A01a"
assert crystal_well_models[-1].position == "H12d"

# The first "scrapable" plate directory should still exist.
count = sum(1 for _ in plate_directory1.glob("*") if _.is_file())
Expand Down Expand Up @@ -305,3 +305,21 @@ async def __run_part2(self, scrapable_image_count, constants, output_directory):
records = await xchembku.fetch_crystal_wells_filenames()

assert len(records) == scrapable_image_count, "images after restarting scraper"

# ----------------------------------------------------------------------------------------

def __subwell_filename(self, barcode, index):
    """
    Make a subwell image name which can be parsed by swiss3.

    The index counts subwells across the plate: three consecutive indexes
    share one well, wells advance along columns 01..12 first, and then
    along rows A..H.  No file extension is appended.

    Args:
        barcode: plate barcode, used as the prefix of the name.
        index: zero-based subwell index, 0 <= index < 288.

    Returns:
        Name of the form "{barcode}_{col}{row}_{subwell}",
        for example "98ab_01A_1" for index 0.

    Raises:
        IndexError: if index does not address a subwell on the plate.
    """

    well_letters = "ABCDEFGH"
    wells_per_row = 12
    subwells_per_well = 3
    subwell_count = len(well_letters) * wells_per_row * subwells_per_well

    # Reject out-of-range indexes explicitly: a negative index would
    # otherwise wrap around silently and name an unrelated well.
    if not 0 <= index < subwell_count:
        raise IndexError(
            f"subwell index {index} is outside the range 0..{subwell_count - 1}"
        )

    # Pure integer arithmetic, no round trip through float division.
    well, subwell_offset = divmod(index, subwells_per_well)
    row_index, col_offset = divmod(well, wells_per_row)

    row = well_letters[row_index]
    # Columns and subwells are 1-based in the name; columns are zero padded.
    col = "%02d" % (col_offset + 1)
    subwell = subwell_offset + 1

    return f"{barcode}_{col}{row}_{subwell}"

0 comments on commit bd12530

Please sign in to comment.