adding multi-threading, tolerance config

stashapp · axxeman23 · Aug 19, 2022 · Aug 19, 2022 · Aug 19, 2022 · Aug 23, 2022
commit a1ff79552b4499747d95cce46f08054ec7375bf9
diff --git a/plugins/starIdentifier/README.md b/plugins/starIdentifier/README.md
@@ -9,6 +9,7 @@ Star Identifier uses [facial recognition](https://github.com/ageitgey/face_recog
 ## Requirements
 
 ### Python3
+__version: 3.10.x +__
 
 #### Installing Python
 
@@ -66,6 +67,10 @@ Star Identifier uses a tag to find images or scenes you would like identified. B
 
 Since the recognition is based on a single performer image, that image needs to have a pretty clear front-facing view of the performer's face. If face_recognition fails to find a performer's face, Star Identifier will tag that performer with `star identifier performer error` by default.
 
+### Star Identifier Settings
+
+You can adjust the identification tolerance with the `tolerance` setting in `star_identifier_config.py`. `0.6` is the default and a typical value, but I've found `0.5` to work well. Lower values are stricter.
+
 ## Running
 
 ### Export Performers
@@ -80,4 +85,8 @@ This loads all images in the stash database tagged with `star identifier` (by de
 
 ### Identify Scene Screenshots
 
-This loads all scene screenshots in the stash database tagged with `star identifier` (by default), compares the recognized faces to the exported face database, and then adds all potential matches to those scenes as performers.
+This loads the screenshot for every scene in the stash database tagged with `star identifier` (by default), compares the recognized faces to the exported face database, and then adds all potential matches to those scenes as performers.
+
+## Upcoming roadmap
+
+See [issues](https://github.com/axxeman23/star_identifier/issues)
diff --git a/plugins/starIdentifier/star_identifier.py b/plugins/starIdentifier/star_identifier.py
@@ -5,6 +5,7 @@
 import sys
 import os
 import pathlib
+from concurrent.futures import ProcessPoolExecutor
 
 # external
 import urllib.request
@@ -130,8 +131,6 @@ def debug_func(client):
 #
 
 def export_known(client):
-  # This would be faster multi-threaded, but that seems to break face_recognition
-
   log.LogInfo('Getting all performer images...')
 
   performers = client.getPerformerImages()
@@ -151,19 +150,24 @@ def export_known(client):
 
   log.LogInfo('Starting performer image export (this might take a while)')
 
-  for performer in performers:
-    log.LogProgress(count / total)
+  futures_list = []
+
+  with ProcessPoolExecutor(max_workers=10) as executor:
+    for performer in performers:
+      futures_list.append(executor.submit(encode_performer_from_url, performer))
 
-    image = face_recognition.load_image_file(urllib.request.urlopen(performer['image_path']))
-    try:
-      encoding = face_recognition.face_encodings(image)[0]
-      outputDict[performer['id']] = encoding
-    except IndexError:
-      log.LogInfo(f"No face found for {performer['name']}")
-      errorList.append(performer)
+    for future in futures_list:
+      log.LogProgress(count / total)
 
-    count += 1
+      try:
+        result = future.result()
+        outputDict[result['id']] = result['encodings']
+      except IndexError:
+        log.LogInfo(f"No face found for {result['name']}")
+        errorList.append({ 'id': result['id'], 'name': result['name'] })
 
+      count += 1
+
   np.savez(encodings_path, **outputDict)
   json_print(errorList, errors_path)
 
@@ -179,33 +183,75 @@ def export_known(client):
 # Facial recognition functions
 #
 
+# Encoding
+
+def encode_performer_from_url(performer):
+  """
+  Download a performer's image and compute the encoding of the first face found.
+
+  performer: dict with an 'image_path' key holding the image URL.
+
+  Returns the same dict with an added 'encodings' key holding the first
+  face encoding returned by face_recognition.
+
+  Raises IndexError when face_recognition finds no face in the image.
+  """
+  # Close the HTTP response as soon as the image has been read from it
+  with urllib.request.urlopen(performer['image_path']) as response:
+    image = face_recognition.load_image_file(response)
+
+  # [0] raises IndexError on an empty result, i.e. no face was detected
+  performer['encodings'] = face_recognition.face_encodings(image)[0]
+  return performer
+
+
+# Matching
+
+def get_recognized_ids_from_image(image, known_face_encodings, ids):
+  image['matched_ids'] = get_recognized_ids(face_recognition.load_image_file(image['path']), known_face_encodings, ids)
+
+  return image
 
-def get_recognized_ids_from_path(image_path, known_face_encodings, ids):
-  return get_recognized_ids(face_recognition.load_image_file(image_path), known_face_encodings, ids)
+def get_recognized_ids_from_scene_screenshot(scene, known_face_encodings, ids):
+  image = urllib.request.urlopen(scene['paths']['screenshot'])
+  scene['matched_ids'] = get_recognized_ids(face_recognition.load_image_file(image), known_face_encodings, ids)
 
-def get_recognized_ids_from_url(image_url, known_face_encodings, ids):
-  image = urllib.request.urlopen(image_url)
-  return get_recognized_ids(face_recognition.load_image_file(image), known_face_encodings, ids)
+  return scene
 
 def get_recognized_ids(image_file, known_face_encodings, ids):
   unknown_face_encodings = face_recognition.face_encodings(image_file)
 
   recognized_ids = np.empty((0,0), int)
 
   for unknown_face in unknown_face_encodings:
-    results = face_recognition.compare_faces(known_face_encodings, unknown_face)
+    results = face_recognition.compare_faces(known_face_encodings, unknown_face, tolerance=config.tolerance)
 
     recognized_ids = np.append(recognized_ids, [ids[i] for i in range(len(results)) if results[i] == True])
 
   return np.unique(recognized_ids).tolist()
 
+# Execution
+
+def execute_identification_list(known_face_encodings, ids, args):
+  """
+  Run args['executor_func'] over every entry of args['items'] in a process
+  pool, then hand each non-empty match list to args['submit_func'].
+
+  known_face_encodings, ids: passed through unchanged to args['executor_func'].
+  args: dict with the keys
+    'name'          - label used in log messages (e.g. 'image', 'scene')
+    'items'         - list of dicts to process; each must have an 'id' key
+    'total'         - number of items, used for progress reporting
+    'executor_func' - called as executor_func(item, known_face_encodings, ids);
+                      must return a dict with 'id' and 'matched_ids' keys
+    'submit_func'   - called as submit_func(id, matched_ids) for each match
+
+  Errors for a single item are logged and do not stop the remaining items.
+  """
+  pending = []
+
+  with ProcessPoolExecutor(max_workers=10) as executor:
+    # Keep each submitted item next to its future: when future.result()
+    # raises there is no result to read the id from, so the error
+    # handlers below report the id of the item that was submitted.
+    for item in args['items']:
+      future = executor.submit(args['executor_func'], item, known_face_encodings, ids)
+      pending.append((item, future))
+
+    for count, (item, future) in enumerate(pending):
+      log.LogProgress(count / args['total'])
+
+      debug_print(future)
+
+      try:
+        result = future.result()
+
+        if not len(result['matched_ids']):
+          log.LogInfo(f"No matching performer found for {args['name']} id {result['id']}. Moving on to next {args['name']}...")
+        else:
+          log.LogDebug(f"updating {args['name']} {result['id']} with ")
+          args['submit_func'](result['id'], result['matched_ids'])
+      except IndexError:
+        log.LogError(f"No face found in tagged {args['name']} id {item['id']}. Moving on to next {args['name']}...")
+      except Exception:
+        # Best effort: log and continue, but let KeyboardInterrupt/SystemExit through
+        log.LogError(f"Unknown error comparing tagged {args['name']} id {item['id']}. Moving on to next {args['name']}...")
+
 # Imgs
 
 def identify_imgs(client, ids, known_face_encodings):
   log.LogInfo(f"Getting images tagged with '{config.tag_name_identify}'...")
 
   images = client.findImages(get_scrape_tag_filter(client))
-  count = 0
   total = len(images)
 
   if not total:
@@ -214,28 +260,19 @@ def identify_imgs(client, ids, known_face_encodings):
 
   log.LogInfo(f"Found {total} tagged images. Starting identification...")
 
-  for image in images:
-    log.LogProgress(count / total)
-
-    try:
-      matching_performer_ids = get_recognized_ids_from_path(image['path'], known_face_encodings, ids)
-    except IndexError:
-      log.LogError(f"No face found in tagged image id {image['id']}. Moving on to next image...")
-      continue
-    except:
-      log.LogError(f"Unknown error comparing tagged image id {image['id']}. Moving on to next image...")
-      continue
-
-    if not len(matching_performer_ids):
-      log.LogInfo(f"No matching performer found for image id {image['id']}. Moving on to next image...")
-      continue
-
-    client.updateImage({
-      'id': image['id'],
-      'performer_ids': matching_performer_ids
-    })
+  execution_args = {
+    'name': 'image',
+    'items': images,
+    'total': total,
+    'executor_func': get_recognized_ids_from_image,
+    'submit_func': client.addPerformersToImage
+  }
 
-    count += 1
+  execute_identification_list(
+    known_face_encodings, 
+    ids,
+    execution_args
+    )
 
   log.LogInfo('Image identification complete!')
 
@@ -245,7 +282,6 @@ def identify_scene_screenshots(client, ids, known_face_encodings):
   log.LogInfo(f"Getting scenes tagged with '{config.tag_name_identify}'...")
 
   scenes = client.getScenePaths(get_scrape_tag_filter(client))
-  count = 0
   total = len(scenes)
 
   if not total:
@@ -254,34 +290,24 @@ def identify_scene_screenshots(client, ids, known_face_encodings):
 
   log.LogInfo(f"Found {total} tagged scenes. Starting identification...")
 
-  for scene in scenes:
-    log.LogProgress(count / total)
-
-    matching_performer_ids = np.empty((0,0), int)
-    screenshot = scene['paths']['screenshot']
-
-    try:
-      matches = get_recognized_ids_from_url(screenshot, known_face_encodings, ids)
-      log.LogInfo(f"{len(matches)} performers identified in scene id {scene['id']}'s screenshot")
-      matching_performer_ids = np.append(matching_performer_ids, matches)
-    except IndexError:
-      log.LogError(f"No face found in screenshot for scene id {scene['id']}. Moving on to next image...")
-      continue
-    except Exception as error:
-      log.LogError(f"Error type = {type(error).__name__} comparing screenshot for scene id {scene['id']}. Moving on to next image...")
-      continue
-
-    matching_performer_ids = np.unique(matching_performer_ids).tolist()
-
-    log.LogDebug(f"Found performers in scene id {scene['id']} : {matching_performer_ids}")
+  execution_args = {
+    'name': 'scene',
+    'items': scenes,
+    'total': total,
+    'executor_func': get_recognized_ids_from_scene_screenshot,
+    'submit_func': client.addPerformersToScene
+  }
 
-    client.addPerformersToScene(scene['id'], matching_performer_ids)
+  execute_identification_list(
+    known_face_encodings, 
+    ids,
+    execution_args
+    )
 
-    count += 1
-
-  log.LogInfo("Screenshot identification complete!")
+  log.LogInfo("Scene screenshot identification complete!")
 
-main()
+if __name__ == "__main__":
+  main()
 
 
 # https://github.com/ageitgey/face_recognition

diff --git a/plugins/starIdentifier/star_identifier_config.py b/plugins/starIdentifier/star_identifier_config.py
@@ -16,4 +16,12 @@
 
 # If the identifier can't find a face for a performer, 
 # it will add this tag to that performer
-tag_name_encoding_error = 'star identifier performer error'
+tag_name_encoding_error = 'star identifier performer error'
+
+#
+# Star Identifier Settings
+#
+
+# Tolerance: the maximum distance between two faces for them to be considered a match.
+# Lower is stricter. 0.6 typically gives the best performance.
+tolerance = 0.6
diff --git a/plugins/starIdentifier/star_identifier_interface.py b/plugins/starIdentifier/star_identifier_interface.py
@@ -257,6 +257,12 @@ def updateImage(self, image_data):
 
 		self.__callGraphQL(query, variables)
 
+	def addPerformersToImage(self, image_id, performer_ids):
+		"""Call updateImage with the given image id and performer ids."""
+		image_data = dict(id=image_id, performer_ids=performer_ids)
+		self.updateImage(image_data)
+
 	def bulkPerformerAddTags(self, performer_ids, tag_ids):
 		query = """
 		mutation($ids: [ID!], $tag_ids: BulkUpdateIds) {