diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index 0cb4402c8..469cd8d50 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -293,6 +293,50 @@ "print(\"\\n\".join(str(p) for p in outputs.out_file))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Executing tasks in parallel\n", + "\n", + "By default, Pydra will use the *debug* worker, which executes each task sequentially.\n", + "This makes it easier to debug tasks and workflows, however, in most cases, once a workflow\n", + "is ready to go, a concurrent worker is preferable so tasks can be executed in parallel\n", + "(see [Workers](./2-advanced-execution.html#Workers)). To use multiple processes on a\n", + "workstation, select the `cf` worker option when executing the task/workflow.\n", + "\n", + "Note that when multiprocessing in Python on Windows and macOS (and good practice on Linux/POSIX\n", + "OSs for compatibility), you need to place a `if __name__ == \"__main__\"` block when\n", + "executing in top-level scripts to allow the script to be imported, but not executed,\n", + "by subprocesses." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nifti_dir' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;66;03m# <-- Add this block to allow the script to imported by subprocesses\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m MrGrid(operation\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregrid\u001b[39m\u001b[38;5;124m\"\u001b[39m, voxel\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m))\u001b[38;5;241m.\u001b[39msplit(in_file\u001b[38;5;241m=\u001b[39m\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 5\u001b[0m outputs \u001b[38;5;241m=\u001b[39m mrgrid(worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcf\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# <-- Select the \"cf\" worker here\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(p) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m outputs\u001b[38;5;241m.\u001b[39mout_file))\n", + "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" + ] + } + ], + "source": [ + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "if __name__ == \"__main__\": # <-- Add 
this block to allow the script to imported by subprocesses\n", + " mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5,0.5,0.5)).split(in_file=nifti_dir.iterdir())\n", + " outputs = mrgrid(worker=\"cf\") # <-- Select the \"cf\" worker here\n", + " print(\"\\n\".join(str(p) for p in outputs.out_file))" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 581ac71f4..7d54fd55b 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -64,38 +64,21 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev144+g6a590e9d.d20250124\n" - ] - }, - { - "ename": "RuntimeError", - "evalue": "Graph of 'Workflow(name='Split', inputs=Split(_constructed=None, defn=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), constructor=.Split at 0x114510d60>), outputs=SplitOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=...), _state=, _cont_dim=None, _inner_cont_dim={}))), _nodes={'TenToThePower': Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=...)), _state=, _cont_dim=None, _inner_cont_dim={})})' workflow is not empty, but not able to get more tasks - something has gone wrong when retrieving the results predecessors:\n\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m ten_to_the_power \u001b[38;5;241m=\u001b[39m TenToThePower()\u001b[38;5;241m.\u001b[39msplit(p\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m])\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Run the 5 tasks in parallel split across 3 processes\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mten_to_the_power\u001b[49m\u001b[43m(\u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_procs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 14\u001b[0m p1, p2, p3, p4, p5 \u001b[38;5;241m=\u001b[39m outputs\u001b[38;5;241m.\u001b[39mout\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m10^5 = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp5\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:194\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, 
messenger_args, **kwargs)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter(\n\u001b[1;32m 184\u001b[0m audit_flags\u001b[38;5;241m=\u001b[39maudit_flags,\n\u001b[1;32m 185\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 193\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[0;32m--> 194\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msub\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 196\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__notes__\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m WORKER_KWARG_FAIL_NOTE \u001b[38;5;129;01min\u001b[39;00m e\u001b[38;5;241m.\u001b[39m__notes__:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:156\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, task_def)\u001b[0m\n\u001b[1;32m 154\u001b[0m task \u001b[38;5;241m=\u001b[39m Task(task_def, submitter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtask\u001b[39m\u001b[38;5;124m\"\u001b[39m, environment\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment)\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task\u001b[38;5;241m.\u001b[39mis_async: \u001b[38;5;66;03m# Only workflow tasks can be async\u001b[39;00m\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_async\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 316\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:51\u001b[0m, in \u001b[0;36mWorker.run_async\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_async\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m, rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m task\u001b[38;5;241m.\u001b[39mrun_async(rerun\u001b[38;5;241m=\u001b[39mrerun)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:416\u001b[0m, in \u001b[0;36mTask.run_async\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 416\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39m_run_async(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 417\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:709\u001b[0m, in \u001b[0;36mWorkflowDef._run_async\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_async\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 708\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow asynchronously.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 709\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m task\u001b[38;5;241m.\u001b[39msubmitter\u001b[38;5;241m.\u001b[39mexpand_workflow_async(task)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:285\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow_async\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hashes_have_changed:\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 274\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSet loglevel to \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdebug\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m in order to track hash changes \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthroughout the execution of the workflow.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mor more types in your interface inputs.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 284\u001b[0m )\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(msg)\n\u001b[1;32m 286\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task\u001b[38;5;241m.\u001b[39mis_async:\n", - "\u001b[0;31mRuntimeError\u001b[0m: Graph of 'Workflow(name='Split', inputs=Split(_constructed=None, defn=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), constructor=.Split at 0x114510d60>), outputs=SplitOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=...), _state=, _cont_dim=None, _inner_cont_dim={}))), _nodes={'TenToThePower': Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=...)), _state=, _cont_dim=None, _inner_cont_dim={})})' workflow is not empty, but not able to get more tasks - something has gone wrong when retrieving the results predecessors:\n\n" + "[TenToThePower(p=1, function=), TenToThePower(p=2, function=), TenToThePower(p=3, function=), TenToThePower(p=4, function=), 
TenToThePower(p=5, function=)]\n", + "8ebcb54492b1642d6ea257afdb33786d\n", + "8ebcb54492b1642d6ea257afdb33786d\n" ] } ], "source": [ + "\n", "\n", "from pydra.design import python\n", "\n", @@ -107,12 +90,37 @@ "\n", " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", "\n", - " # Run the 5 tasks in parallel split across 3 processes\n", - " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + " from pydra.design import workflow\n", + " from pydra.engine.core import Workflow\n", + " from pydra.engine.specs import TaskDef\n", + " from pydra.engine.helpers import list_fields\n", + "\n", + " output_types = {o.name: list[o.type] for o in list_fields(ten_to_the_power.Outputs)}\n", + " \n", + " @workflow.define(outputs=output_types)\n", + " def Split(defn: TaskDef):\n", + " node = workflow.add(defn)\n", + " return tuple(getattr(node, o) for o in output_types)\n", + "\n", + " split = Split(defn=ten_to_the_power)\n", + "\n", + " wf = Workflow.construct(split)\n", + " splits = list(wf[\"TenToThePower\"]._split_definition().values())\n", + "\n", + " print(splits)\n", + "\n", + " print(splits[0]._hash)\n", + " print(splits[0]._hash)\n", + " \n", + "\n", + " \n", + "\n", + " # # Run the 5 tasks in parallel split across 3 processes\n", + " # outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", "\n", - " p1, p2, p3, p4, p5 = outputs.out\n", + " # p1, p2, p3, p4, p5 = outputs.out\n", "\n", - " print(f\"10^5 = {p5}\")" + " # print(f\"10^5 = {p5}\")" ] }, { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 60000fd95..2a91a6edb 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -9,7 +9,7 @@ def TenToThePower(p: int) -> int: ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) # Run the 5 tasks in parallel split across 3 processes - outputs = ten_to_the_power(worker="cf", n_procs=3) + outputs = ten_to_the_power(worker="cf", n_procs=3, clean_stale_locks=True) p1, p2, p3, p4, p5 = outputs.out diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 292235acd..7213881d5 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -3,6 +3,7 @@ import json import logging import os +import inspect import sys from pathlib import Path import typing as ty @@ -23,6 +24,7 @@ from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray from .node import Node +from datetime import datetime from fileformats.generic import FileSet from .specs import ( RuntimeSpec, @@ -458,6 +460,13 @@ def done(self): return True return False + @property + def run_start_time(self) -> datetime | None: + """Check whether the task is currently running.""" + if not self.lockfile.exists(): + return None + return datetime.fromtimestamp(self.lockfile.stat().st_ctime) + def _combined_output(self, return_inputs=False): combined_results = [] for gr, ind_l in self.state.final_combined_ind_mapping.items(): @@ -523,7 +532,7 @@ def _check_for_hash_changes(self): field = getattr(attr.fields(type(self.definition)), changed) val = getattr(self.definition, changed) field_type = type(val) - if issubclass(field.type, FileSet): + if inspect.isclass(field.type) and issubclass(field.type, FileSet): details += ( f"- {changed}: value passed to the {field.type} field is of type " f"{field_type} ('{val}'). 
If it is intended to contain output data " diff --git a/pydra/engine/state.py b/pydra/engine/state.py index c97d71a53..ef65487ca 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -39,6 +39,11 @@ def __init__(self, indices: dict[str, int] | None = None): else: self.indices = OrderedDict(sorted(indices.items())) + def __repr__(self): + return ( + "StateIndex(" + ", ".join(f"{n}={v}" for n, v in self.indices.items()) + ")" + ) + def __hash__(self): return hash(tuple(self.indices.items())) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index b9af10488..40fef6d8f 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -7,6 +7,7 @@ from pathlib import Path from tempfile import mkdtemp from copy import copy +from datetime import datetime from collections import defaultdict from .workers import Worker, WORKERS from .graph import DiGraph @@ -21,7 +22,7 @@ from .core import Task from pydra.utils.messenger import AuditFlag, Messenger from pydra.utils import user_cache_dir - +from pydra.design import workflow import logging logger = logging.getLogger("pydra.submitter") @@ -62,20 +63,37 @@ class Submitter: Messengers, by default None messenger_args : dict, optional Messenger arguments, by default None + clean_stale_locks : bool, optional + Whether to clean stale lock files, i.e. lock files that were created before the + start of the current run. Don't set if using a global cache where there are + potentially multiple workflows that are running concurrently. By default (None), + lock files will be cleaned if the *debug* worker is used **kwargs : dict Keyword arguments to pass on to the worker initialisation """ + cache_dir: os.PathLike + worker: Worker + environment: "Environment | None" + rerun: bool + cache_locations: list[os.PathLike] + audit_flags: AuditFlag + messengers: ty.Iterable[Messenger] + messenger_args: dict[str, ty.Any] + clean_stale_locks: bool + run_start_time: datetime | None + def __init__( self, cache_dir: os.PathLike | None = None, - worker: ty.Union[str, ty.Type[Worker]] = "debug", + worker: str | ty.Type[Worker] | Worker = "debug", environment: "Environment | None" = None, rerun: bool = False, cache_locations: list[os.PathLike] | None = None, audit_flags: AuditFlag = AuditFlag.NONE, messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, + clean_stale_locks: bool | None = None, **kwargs, ): @@ -113,6 +131,12 @@ def __init__( except TypeError as e: e.add_note(WORKER_KWARG_FAIL_NOTE) raise + self.run_start_time = None + self.clean_stale_locks = ( + clean_stale_locks + if clean_stale_locks is not None + else (self.worker_name == "debug") + ) self.worker_kwargs = kwargs self._worker.loop = self.loop @@ -133,18 +157,16 @@ def __call__( task_def._check_rules() # If the outer task is split, create an implicit workflow to hold the split nodes if task_def._splitter: - - from pydra.design import workflow from pydra.engine.specs import TaskDef output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} @workflow.define(outputs=output_types) - def Split(defn: TaskDef): + def Split(defn: TaskDef, output_types: dict): node = workflow.add(defn) return tuple(getattr(node, o) for o in output_types) - task_def = Split(defn=task_def) + task_def = Split(defn=task_def, output_types=output_types) elif task_def._combiner: raise ValueError( @@ -152,17 +174,23 @@ def Split(defn: TaskDef): "Use the `split` method to split the task before combining." 
) task = Task(task_def, submitter=self, name="task", environment=self.environment) - if task.is_async: # Only workflow tasks can be async - self.loop.run_until_complete(self.worker.run_async(task, rerun=self.rerun)) - else: - self.worker.run(task, rerun=self.rerun) + try: + self.run_start_time = datetime.now() + if task.is_async: # Only workflow tasks can be async + self.loop.run_until_complete( + self.worker.run_async(task, rerun=self.rerun) + ) + else: + self.worker.run(task, rerun=self.rerun) + finally: + self.run_start_time = None PersistentCache().clean_up() result = task.result() if result is None: if task.lockfile.exists(): raise RuntimeError( f"Task {task} has a lockfile, but no result was found. " - "This may be due to another submission process running, or the hard " + "This may be due to another submission process running concurrently, or the hard " "interrupt (e.g. a debugging abortion) interrupting a previous run. " f"In the case of an interrupted run, please remove {str(task.lockfile)!r} " "and resubmit." ) @@ -228,18 +256,30 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non # this might be related to some delays saving the files # so try to get_runnable_tasks for another minute ii = 0 - while not tasks and exec_graph.nodes: + while not tasks and any(not n.done for n in exec_graph.nodes): tasks = self.get_runnable_tasks(exec_graph) ii += 1 # don't block the event loop! await asyncio.sleep(1) - if ii > 60: + if ii > 10: + not_done = "\n".join( + ( + f"{n.name}: started={bool(n.started)}, " + f"blocked={list(n.blocked)}, queued={list(n.queued)}" + ) + for n in exec_graph.nodes + if not n.done + ) msg = ( - f"Graph of '{wf}' workflow is not empty, but not able to get " - "more tasks - something has gone wrong when retrieving the " - "results predecessors:\n\n" + "Something has gone wrong when retrieving the predecessor " + f"results. Not able to get any more tasks, but the following " + f"nodes of the {wf.name!r} workflow are not done:\n{not_done}\n\n" ) - # Get blocked tasks and the predecessors they are waiting on + not_done = [n for n in exec_graph.nodes if not n.done] + msg += "\n" + ", ".join( + f"{t.name}: {t.done}" for t in not_done[0].queued.values() + ) + # Get blocked tasks and the predecessors they are blocked on outstanding: dict[Task[DefType], list[Task[DefType]]] = { t: [ p for p in exec_graph.predecessors[t.name] if not p.done @@ -248,11 +288,11 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non } hashes_have_changed = False - for task, waiting_on in outstanding.items(): - if not waiting_on: + for task, blocked_on in outstanding.items(): + if not blocked_on: continue msg += f"- '{task.name}' node blocked due to\n" - for pred in waiting_on: + for pred in blocked_on: if ( pred.checksum != wf.inputs._graph_checksums[pred.name] @@ -302,13 +342,21 @@ def close(self): """ Close submitter. - Do not close previously running loop. + Do not close a previously running event loop. """ self.worker.close() if self._own_loop: self.loop.close() + def _check_locks(self, tasks: list[Task]) -> None: + """Check for stale lock files and remove them.""" + if self.clean_stale_locks: + for task in tasks: + start_time = task.run_start_time + if start_time and start_time < self.run_start_time: + task.lockfile.unlink() + def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """Parse a graph and return all runnable tasks.
@@ -338,6 +386,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: if not node.started: not_started.add(node) tasks.extend(node.get_runnable_tasks(graph)) + self._check_locks(tasks) return tasks @property @@ -369,10 +418,12 @@ class NodeExecution(ty.Generic[DefType]): errored: dict[StateIndex | None, "Task[DefType]"] # List of tasks that couldn't be run due to upstream errors unrunnable: dict[StateIndex | None, list["Task[DefType]"]] - # List of tasks that are running - running: dict[StateIndex | None, "Task[DefType]"] - # List of tasks that are waiting on other tasks to complete before they can be run - waiting: dict[StateIndex | None, "Task[DefType]"] + # List of tasks that are queued + queued: dict[StateIndex | None, "Task[DefType]"] + # List of tasks that are running, along with the time they started + running: dict[StateIndex | None, tuple["Task[DefType]", datetime]] + # List of tasks that are blocked on other tasks to complete before they can be run + blocked: dict[StateIndex | None, "Task[DefType]"] _tasks: dict[StateIndex | None, "Task[DefType]"] | None @@ -391,10 +442,11 @@ def __init__( self.submitter = submitter # Initialize the state dictionaries self._tasks = None - self.waiting = {} + self.blocked = {} self.successful = {} self.errored = {} - self.running = {} + self.queued = {} + self.running = {} # Tasks that are currently running, along with their start times self.unrunnable = defaultdict(list) self.state_names = self.node.state.names self.workflow_inputs = workflow_inputs @@ -430,18 +482,44 @@ def started(self) -> bool: self.successful or self.errored or self.unrunnable - or self.running - or self.waiting + or self.queued + or self.blocked ) @property def done(self) -> bool: - return self.started and not (self.running or self.waiting) + self.update_status() + if not self.started: + return False + # The node is done when no tasks remain queued, blocked or running + return not (self.queued or self.blocked or self.running) + + def update_status(self) -> None: + """Updates the status of the tasks in the node.""" + if not self.started: + return + # Check to see if any previously queued tasks have completed + for index, task in list(self.queued.items()): + if task.done: + self.successful[task.state_index] = self.queued.pop(index) + elif task.errored: + self.errored[task.state_index] = self.queued.pop(index) + elif task.run_start_time: + self.running[task.state_index] = ( + self.queued.pop(index), + task.run_start_time, + ) + # Check to see if any previously running tasks have completed + for index, (task, start_time) in list(self.running.items()): + if task.done: + self.successful[task.state_index] = self.running.pop(index)[0] + elif task.errored: + self.errored[task.state_index] = self.running.pop(index)[0] @property def all_failed(self) -> bool: return (self.unrunnable or self.errored) and not ( - self.successful or self.waiting or self.running + self.successful or self.blocked or self.queued ) def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: @@ -470,7 +548,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """For a given node, check to see which tasks have been successfully run, are ready - to run, can't be run due to upstream errors, or are waiting on other tasks to complete. + to run, can't be run due to upstream errors, or are blocked until other tasks complete.
Parameters ---------- @@ -488,29 +566,23 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: runnable: list["Task[DefType]"] = [] self.tasks # Ensure tasks are loaded if not self.started: - self.waiting = copy(self._tasks) - # Check to see if any previously running tasks have completed - for index, task in list(self.running.items()): - if task.done: - self.successful[task.state_index] = self.running.pop(index) - elif task.errored: - self.errored[task.state_index] = self.running.pop(index) - # Check to see if any waiting tasks are now runnable/unrunnable - for index, task in list(self.waiting.items()): + self.blocked = copy(self._tasks) + # Check to see if any blocked tasks are now runnable/unrunnable + for index, task in list(self.blocked.items()): pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: if index not in pred.successful: is_runnable = False if index in pred.errored: - self.unrunnable[index].append(self.waiting.pop(index)) + self.unrunnable[index].append(self.blocked.pop(index)) if index in pred.unrunnable: self.unrunnable[index].extend(pred.unrunnable[index]) - self.waiting.pop(index) + self.blocked.pop(index) break if is_runnable: - runnable.append(self.waiting.pop(index)) - self.running.update({t.state_index: t for t in runnable}) + runnable.append(self.blocked.pop(index)) + self.queued.update({t.state_index: t for t in runnable}) return runnable diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 224af25fb..a836eaddf 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -4,12 +4,9 @@ import os import struct import inspect -import re from datetime import datetime import typing as ty import types -import ast -import cloudpickle as cp from pathlib import Path from collections.abc import Mapping from functools import singledispatch @@ -331,7 +328,17 @@ def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: elif hasattr(obj, "__slots__"): dct = {attr: getattr(obj, attr) for attr in obj.__slots__} else: - dct = obj.__dict__ + try: + dct = obj.__dict__ + except AttributeError: + dct = { + n: getattr(obj, n) + for n in dir(obj) + if not ( + (n.startswith("__") and n.endswith("__")) + or inspect.ismethod(getattr(obj, n)) + ) + } yield from bytes_repr_mapping_contents(dct, cache) yield b"}" @@ -525,31 +532,39 @@ def bytes_repr_set(obj: Set, cache: Cache) -> Iterator[bytes]: yield b"}" +@register_serializer +def bytes_repr_code(obj: types.CodeType, cache: Cache) -> Iterator[bytes]: + yield b"code:(" + yield from bytes_repr_sequence_contents( + ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_stacksize, + obj.co_flags, + obj.co_code, + obj.co_consts, + obj.co_names, + obj.co_varnames, + obj.co_filename, + obj.co_freevars, + obj.co_name, + obj.co_firstlineno, + obj.co_lnotab, + obj.co_cellvars, + ), + cache, + ) + yield b")" + + @register_serializer def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes]: """Serialize a function, attempting to use the AST of the source code if available otherwise falling back to using cloudpickle to serialize the byte-code of the function.""" - try: - src = inspect.getsource(obj) - except OSError: - # Fallback to using cloudpickle to serialize the function if the source - # code is not available - bytes_repr = cp.dumps(obj) - else: - indent = re.match(r"(\s*)", src).group(1) - if indent: - src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) - src_ast = ast.parse(src) - # Remove the function 
definition from the source code - bytes_repr = ast.dump( - src_ast, annotate_fields=False, include_attributes=False - ).encode() - - yield b"function:(" - for i in range(0, len(bytes_repr), FUNCTION_SRC_CHUNK_LEN_DEFAULT): - yield hash_single(bytes_repr[i : i + FUNCTION_SRC_CHUNK_LEN_DEFAULT], cache) - yield b")" + yield from bytes_repr(obj.__code__, cache) def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[bytes]:
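Note on the new `clean_stale_locks` option documented in the `Submitter` docstring above: lock files created before the current run started are only cleaned automatically under the *debug* worker, so runs using a concurrent worker have to opt in explicitly. A minimal usage sketch, based on the `tst.py` change in this diff (the `if __name__ == "__main__"` guard follows the getting-started tutorial; forwarding of `n_procs` and `clean_stale_locks` through the task call to the `Submitter`/worker is assumed from the `**kwargs` handling shown above):

```python
from pydra.design import python


@python.define
def TenToThePower(p: int) -> int:
    return 10**p


if __name__ == "__main__":  # guard so the "cf" worker's subprocesses can import this script safely
    ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])

    # clean_stale_locks removes lock files left behind by a previously interrupted run;
    # it defaults to True only for the "debug" worker, so enable it explicitly here
    outputs = ten_to_the_power(worker="cf", n_procs=3, clean_stale_locks=True)

    print(outputs.out)  # one result per split element: 10**1 ... 10**5
```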