From 8b77aea2c4cac0035f5fbbbac74bb6fea819e397 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 12 Apr 2024 03:32:10 -0400 Subject: [PATCH] [CLEANUP] --- Swarm Orchestrator_state.json | 6 +- example.py | 8 +- examples/test.py | 45 ++++++++++ neo_sapiens/few_shot_prompts.py | 150 ++++++++++++++++++++++++++++++++ neo_sapiens/hass_schema.py | 29 +----- 5 files changed, 205 insertions(+), 33 deletions(-) create mode 100644 examples/test.py diff --git a/Swarm Orchestrator_state.json b/Swarm Orchestrator_state.json index 27763c2..944e303 100644 --- a/Swarm Orchestrator_state.json +++ b/Swarm Orchestrator_state.json @@ -1,10 +1,10 @@ { - "agent_id": "", + "agent_id": "", "agent_name": "Swarm Orchestrator", "agent_description": null, - "system_prompt": "Create an instruction prompt for an swarm orchestrator to create a series of personalized, agents for the following objective: what is the current squad of Ukrainian football national team? Who and where are they playing soon? to decompose a very complicated problem or tasks, the orchestrator is the team leader. Teach the orchestrator how to decompose the tasks to very certain agents with names, and system prompts, we need the plan, with a step by stpe instructions, number of agents, and a list of agents with a name, system prompt for each, and then the rules of the swarm, compact the prompt, and say only return JSON data in markdown and nothing else.Follow the schema here: \n{\n \"plan\": [\"Step 1\", \"Step 2\", \"Step 3\"],\n \"agents\": [\n {\n \"name\": \"Agent 1\",\n \"system_prompt\": \"Prompt 1\"\n },\n {\n \"name\": \"Agent 2\",\n \"system_prompt\": \"Prompt 2\"\n }\n ]\n}\n *############ Here are some examples:\n{\n \"plan\": [\"Room Management\", \"Guest Services\", \"Reservations Handling\", \"Facility Maintenance\", \"Staff Coordination\"],\n \"agents\": [\n {\n \"name\": \"Room Management Agent\",\n \"system_prompt\": \"Automate room assignments, minibar restocking, and housekeeping schedules\"\n },\n {\n \"name\": \"Guest Services Agent\",\n \"system_prompt\": \"Handle check-ins, check-outs, guest requests, and complaints efficiently\"\n },\n {\n \"name\": \"Reservations Agent\",\n \"system_prompt\": \"Manage room bookings, table reservations, and special requests\"\n },\n {\n \"name\": \"Maintenance Agent\",\n \"system_prompt\": \"Schedule and track maintenance tasks for facilities and rooms\"\n },\n {\n \"name\": \"Staff Coordination Agent\",\n \"system_prompt\": \"Optimize staff schedules, task assignments, and workload distribution\"\n }\n ]\n}\n and another example\n{\n \"plan\": [\"Problem Identification\", \"Solution Design\", \"Implementation\", \"Testing\", \"Deployment\"],\n \"agents\": [\n {\n \"name\": \"Identification Agent\",\n \"system_prompt\": \"Identify the problem\"\n },\n {\n \"name\": \"Design Agent\",\n \"system_prompt\": \"Design the solution\"\n },\n {\n \"name\": \"Implementation Agent\",\n \"system_prompt\": \"Implement the solution\"\n },\n {\n \"name\": \"Deployment Agent\",\n \"system_prompt\": \"Deploy the solution\"\n }\n ]\n}\n ", + "system_prompt": "Create an instruction prompt for an swarm orchestrator to create a series of personalized, agents for the following objective: Let's create a team of AI engineers to create a facial recognition model to decompose a very complicated problem or tasks, the orchestrator is the team leader. Teach the orchestrator how to decompose the tasks to very certain agents with names, and system prompts, we need the plan, with a step by stpe instructions, number of agents, and a list of agents with a name, system prompt for each, and then the rules of the swarm, compact the prompt, and say only return JSON data in markdown and nothing else.Follow the schema here: \n{\n \"plan\": [\"Step 1\", \"Step 2\", \"Step 3\"],\n \"agents\": [\n {\n \"name\": \"Agent 1\",\n \"system_prompt\": \"Prompt 1\"\n },\n {\n \"name\": \"Agent 2\",\n \"system_prompt\": \"Prompt 2\"\n }\n ]\n}\n *############ Here are some examples:\n{\n \"plan\": [\"Room Management\", \"Guest Services\", \"Reservations Handling\", \"Facility Maintenance\", \"Staff Coordination\"],\n \"agents\": [\n {\n \"name\": \"Room Management Agent\",\n \"system_prompt\": \"Automate room assignments, minibar restocking, and housekeeping schedules\"\n },\n {\n \"name\": \"Guest Services Agent\",\n \"system_prompt\": \"Handle check-ins, check-outs, guest requests, and complaints efficiently\"\n },\n {\n \"name\": \"Reservations Agent\",\n \"system_prompt\": \"Manage room bookings, table reservations, and special requests\"\n },\n {\n \"name\": \"Maintenance Agent\",\n \"system_prompt\": \"Schedule and track maintenance tasks for facilities and rooms\"\n },\n {\n \"name\": \"Staff Coordination Agent\",\n \"system_prompt\": \"Optimize staff schedules, task assignments, and workload distribution\"\n }\n ]\n}\n and another example\n{\n \"plan\": [\"Problem Identification\", \"Solution Design\", \"Implementation\", \"Testing\", \"Deployment\"],\n \"agents\": [\n {\n \"name\": \"Identification Agent\",\n \"system_prompt\": \"Identify the problem\"\n },\n {\n \"name\": \"Design Agent\",\n \"system_prompt\": \"Design the solution\"\n },\n {\n \"name\": \"Implementation Agent\",\n \"system_prompt\": \"Implement the solution\"\n },\n {\n \"name\": \"Deployment Agent\",\n \"system_prompt\": \"Deploy the solution\"\n }\n ]\n}\n ", "sop": null, - "short_memory": "system: Create an instruction prompt for an swarm orchestrator to create a series of personalized, agents for the following objective: what is the current squad of Ukrainian football national team? Who and where are they playing soon? to decompose a very complicated problem or tasks, the orchestrator is the team leader. Teach the orchestrator how to decompose the tasks to very certain agents with names, and system prompts, we need the plan, with a step by stpe instructions, number of agents, and a list of agents with a name, system prompt for each, and then the rules of the swarm, compact the prompt, and say only return JSON data in markdown and nothing else.Follow the schema here: \n{\n \"plan\": [\"Step 1\", \"Step 2\", \"Step 3\"],\n \"agents\": [\n {\n \"name\": \"Agent 1\",\n \"system_prompt\": \"Prompt 1\"\n },\n {\n \"name\": \"Agent 2\",\n \"system_prompt\": \"Prompt 2\"\n }\n ]\n}\n *############ Here are some examples:\n{\n \"plan\": [\"Room Management\", \"Guest Services\", \"Reservations Handling\", \"Facility Maintenance\", \"Staff Coordination\"],\n \"agents\": [\n {\n \"name\": \"Room Management Agent\",\n \"system_prompt\": \"Automate room assignments, minibar restocking, and housekeeping schedules\"\n },\n {\n \"name\": \"Guest Services Agent\",\n \"system_prompt\": \"Handle check-ins, check-outs, guest requests, and complaints efficiently\"\n },\n {\n \"name\": \"Reservations Agent\",\n \"system_prompt\": \"Manage room bookings, table reservations, and special requests\"\n },\n {\n \"name\": \"Maintenance Agent\",\n \"system_prompt\": \"Schedule and track maintenance tasks for facilities and rooms\"\n },\n {\n \"name\": \"Staff Coordination Agent\",\n \"system_prompt\": \"Optimize staff schedules, task assignments, and workload distribution\"\n }\n ]\n}\n and another example\n{\n \"plan\": [\"Problem Identification\", \"Solution Design\", \"Implementation\", \"Testing\", \"Deployment\"],\n \"agents\": [\n {\n \"name\": \"Identification Agent\",\n \"system_prompt\": \"Identify the problem\"\n },\n {\n \"name\": \"Design Agent\",\n \"system_prompt\": \"Design the solution\"\n },\n {\n \"name\": \"Implementation Agent\",\n \"system_prompt\": \"Implement the solution\"\n },\n {\n \"name\": \"Deployment Agent\",\n \"system_prompt\": \"Deploy the solution\"\n }\n ]\n}\n \n\n\nHuman:: what is the current squad of Ukrainian football national team? Who and where are they playing soon?\n\n\nSwarm Orchestrator: \n```json\n{\n \"plan\": [\n \"Research current Ukrainian national football team roster\",\n \"Identify upcoming matches and opponents\", \n \"Research player backgrounds and current club teams\"\n ],\n \"agents\": [\n {\n \"name\": \"Roster Agent\",\n \"system_prompt\": \"Provide the names and positions of the players currently on the Ukraine national football team roster.\"\n },\n { \n \"name\": \"Matches Agent\",\n \"system_prompt\": \"List the dates, opponents, locations and tournaments for the upcoming matches involving the Ukrainian national team.\"\n },\n {\n \"name\": \"Player Background Agent\", \n \"system_prompt\": \"For each player currently on the Ukraine roster, provide their date of birth, height, current club team and a brief bio.\"\n }\n ]\n}\n```\n\nThis breaks down the overall objective into three key subtasks, each handled by a specialized agent. The Roster Agent identifies the current players, the Matches Agent provides the upcoming schedule, and the Player Background Agent gives background details on each national team member. The plan shows the high-level workflow and agents allow delegation of distinct aspects.\n\n", + "short_memory": "system: Create an instruction prompt for an swarm orchestrator to create a series of personalized, agents for the following objective: Let's create a team of AI engineers to create a facial recognition model to decompose a very complicated problem or tasks, the orchestrator is the team leader. Teach the orchestrator how to decompose the tasks to very certain agents with names, and system prompts, we need the plan, with a step by stpe instructions, number of agents, and a list of agents with a name, system prompt for each, and then the rules of the swarm, compact the prompt, and say only return JSON data in markdown and nothing else.Follow the schema here: \n{\n \"plan\": [\"Step 1\", \"Step 2\", \"Step 3\"],\n \"agents\": [\n {\n \"name\": \"Agent 1\",\n \"system_prompt\": \"Prompt 1\"\n },\n {\n \"name\": \"Agent 2\",\n \"system_prompt\": \"Prompt 2\"\n }\n ]\n}\n *############ Here are some examples:\n{\n \"plan\": [\"Room Management\", \"Guest Services\", \"Reservations Handling\", \"Facility Maintenance\", \"Staff Coordination\"],\n \"agents\": [\n {\n \"name\": \"Room Management Agent\",\n \"system_prompt\": \"Automate room assignments, minibar restocking, and housekeeping schedules\"\n },\n {\n \"name\": \"Guest Services Agent\",\n \"system_prompt\": \"Handle check-ins, check-outs, guest requests, and complaints efficiently\"\n },\n {\n \"name\": \"Reservations Agent\",\n \"system_prompt\": \"Manage room bookings, table reservations, and special requests\"\n },\n {\n \"name\": \"Maintenance Agent\",\n \"system_prompt\": \"Schedule and track maintenance tasks for facilities and rooms\"\n },\n {\n \"name\": \"Staff Coordination Agent\",\n \"system_prompt\": \"Optimize staff schedules, task assignments, and workload distribution\"\n }\n ]\n}\n and another example\n{\n \"plan\": [\"Problem Identification\", \"Solution Design\", \"Implementation\", \"Testing\", \"Deployment\"],\n \"agents\": [\n {\n \"name\": \"Identification Agent\",\n \"system_prompt\": \"Identify the problem\"\n },\n {\n \"name\": \"Design Agent\",\n \"system_prompt\": \"Design the solution\"\n },\n {\n \"name\": \"Implementation Agent\",\n \"system_prompt\": \"Implement the solution\"\n },\n {\n \"name\": \"Deployment Agent\",\n \"system_prompt\": \"Deploy the solution\"\n }\n ]\n}\n \n\n\nHuman:: Let's create a team of AI engineers to create a facial recognition model\n\n\nSwarm Orchestrator: \n{\n \"plan\": [\"Decompose Task\", \"Gather Data\", \"Model Architectures\", \"Train Model\", \"Evaluate and Tune Model\", \"Package and Deploy Model\"], \n \"agents\": [\n {\n \"name\": \"Task Agent\", \n \"system_prompt\": \"Break down the overall facial recognition objective into smaller, well-defined sub-tasks and milestones\"\n },\n {\n \"name\": \"Data Agent\",\n \"system_prompt\": \"Assemble relevant facial image datasets; perform data cleaning, labeling, and preprocessing\"\n },\n {\n \"name\": \"Architecture Agent\", \n \"system_prompt\": \"Research and design appropriate neural network architectures for facial recognition\"\n },\n {\n \"name\": \"Model Agent\",\n \"system_prompt\": \"Iteratively train neural network models using prepared datasets\" \n },\n {\n \"name\": \"Evaluation Agent\",\n \"system_prompt\": \"Test models and benchmarks performance to identify needed improvements\"\n },\n {\n \"name\": \"Deployment Agent\",\n \"system_prompt\": \"Optimize, package and integrate top performing model for real-world applications\"\n }\n ]\n}\n\n", "loop_interval": 0, "retry_attempts": 3, "retry_interval": 1, diff --git a/example.py b/example.py index 45e33e9..e0407f6 100644 --- a/example.py +++ b/example.py @@ -1,8 +1,10 @@ -from neo_sapiens import run_swarm +from neo_sapiens.hass_schema import run_swarm # Run the swarm out = run_swarm( - "what is the current squad of Ukrainian football national team?" - " Who and where are they playing soon?" + ( + "Let's create a team of AI engineers to create a facial" + " recognition model" + ), ) print(out) diff --git a/examples/test.py b/examples/test.py new file mode 100644 index 0000000..a4999a7 --- /dev/null +++ b/examples/test.py @@ -0,0 +1,45 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class FaceNet(nn.Module): + """ + Convolutional neural network architecture for facial recognition, + trained to generate 128-d embeddings of face images. + + Architecture details: + - Takes 160x160 RGB face images as input + - Several convolutional layers, BatchNorm, MaxPooling + -culminating in a linear layer outputting 128-d embedding vector + """ + + def __init__(self): + super().__init__() + + self.conv1 = nn.Conv2d(3, 32, 5) + self.bn1 = nn.BatchNorm2d(32) + self.pool1 = nn.MaxPool2d(2, 2) + + self.conv2 = nn.Conv2d(32, 64, 5) + self.bn2 = nn.BatchNorm2d(64) + self.pool2 = nn.MaxPool2d(2, 2) + + self.conv3 = nn.Conv2d(64, 96, 3) + self.bn3 = nn.BatchNorm2d(96) + + self.conv4 = nn.Conv2d(96, 128, 3) + self.bn4 = nn.BatchNorm2d(128) + + self.fc1 = nn.Linear(128 * 8 * 8, 256) + self.fc2 = nn.Linear(256, 128) # 128-d embedding + + def forward(self, x): + x = self.pool1(F.relu(self.bn1(self.conv1(x)))) + x = self.pool2(F.relu(self.bn2(self.conv2(x)))) + x = F.relu(self.bn3(self.conv3(x))) + x = F.relu(self.bn4(self.conv4(x))) + + x = x.view(-1, 128 * 8 * 8) + x = F.relu(self.fc1(x)) + x = self.fc2(x) + return x diff --git a/neo_sapiens/few_shot_prompts.py b/neo_sapiens/few_shot_prompts.py index 5d67aaa..0f8b670 100644 --- a/neo_sapiens/few_shot_prompts.py +++ b/neo_sapiens/few_shot_prompts.py @@ -230,3 +230,153 @@ def select_workers(agents: str, task: str): f"These are the agents available for the task: {task} Agents" f" available: {agents}" ) + + +kaggle = """ + +Leash Bio - Predict New Medicines with BELKA +Predict small molecule-protein interactions using the Big Encoded Library for Chemical Assessment (BELKA) + + + +Overview + +Data + +Code + +Models + +Discussion + +Leaderboard + +Rules +Overview +In this competition, you’ll develop machine learning (ML) models to predict the binding affinity of small molecules to specific protein targets – a critical step in drug development for the pharmaceutical industry that would pave the way for more accurate drug discovery. You’ll help predict which drug-like small molecules (chemicals) will bind to three possible protein targets. + +Start + +8 days ago +Close +3 months to go +Merger & Entry +Description +Small molecule drugs are chemicals that interact with cellular protein machinery and affect the functions of this machinery in some way. Often, drugs are meant to inhibit the activity of single protein targets, and those targets are thought to be involved in a disease process. A classic approach to identify such candidate molecules is to physically make them, one by one, and then expose them to the protein target of interest and test if the two interact. This can be a fairly laborious and time-intensive process. + +The US Food and Drug Administration (FDA) has approved roughly 2,000 novel molecular entities in its entire history. However, the number of chemicals in druglike space has been estimated to be 10^60, a space far too big to physically search. There are likely effective treatments for human ailments hiding in that chemical space, and better methods to find such treatments are desirable to us all. + +To evaluate potential search methods in small molecule chemistry, competition host Leash Biosciences physically tested some 133M small molecules for their ability to interact with one of three protein targets using DNA-encoded chemical library (DEL) technology. This dataset, the Big Encoded Library for Chemical Assessment (BELKA), provides an excellent opportunity to develop predictive models that may advance drug discovery. + +Datasets of this size are rare and restricted to large pharmaceutical companies. The current best-curated public dataset of this kind is perhaps bindingdb, which, at 2.8M binding measurements, is much smaller than BELKA. + +This competition aims to revolutionize small molecule binding prediction by harnessing ML techniques. Recent advances in ML approaches suggest it might be possible to search chemical space by inference using well-trained computational models rather than running laboratory experiments. Similar progress in other fields suggest using ML to search across vast spaces could be a generalizable approach applicable to many domains. We hope that by providing BELKA we will democratize aspects of computational drug discovery and assist the community in finding new lifesaving medicines. + +Here, you’ll build predictive models to estimate the binding affinity of unknown chemical compounds to specified protein targets. You may use the training data provided; alternatively, there are a number of methods to make small molecule binding predictions without relying on empirical binding data (e.g. DiffDock, and this contest was designed to allow for such submissions). + +Your work will contribute to advances in small molecule chemistry used to accelerate drug discovery. + +Evaluation +This metric for this competition is the Mean Average Precision (micro) between the predicted probability and the observed target. + +Submission File +For each id in the test set, you must predict a probability for the binary target binds target. The file should contain a header and have the following format: + +id,binds +295246830,0.5 +295246831,0.5 +295246832,0.5 +etc. +Timeline +April 4, 2024 - Start Date. +July 1, 2024 - Entry Deadline. You must accept the competition rules before this date in order to compete. +July 1, 2024 - Team Merger Deadline. This is the last day participants may join or merge teams. +July 8, 2024 - Final Submission Deadline. +All deadlines are at 11:59 PM UTC on the corresponding day unless otherwise noted. The competition organizers reserve the right to update the contest timeline if they deem it necessary. + +Prizes +First Prize: $12,000 +Second Prize: $10,000 +Third Prize: $10,000 +Fourth Prize: $8,000 +Fifth Prize: $5,000 +Top Student Group: $5,000 to the highest performing student team. A team would be considered a student team if majority members (e.g. at least 3 out of a 5 member team) are students enrolled in a high school or university degree. In the case of an even number of members, half of them must be students. +Competition Host +Leash Biosciences is a discovery-stage biotechnology company that seeks to improve medicinal chemistry with machine learning approaches and massive data collection. Leash is comprised of wet lab scientists and dry lab scientists in equal numbers, and is proudly headquartered in Salt Lake City, Utah, USA. + +Additional Details +Chemical Representations +One of the goals of this competition is to explore and compare many different ways of representing molecules. Small molecules have been represented with SMILES, graphs, 3D structures, and more, including more esoteric methods such as spherical convolutional neural nets. We encourage competitors to explore not only different methods of making predictions but also to try different ways of representing the molecules. + +We provide the molecules in SMILES format. + +SMILES +SMILES is a concise string notation used to represent the structure of chemical molecules. It encodes the molecular graph, including atoms, bonds, connectivity, and stereochemistry as a linear sequence of characters, by traversing the molecule graph. SMILES is widely used in machine learning applications for chemistry, such as molecular property prediction, drug discovery, and materials design, as it provides a standardized and machine-readable format for representing and manipulating chemical structures. + +The SMILES in this dataset should be sufficient to be translated into any other chemical representation format that you want to try. A simple way to perform some of these translations is with RDKit. + +Details about the experiments +DELs are libraries of small molecules with unique DNA barcodes covalently attached +Traditional high-throughput screening requires keeping individual small molecules in separate, identifiable tubes and demands a lot of liquid handling to test each one of those against the protein target of interest in a separate reaction. The logistical overhead of these efforts tends to restrict screening collections, called libraries, to 50K-5M small molecules. A scalable solution to this problem, DNA-encoded chemical libraries, was described in 2009. As DNA sequencing got cheaper and cheaper, it became clear that DNA itself could be used as a label to identify, and deconvolute, collections of molecules in a complex mixture. DELs leverage this DNA sequencing technology. + +These barcoded small molecules are in a pool (many in a single tube, rather than one tube per small molecule) and are exposed to the protein target of interest in solution. The protein target of interest is then rinsed to remove small molecules in the DEL that don’t bind the target, and the remaining binders are collected and their DNA sequenced. + +DELs are manufactured by combining different building blocks +An intuitive way to think about DELs is to imagine a Mickey Mouse head as an example of a small molecule in the DEL. We attach the DNA barcode to Mickey’s chin. Mickey’s left ear is connected by a zipper; Mickey’s right ear is connected by velcro. These attachment points of zippers and velcro are analogies to different chemical reactions one might use to construct the DEL. + +We could purchase ten different Mickey Mouse faces, ten different zipper ears, and ten different velcro ears, and use them to construct our small molecule library. By creating every combination of these three, we’ll have 1,000 small molecules, but we only needed thirty building blocks (faces and ears) to make them. This combinatorial approach is what allows DELs to have so many members: the library in this competition is composed of 133M small molecules. The 133M small molecule library used here, AMA014, was provided by AlphaMa. It has a triazine core and superficially resembles the DELs described here. + + + +Dataset Description +Overview +The examples in the competition dataset are represented by a binary classification of whether a given small molecule is a binder or not to one of three protein targets. The data were collected using DNA-encoded chemical library (DEL) technology. + +We represent chemistry with SMILES (Simplified Molecular-Input Line-Entry System) and the labels as binary binding classifications, one per protein target of three targets. + +Files +[train/test].[csv/parquet] - The train or test data, available in both the csv and parquet formats. + +id - A unique example_id that we use to identify the molecule-binding target pair. +buildingblock1_smiles - The structure, in SMILES, of the first building block +buildingblock2_smiles - The structure, in SMILES, of the second building block +buildingblock3_smiles - The structure, in SMILES, of the third building block +molecule_smiles - The structure of the fully assembled molecule, in SMILES. This includes the three building blocks and the triazine core. Note we use a [Dy] as the stand-in for the DNA linker. +protein_name - The protein target name +binds - The target column. A binary class label of whether the molecule binds to the protein. Not available for the test set. +sample_submission.csv - A sample submission file in the correct format + +Competition data +All data were generated in-house at Leash Biosciences. We are providing roughly 98M training examples per protein, 200K validation examples per protein, and 360K test molecules per protein. To test generalizability, the test set contains building blocks that are not in the training set. These datasets are very imbalanced: roughly 0.5% of examples are classified as binders; we used 3 rounds of selection in triplicate to identify binders experimentally. Following the competition, Leash will make all the data available for future use (3 targets * 3 rounds of selection * 3 replicates * 133M molecules, or 3.6B measurements). + +Targets +Proteins are encoded in the genome, and names of the genes encoding those proteins are typically bestowed by their discoverers and regulated by the Hugo Gene Nomenclature Committee. The protein products of these genes can sometimes have different names, often due to the history of their discovery. + +We screened three protein targets for this competition. + +EPHX2 (sEH) +The first target, epoxide hydrolase 2, is encoded by the EPHX2 genetic locus, and its protein product is commonly named “soluble epoxide hydrolase”, or abbreviated to sEH. Hydrolases are enzymes that catalyze certain chemical reactions, and EPHX2/sEH also hydrolyzes certain phosphate groups. EPHX2/sEH is a potential drug target for high blood pressure and diabetes progression, and small molecules inhibiting EPHX2/sEH from earlier DEL efforts made it to clinical trials. + +EPHX2/sEH was also screened with DELs, and hits predicted with ML approaches, in a recent study but the screening data were not published. We included EPHX2/sEH to allow contestants an external gut check for model performance by comparing to these previously-published results. + +We screened EPHX2/sEH purchased from Cayman Chemical, a life sciences commercial vendor. For those contestants wishing to incorporate protein structural information in their submissions, the amino sequence is positions 2-555 from UniProt entry P34913, the crystal structure can be found in PDB entry 3i28, and predicted structure can be found in AlphaFold2 entry 34913. Additional EPHX2/sEH crystal structures with ligands bound can be found in PDB. + +BRD4 +The second target, bromodomain 4, is encoded by the BRD4 locus and its protein product is also named BRD4. Bromodomains bind to protein spools in the nucleus that DNA wraps around (called histones) and affect the likelihood that the DNA nearby is going to be transcribed, producing new gene products. Bromodomains play roles in cancer progression and a number of drugs have been discovered to inhibit their activities. + +BRD4 has been screened with DEL approaches previously but the screening data were not published. We included BRD4 to allow contestants to evaluate candidate molecules for oncology indications. + +We screened BRD4 purchased from Active Motif, a life sciences commercial vendor. For those contestants wishing to incorporate protein structural information in their submissions, the amino acid sequence is positions 44-460 from UniProt entry O60885-1, the crystal structure (for a single domain) can be found in PDB entry 7USK and predicted structure can be found in AlphaFold2 entry O60885. Additional BRD4 crystal structures with ligands bound can be found in PDB. + +ALB (HSA) +The third target, serum albumin, is encoded by the ALB locus and its protein product is also named ALB. The protein product is sometimes abbreviated as HSA, for “human serum albumin”. ALB, the most common protein in the blood, is used to drive osmotic pressure (to bring fluid back from tissues into blood vessels) and to transport many ligands, hormones, fatty acids, and more. + +Albumin, being the most abundant protein in the blood, often plays a role in absorbing candidate drugs in the body and sequestering them from their target tissues. Adjusting candidate drugs to bind less to albumin and other blood proteins is a strategy to help these candidate drugs be more effective. + +ALB has been screened with DEL approaches previously but the screening data were not published. We included ALB to allow contestants to build models that might have a larger impact on drug discovery across many disease types. The ability to predict ALB binding well would allow drug developers to improve their candidate small molecule therapies much more quickly than physically manufacturing many variants and testing them against ALB empirically in an iterative process. + +We screened ALB purchased from Active Motif. For those contestants wishing to incorporate protein structural information in their submissions, the amino acid sequence is positions 25 to 609 from UniProt entry P02768, the crystal structure can be found in PDB entry 1AO6, and predicted structure can be found in AlphaFold2 entry P02768. Additional ALB crystal structures with ligands bound can be found in PDB. + +Good luck! + +""" diff --git a/neo_sapiens/hass_schema.py b/neo_sapiens/hass_schema.py index 4e95826..ab4b81f 100644 --- a/neo_sapiens/hass_schema.py +++ b/neo_sapiens/hass_schema.py @@ -271,7 +271,6 @@ def master_creates_agents(task: str, *args, **kwargs): ) # Call the agents [ Main Agents ] - # Create the agents boss = Agent( agent_name="Swarm Orchestrator", system_prompt=boss_sys_prompt, @@ -283,8 +282,8 @@ def master_creates_agents(task: str, *args, **kwargs): autosave=True, dashboard=False, verbose=True, - stopping_token="", interactive=True, + stopping_token="", *args, **kwargs, ) @@ -292,10 +291,9 @@ def master_creates_agents(task: str, *args, **kwargs): # Task 1: Run the agent and parse the output logger.info("Creating the workers ...") out = agent.run(str(task)) + json_agentic_output = out # logger.info(f"Output: {out}") out = parse_json_from_input(out) - json_agentic_output = out - # logger.info(str(out)) plan, agents = out # Task 2: Print agent names and create agents @@ -317,29 +315,6 @@ def master_creates_agents(task: str, *args, **kwargs): return out # , agents, plan -def message_metadata_log(task: str, message: str, agent, plan: str): - """ - Create a document with metadata for a log message. - - Args: - task (str): The task associated with the log message. - message (str): The log message. - agent: The agent object. - plan (str): The plan associated with the log message. - - Returns: - dict: A dictionary containing the log message metadata. - """ - doc = { - "message": message, - "task": task, - "agent_name": agent.agent_name, - "plan": plan, - } - - return doc - - def run_swarm(task: str = None, *args, **kwargs): """ Run a task using the Swarm Orchestrator agent.