From 1f625dcfd89ad8f8e698aec8b944e145ad032ce7 Mon Sep 17 00:00:00 2001 From: Anders Madsen <28491857+Axedyson@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:40:09 +0100 Subject: [PATCH 01/14] Remove unused import statement as it causes tkinter error --- src/ai_hawk/job_manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index 112af685..71303065 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -5,7 +5,6 @@ from itertools import product from pathlib import Path import traceback -from turtle import color from inputimeout import inputimeout, TimeoutOccurred from selenium.common.exceptions import NoSuchElementException From 75d19efc6fa0af4c13e86590c1d314e38fb23ac0 Mon Sep 17 00:00:00 2001 From: cjbbb <53784676+cjbbb@users.noreply.github.com> Date: Wed, 27 Nov 2024 02:33:21 -0500 Subject: [PATCH 02/14] temporary solution for Linkedin pages changes the changed job_manager can work well to "applied job" pages. --- src/ai_hawk/job_manager.py | 61 ++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index 112af685..c0d2e400 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -253,8 +253,10 @@ def get_jobs_from_page(self, scroll=False): pass try: - # XPath query to find the ul tag with class scaffold-layout__list-container - jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]" + # XPath query to find the ul tag with class scaffold-layout__list + jobs_xpath_query = ( + "//div[contains(@class, 'scaffold-layout__list-detail-container')]//ul" + ) jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query) if scroll: @@ -264,7 +266,10 @@ def get_jobs_from_page(self, scroll=False): browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement) browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True) - job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]") + job_element_list = jobs_container.find_elements( + By.XPATH, + ".//li[contains(@class, 'scaffold-layout__list-item') and contains(@class, 'ember-view')]", + ) if not job_element_list: logger.debug("No job class elements found on page, skipping.") @@ -303,7 +308,7 @@ def apply_jobs(self): for job in job_list: logger.debug(f"Starting applicant for job: {job.title} at {job.company}") - #TODO fix apply threshold + # TODO fix apply threshold """ # Initialize applicants_count as None applicants_count = None @@ -355,7 +360,6 @@ def apply_jobs(self): # Continue with the job application process regardless of the applicants count check """ - if self.is_previously_failed_to_apply(job.link): logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...") @@ -392,10 +396,10 @@ def write_to_file(self, job : Job, file_name, reason=None): "job_location": job.location, "pdf_path": pdf_path } - + if reason: data["reason"] = reason - + file_path = self.output_file_directory / f"{file_name}.json" if not file_path.exists(): with open(file_path, 'w', encoding='utf-8') as f: @@ -455,29 +459,45 @@ def next_job_page(self, position, location, job_page): self.driver.get( f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={encoded_position}{location}&start={job_page * 25}") - def job_tile_to_job(self, job_tile) -> Job: 
logger.debug("Extracting job information from tile") job = Job() + # Extract job Title try: - job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').find_element(By.TAG_NAME, 'strong').text + title_element = job_tile.find_element( + By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__title')]//a" + ) + job.title = title_element.text.strip() logger.debug(f"Job title extracted: {job.title}") except NoSuchElementException: logger.warning("Job title is missing.") - + + # Extract job Link try: - job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').get_attribute('href').split('?')[0] + job.link = title_element.get_attribute("href").split("?")[0] logger.debug(f"Job link extracted: {job.link}") except NoSuchElementException: logger.warning("Job link is missing.") + # Extract Company Name try: - job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text + job.company = job_tile.find_element( + By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span" + ).text.strip() logger.debug(f"Job company extracted: {job.company}") except NoSuchElementException as e: - logger.warning(f'Job company is missing. {e} {traceback.format_exc()}') - + logger.warning(f"Job company is missing. {e} {traceback.format_exc()}") + + # Extract job Location + try: + job.location = job_tile.find_element( + By.XPATH, ".//ul[contains(@class, 'job-card-container__metadata-wrapper')]//li" + ).text.strip() + logger.debug(f"Job location extracted: {job.location}") + except NoSuchElementException: + logger.warning("Job location is missing.") + # Extract job ID from job url try: match = re.search(r'/jobs/view/(\d+)/', job.link) @@ -489,14 +509,9 @@ def job_tile_to_job(self, job_tile) -> Job: except Exception as e: logger.warning(f"Failed to extract job ID: {e}", exc_info=True) + # Extract job State try: - job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text - except NoSuchElementException: - logger.warning("Job location is missing.") - - - try: - job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text + job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]").text except NoSuchElementException as e: try: # Fetching state when apply method is not found @@ -559,10 +574,10 @@ def is_previously_failed_to_apply(self, link): except json.JSONDecodeError: logger.error(f"JSON decode error in file: {file_path}") return False - + for data in existing_data: data_link = data['link'] if data_link == link: return True - + return False From dbebfa8447092532b975e533b13b7f1b1e3f590d Mon Sep 17 00:00:00 2001 From: cjbbb <53784676+cjbbb@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:00:52 -0500 Subject: [PATCH 03/14] Update linkedIn_easy_applier.py Still have bugs, but can work based on it --- src/ai_hawk/linkedIn_easy_applier.py | 152 +++++++++++++++++++-------- 1 file changed, 110 insertions(+), 42 deletions(-) diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index 257b0ee9..b2ac8447 100644 --- a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -98,7 +98,7 @@ def check_for_premium_redirect(self, job_context: JobContext, max_attempts=3): logger.error(f"Failed to return to job page after {max_attempts} 
attempts. Cannot apply for the job.") raise Exception( f"Redirected to linkedIn Premium page and failed to return after {max_attempts} attempts. Job application aborted.") - + def apply_to_job(self, job: Job) -> None: """ Starts the process of applying to a job. @@ -149,12 +149,11 @@ def job_apply(self, job: Job): job.set_recruiter_link(recruiter_link) logger.debug(f"Recruiter link set: {recruiter_link}") - self.current_job = job logger.debug("Passing job information to GPT Answerer") self.gpt_answerer.set_job(job) - + # Todo: add this job to skip list with it's reason if not self.gpt_answerer.is_job_suitable(): return @@ -179,23 +178,31 @@ def job_apply(self, job: Job): raise Exception(f"Failed to apply to job! Original exception:\nTraceback:\n{tb_str}") def _find_easy_apply_button(self, job_context: JobContext) -> WebElement: - logger.debug("Searching for 'Easy Apply' button") + logger.debug("Searching for 'Easy Apply' or 'Continue' button") attempt = 0 search_methods = [ { - 'description': "find all 'Easy Apply' buttons using find_elements", - 'find_elements': True, - 'xpath': '//button[contains(@class, "jobs-apply-button") and contains(., "Easy Apply")]' + "description": "find all 'Easy Apply' buttons using find_elements", + "find_elements": True, + "xpath": '//button[contains(@class, "jobs-apply-button") and contains(., "Easy Apply")]', }, { - 'description': "'aria-label' containing 'Easy Apply to'", - 'xpath': '//button[contains(@aria-label, "Easy Apply to")]' + "description": "'aria-label' containing 'Easy Apply to'", + "xpath": '//button[contains(@aria-label, "Easy Apply to")]', }, { - 'description': "button text search", - 'xpath': '//button[contains(text(), "Easy Apply") or contains(text(), "Apply now")]' - } + "description": "button text search", + "xpath": '//button[contains(text(), "Easy Apply") or contains(text(), "Apply now")]', + }, + { + "description": "find 'Continue' button using text", + "xpath": '//button[.//span[text()="Continue"]]', + }, + { + "description": "find 'Continue' button using aria-label", + "xpath": '//button[contains(@aria-label, "Continue applying")]', + }, ] while attempt < 2: @@ -259,7 +266,7 @@ def _get_job_description(self) -> str: logger.debug("See more button not found, skipping") try: - description = self.driver.find_element(By.CLASS_NAME, 'jobs-description-content__text').text + description = self.driver.find_element(By.CLASS_NAME, 'jobs-description-content__text--stretch').text except NoSuchElementException: logger.debug("First class not found, checking for second class for premium members") description = self.driver.find_element(By.CLASS_NAME, 'job-details-about-the-job-module__description').text @@ -373,14 +380,34 @@ def fill_up(self, job_context : JobContext) -> None: try: easy_apply_content = WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, 'jobs-easy-apply-content')) + EC.presence_of_element_located( + (By.XPATH, "//div[contains(@class, 'artdeco-modal__content')]") + ) ) - input_elements = easy_apply_content.find_elements(By.CLASS_NAME, 'jobs-easy-apply-form-section__grouping') - for element in input_elements: - self._process_form_element(element, job_context) - except Exception as e: - logger.error(f"Failed to find form elements: {e}") + input_elements = easy_apply_content.find_elements( + By.XPATH, + ".//div[contains(@class, 'fb-dash-form-element')]" + ) + logger.debug(f"Found {len(input_elements)} form elements") + + for index, element in enumerate(input_elements, start=1): + try: + 
logger.debug(f"Processing form element {index}/{len(input_elements)}: {element}") + self._process_form_element(element, job_context) + except Exception as element_error: + logger.error(f"Error processing form element {index}: {element_error}") + logger.debug(traceback.format_exc()) + + except TimeoutException as timeout_error: + logger.error("Timeout while waiting for the easy apply modal content to load.") + logger.debug(traceback.format_exc()) + except NoSuchElementException as no_element_error: + logger.error("Unable to locate the specified elements.") + logger.debug(traceback.format_exc()) + except Exception as generic_error: + logger.error(f"Unexpected error occurred: {generic_error}") + logger.debug(traceback.format_exc()) def _process_form_element(self, element: WebElement, job_context : JobContext) -> None: logger.debug("Processing form element") @@ -668,7 +695,11 @@ def split_text_by_width(text, font, font_size, max_width): def _fill_additional_questions(self, job_context : JobContext) -> None: logger.debug("Filling additional questions") - form_sections = self.driver.find_elements(By.CLASS_NAME, 'jobs-easy-apply-form-section__grouping') + form_sections = self.driver.find_elements( + By.XPATH, + ".//div[contains(@class, 'fb-dash-form-element')]" + ) + for section in form_sections: self._process_form_section(job_context,section) @@ -677,9 +708,6 @@ def _process_form_section(self,job_context : JobContext, section: WebElement) -> if self._handle_terms_of_service(job_context,section): logger.debug("Handled terms of service") return - if self._find_and_handle_radio_question(job_context, section): - logger.debug("Handled radio question") - return if self._find_and_handle_textbox_question(job_context, section): logger.debug("Handled textbox question") return @@ -689,6 +717,9 @@ def _process_form_section(self,job_context : JobContext, section: WebElement) -> if self._find_and_handle_dropdown_question(job_context, section): logger.debug("Handled dropdown question") return + if self._find_and_handle_radio_question(job_context, section): + logger.debug("Handled radio question") + return def _handle_terms_of_service(self,job_context: JobContext, element: WebElement) -> bool: checkbox = element.find_elements(By.TAG_NAME, 'label') @@ -699,35 +730,60 @@ def _handle_terms_of_service(self,job_context: JobContext, element: WebElement) return True return False - def _find_and_handle_radio_question(self,job_context : JobContext, section: WebElement) -> bool: - job_application = job_context.job_application - question = section.find_element(By.CLASS_NAME, 'jobs-easy-apply-form-element') - radios = question.find_elements(By.CLASS_NAME, 'fb-text-selectable__option') - if radios: - question_text = section.text.lower() - options = [radio.text.lower() for radio in radios] + def _find_and_handle_radio_question(self, job_context: JobContext, section: WebElement) -> bool: + try: + # using css selector to find radio buttons + radios = section.find_elements(By.CSS_SELECTOR, "input[type='radio']") + if not radios: + logger.warning("No radio options found in the question.") + return False + logger.debug(f"Found {len(radios)} radio options: {[radio.text for radio in radios]}") + + question = WebDriverWait(section, 10).until( + EC.presence_of_element_located((By.CLASS_NAME, "fb-dash-form-element__label")) + ) + logger.debug(f"Question label found: {question.text}") + options = [] + for radio in radios: + label = section.find_element(By.CSS_SELECTOR, f"label[for='{radio.get_attribute('id')}']") + 
options.append(label.text.strip().lower()) + + logger.debug(f"Found radio options: {options}") + + question_text = section.text.lower() existing_answer = None - current_question_sanitized = self._sanitize_text(question_text) + current_question_sanitized = self._sanitize_text(question_text) + for item in self.all_data: if current_question_sanitized in item['question'] and item['type'] == 'radio': existing_answer = item - break if existing_answer: self._select_radio(radios, existing_answer['answer']) - job_application.save_application_data(existing_answer) + job_context.job_application.save_application_data(existing_answer) logger.debug("Selected existing radio answer") return True answer = self.gpt_answerer.answer_question_from_options(question_text, options) self._save_questions_to_json({'type': 'radio', 'question': question_text, 'answer': answer}) self.all_data = self._load_questions_from_json() - job_application.save_application_data({'type': 'radio', 'question': question_text, 'answer': answer}) + job_context.job_application.save_application_data({'type': 'radio', 'question': question_text, 'answer': answer}) self._select_radio(radios, answer) logger.debug("Selected new radio answer") return True + + except TimeoutException as e: + logger.error(f"Timeout while locating question label or radio options: {e}") + logger.debug(traceback.format_exc()) + except NoSuchElementException as e: + logger.error(f"Failed to find required elements in section: {e}") + logger.debug(traceback.format_exc()) + except Exception as e: + logger.error(f"Unexpected error occurred: {e}") + logger.debug(traceback.format_exc()) + return False def _find_and_handle_textbox_question(self,job_context : JobContext, section: WebElement) -> bool: @@ -821,9 +877,9 @@ def _find_and_handle_date_question(self, job_context : JobContext, section: WebE def _find_and_handle_dropdown_question(self,job_context : JobContext, section: WebElement) -> bool: job_application = job_context.job_application try: - question = section.find_element(By.CLASS_NAME, 'jobs-easy-apply-form-element') + question = section.find_element(By.CLASS_NAME, 'fb-dash-form-element__label') - dropdowns = question.find_elements(By.TAG_NAME, 'select') + dropdowns = section.find_elements(By.TAG_NAME, "select") if not dropdowns: dropdowns = section.find_elements(By.CSS_SELECTOR, '[data-test-text-entity-list-form-select]') @@ -834,7 +890,7 @@ def _find_and_handle_dropdown_question(self,job_context : JobContext, section: W logger.debug(f"Dropdown options found: {options}") - question_text = question.find_element(By.TAG_NAME, 'label').text.lower() + question_text = question.text.lower() logger.debug(f"Processing dropdown or combobox question: {question_text}") current_selection = select.first_selected_option.text @@ -890,10 +946,23 @@ def _enter_text(self, element: WebElement, text: str) -> None: def _select_radio(self, radios: List[WebElement], answer: str) -> None: logger.debug(f"Selecting radio option: {answer}") for radio in radios: - if answer in radio.text.lower(): - radio.find_element(By.TAG_NAME, 'label').click() - return - radios[-1].find_element(By.TAG_NAME, 'label').click() + try: + radio_id = radio.get_attribute('id') + label = radio.find_element(By.XPATH, f"//label[@for='{radio_id}']") + if answer in label.text.lower(): + label.click() + logger.debug(f"Clicked radio option: {label.text}") + return + except Exception as e: + logger.warning(f"Failed to process radio option: {radio}. 
Error: {e}") + # If no matching option found, select the last radio option + try: + last_radio_id = radios[-1].get_attribute('id') + last_label = radios[-1].find_element(By.XPATH, f"//label[@for='{last_radio_id}']") + last_label.click() + logger.debug(f"No matching option found. Selected last radio option: {last_label.text}") + except Exception as e: + logger.error(f"Failed to select the last radio option. Error: {e}") def _select_dropdown_option(self, element: WebElement, text: str) -> None: logger.debug(f"Selecting dropdown option: {text}") @@ -950,4 +1019,3 @@ def _find_existing_answer(self, question_text): def answer_contians_company_name(self,answer:Any)->bool: return isinstance(answer,str) and not self.current_job.company is None and self.current_job.company in answer - From 9a197297d07dd1dc8b4381345631add33eff5540 Mon Sep 17 00:00:00 2001 From: cjbbb <53784676+cjbbb@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:24:45 -0500 Subject: [PATCH 04/14] Fixed bugs on this --- src/ai_hawk/linkedIn_easy_applier.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index b2ac8447..063e18fb 100644 --- a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -390,6 +390,7 @@ def fill_up(self, job_context : JobContext) -> None: ".//div[contains(@class, 'fb-dash-form-element')]" ) logger.debug(f"Found {len(input_elements)} form elements") + for index, element in enumerate(input_elements, start=1): try: @@ -414,7 +415,7 @@ def _process_form_element(self, element: WebElement, job_context : JobContext) - if self._is_upload_field(element): self._handle_upload_fields(element, job_context) else: - self._fill_additional_questions(job_context) + self._fill_additional_questions(element,job_context) def _handle_dropdown_fields(self, element: WebElement) -> None: logger.debug("Handling dropdown fields") @@ -693,15 +694,9 @@ def split_text_by_width(text, font, font_size, max_width): logger.error(f"Cover letter upload failed: {tb_str}") raise Exception(f"Upload failed: \nTraceback:\n{tb_str}") - def _fill_additional_questions(self, job_context : JobContext) -> None: + def _fill_additional_questions(self, element: WebElement, job_context : JobContext) -> None: logger.debug("Filling additional questions") - form_sections = self.driver.find_elements( - By.XPATH, - ".//div[contains(@class, 'fb-dash-form-element')]" - ) - - for section in form_sections: - self._process_form_section(job_context,section) + self._process_form_section(job_context,element) def _process_form_section(self,job_context : JobContext, section: WebElement) -> None: logger.debug("Processing form section") @@ -748,7 +743,7 @@ def _find_and_handle_radio_question(self, job_context: JobContext, section: WebE for radio in radios: label = section.find_element(By.CSS_SELECTOR, f"label[for='{radio.get_attribute('id')}']") options.append(label.text.strip().lower()) - + logger.debug(f"Found radio options: {options}") question_text = section.text.lower() From 48e6c6c9d70c3a7d7b7982a95fa67ce63417369f Mon Sep 17 00:00:00 2001 From: cjbbb <53784676+cjbbb@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:00:33 -0500 Subject: [PATCH 05/14] Fixed Uploading Resume bugs --- src/ai_hawk/linkedIn_easy_applier.py | 51 +++++++++++++++++----------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index 063e18fb..e5e7b6cb 100644 --- 
a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -385,20 +385,7 @@ def fill_up(self, job_context : JobContext) -> None: ) ) - input_elements = easy_apply_content.find_elements( - By.XPATH, - ".//div[contains(@class, 'fb-dash-form-element')]" - ) - logger.debug(f"Found {len(input_elements)} form elements") - - - for index, element in enumerate(input_elements, start=1): - try: - logger.debug(f"Processing form element {index}/{len(input_elements)}: {element}") - self._process_form_element(element, job_context) - except Exception as element_error: - logger.error(f"Error processing form element {index}: {element_error}") - logger.debug(traceback.format_exc()) + self._process_form_element(easy_apply_content, job_context) except TimeoutException as timeout_error: logger.error("Timeout while waiting for the easy apply modal content to load.") @@ -414,8 +401,17 @@ def _process_form_element(self, element: WebElement, job_context : JobContext) - logger.debug("Processing form element") if self._is_upload_field(element): self._handle_upload_fields(element, job_context) - else: + elif self._is_filled(element): self._fill_additional_questions(element,job_context) + else: + logger.debug("Element is not filled") + + def _is_filled(self, element: WebElement) -> bool: + is_filled = bool(element.find_elements( + By.XPATH, ".//div[contains(@class, 'fb-dash-form-element')]" + )) + logger.debug(f"Element is filled: {is_filled}") + return is_filled def _handle_dropdown_fields(self, element: WebElement) -> None: logger.debug("Handling dropdown fields") @@ -479,8 +475,9 @@ def _handle_upload_fields(self, element: WebElement, job_context: JobContext) -> logger.debug("Handling upload fields") try: - show_more_button = self.driver.find_element(By.XPATH, - "//button[contains(@aria-label, 'Show more resumes')]") + show_more_button = self.driver.find_element( + By.XPATH, "//button[contains(@aria-label, 'Show') and contains(@aria-label, 'resumes')]" + ) show_more_button.click() logger.debug("Clicked 'Show more resumes' button") except NoSuchElementException: @@ -694,9 +691,25 @@ def split_text_by_width(text, font, font_size, max_width): logger.error(f"Cover letter upload failed: {tb_str}") raise Exception(f"Upload failed: \nTraceback:\n{tb_str}") - def _fill_additional_questions(self, element: WebElement, job_context : JobContext) -> None: + def _fill_additional_questions(self, element: WebElement, job_context: JobContext) -> None: logger.debug("Filling additional questions") - self._process_form_section(job_context,element) + try: + input_elements = element.find_elements( + By.XPATH, ".//div[contains(@class, 'fb-dash-form-element')]" + ) + logger.debug(f"Found {len(input_elements)} form elements") + + for index, section in enumerate(input_elements, start=1): + try: + logger.debug( + f"Processing form element {index}/{len(input_elements)}: {section}" + ) + self._process_form_section( job_context,section) + except Exception as section_error: + logger.error(f"Error processing form element {index}: {section_error}") + logger.debug(traceback.format_exc()) + except Exception as e: + logger.error(f"Error processing additional question element: {e}") def _process_form_section(self,job_context : JobContext, section: WebElement) -> None: logger.debug("Processing form section") From 19eaeeeb39721558d1ed5cac085c82f49b71aadb Mon Sep 17 00:00:00 2001 From: cjbbb <53784676+cjbbb@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:22:59 -0500 Subject: [PATCH 06/14] Fixed Job State bugs --- 
src/ai_hawk/job_manager.py | 27 +++++++++++++-------------- src/ai_hawk/linkedIn_easy_applier.py | 12 ++---------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index a1f58526..e7fe01aa 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -5,6 +5,7 @@ from itertools import product from pathlib import Path import traceback +from turtle import color from inputimeout import inputimeout, TimeoutOccurred from selenium.common.exceptions import NoSuchElementException @@ -431,8 +432,7 @@ def get_base_search_url(self, parameters): if working_type_filter: url_parts.append(f"f_WT={'%2C'.join(working_type_filter)}") - experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if - v] + experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if v] if experience_levels: url_parts.append(f"f_E={','.join(experience_levels)}") url_parts.append(f"distance={parameters['distance']}") @@ -497,6 +497,17 @@ def job_tile_to_job(self, job_tile) -> Job: except NoSuchElementException: logger.warning("Job location is missing.") + # Extract job State + try: + job_state = job_tile.find_element( + By.XPATH, + ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]", + ).text + logger.debug(f"Job state extracted: {job_state}") + job.apply_method = job_state + except NoSuchElementException as e: + logger.warning(f"Apply method and state not found. {e} {traceback.format_exc()}") + # Extract job ID from job url try: match = re.search(r'/jobs/view/(\d+)/', job.link) @@ -508,18 +519,6 @@ def job_tile_to_job(self, job_tile) -> Job: except Exception as e: logger.warning(f"Failed to extract job ID: {e}", exc_info=True) - # Extract job State - try: - job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]").text - except NoSuchElementException as e: - try: - # Fetching state when apply method is not found - job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text - job.apply_method = "Applied" - logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}') - except NoSuchElementException as e: - logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}') - return job def is_blacklisted(self, job_title, company, link, job_location): diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index e5e7b6cb..db0eb594 100644 --- a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -178,7 +178,7 @@ def job_apply(self, job: Job): raise Exception(f"Failed to apply to job! 
Original exception:\nTraceback:\n{tb_str}") def _find_easy_apply_button(self, job_context: JobContext) -> WebElement: - logger.debug("Searching for 'Easy Apply' or 'Continue' button") + logger.debug("Searching for 'Easy Apply' button") attempt = 0 search_methods = [ @@ -194,15 +194,7 @@ def _find_easy_apply_button(self, job_context: JobContext) -> WebElement: { "description": "button text search", "xpath": '//button[contains(text(), "Easy Apply") or contains(text(), "Apply now")]', - }, - { - "description": "find 'Continue' button using text", - "xpath": '//button[.//span[text()="Continue"]]', - }, - { - "description": "find 'Continue' button using aria-label", - "xpath": '//button[contains(@aria-label, "Continue applying")]', - }, + } ] while attempt < 2: From f8562aff4eeb3125d840cbb2374a7c058de62b91 Mon Sep 17 00:00:00 2001 From: cjbbb <53784676+cjbbb@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:18:34 -0500 Subject: [PATCH 07/14] Fixed Radio button --- src/ai_hawk/linkedIn_easy_applier.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index db0eb594..d9768a8f 100644 --- a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -334,7 +334,8 @@ def _unfollow_company(self) -> None: try: logger.debug("Unfollowing company") follow_checkbox = self.driver.find_element( - By.XPATH, "//label[contains(.,'to stay up to date with their page.')]") + By.XPATH, "//label[contains(text(), 'stay up to date')]" + ) follow_checkbox.click() except Exception as e: logger.debug(f"Failed to unfollow company: {e}") @@ -708,6 +709,9 @@ def _process_form_section(self,job_context : JobContext, section: WebElement) -> if self._handle_terms_of_service(job_context,section): logger.debug("Handled terms of service") return + if self._find_and_handle_radio_question(job_context, section): + logger.debug("Handled radio question") + return if self._find_and_handle_textbox_question(job_context, section): logger.debug("Handled textbox question") return @@ -717,9 +721,7 @@ def _process_form_section(self,job_context : JobContext, section: WebElement) -> if self._find_and_handle_dropdown_question(job_context, section): logger.debug("Handled dropdown question") return - if self._find_and_handle_radio_question(job_context, section): - logger.debug("Handled radio question") - return + def _handle_terms_of_service(self,job_context: JobContext, element: WebElement) -> bool: checkbox = element.find_elements(By.TAG_NAME, 'label') From e5f074f60ee74dcc7d561ac0a42ceb8f4fd01b0f Mon Sep 17 00:00:00 2001 From: BeniaminC Date: Fri, 29 Nov 2024 20:53:46 -0800 Subject: [PATCH 08/14] Added fixes for directory sanitation, questions, and comapny/location --- src/ai_hawk/job_manager.py | 12 +++---- src/ai_hawk/linkedIn_easy_applier.py | 50 ++++++++++++++++++---------- src/job_application_saver.py | 8 ++++- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index e7fe01aa..84d467c5 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -288,8 +288,8 @@ def get_jobs_from_page(self, scroll=False): def read_jobs(self): job_element_list = self.get_jobs_from_page() - job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list] - for job in job_list: + job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list] + for job in job_list: if self.is_blacklisted(job.title, job.company, 
job.link, job.location): logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...") self.write_to_file(job, "skipped") @@ -483,7 +483,7 @@ def job_tile_to_job(self, job_tile) -> Job: try: job.company = job_tile.find_element( By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span" - ).text.strip() + ).text.split('·')[0].strip() logger.debug(f"Job company extracted: {job.company}") except NoSuchElementException as e: logger.warning(f"Job company is missing. {e} {traceback.format_exc()}") @@ -491,8 +491,8 @@ def job_tile_to_job(self, job_tile) -> Job: # Extract job Location try: job.location = job_tile.find_element( - By.XPATH, ".//ul[contains(@class, 'job-card-container__metadata-wrapper')]//li" - ).text.strip() + By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span" + ).text.split('·')[1].strip() logger.debug(f"Job location extracted: {job.location}") except NoSuchElementException: logger.warning("Job location is missing.") @@ -507,7 +507,7 @@ def job_tile_to_job(self, job_tile) -> Job: job.apply_method = job_state except NoSuchElementException as e: logger.warning(f"Apply method and state not found. {e} {traceback.format_exc()}") - + # Extract job ID from job url try: match = re.search(r'/jobs/view/(\d+)/', job.link) diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index d9768a8f..0123a62f 100644 --- a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -32,11 +32,11 @@ def question_already_exists_in_data(question: str, data: List[dict]) -> bool: """ Check if a question already exists in the data list. - + Args: question: The question text to search for data: List of question dictionaries to search through - + Returns: bool: True if question exists, False otherwise """ @@ -334,7 +334,7 @@ def _unfollow_company(self) -> None: try: logger.debug("Unfollowing company") follow_checkbox = self.driver.find_element( - By.XPATH, "//label[contains(text(), 'stay up to date')]" + By.XPATH, "//label[@for='follow-company-checkbox']" ) follow_checkbox.click() except Exception as e: @@ -436,7 +436,7 @@ def _handle_dropdown_fields(self, element: WebElement) -> None: logger.debug(f"Detected question text: {question_text}") existing_answer = None - current_question_sanitized = self._sanitize_text(question_text) + current_question_sanitized = self._sanitize_text(question_text) for item in self.all_data: if current_question_sanitized in item['question'] and item['type'] == 'dropdown': existing_answer = item['answer'] @@ -686,7 +686,7 @@ def split_text_by_width(text, font, font_size, max_width): def _fill_additional_questions(self, element: WebElement, job_context: JobContext) -> None: logger.debug("Filling additional questions") - try: + try: input_elements = element.find_elements( By.XPATH, ".//div[contains(@class, 'fb-dash-form-element')]" ) @@ -701,8 +701,8 @@ def _fill_additional_questions(self, element: WebElement, job_context: JobContex except Exception as section_error: logger.error(f"Error processing form element {index}: {section_error}") logger.debug(traceback.format_exc()) - except Exception as e: - logger.error(f"Error processing additional question element: {e}") + except Exception as e: + logger.error(f"Error processing additional question element: {e}") def _process_form_section(self,job_context : JobContext, section: WebElement) -> None: logger.debug("Processing form section") @@ -721,7 +721,7 @@ def _process_form_section(self,job_context : 
JobContext, section: WebElement) -> if self._find_and_handle_dropdown_question(job_context, section): logger.debug("Handled dropdown question") return - + def _handle_terms_of_service(self,job_context: JobContext, element: WebElement) -> bool: checkbox = element.find_elements(By.TAG_NAME, 'label') @@ -747,7 +747,7 @@ def _find_and_handle_radio_question(self, job_context: JobContext, section: WebE logger.debug(f"Question label found: {question.text}") options = [] - for radio in radios: + for radio in radios: label = section.find_element(By.CSS_SELECTOR, f"label[for='{radio.get_attribute('id')}']") options.append(label.text.strip().lower()) @@ -808,7 +808,7 @@ def _find_and_handle_textbox_question(self,job_context : JobContext, section: We # Look for existing answer if it's not a cover letter field existing_answer = None if not is_cover_letter: - current_question_sanitized = self._sanitize_text(question_text) + current_question_sanitized = self._sanitize_text(question_text) for item in self.all_data: if item['question'] == current_question_sanitized and item.get('type') == question_type: existing_answer = item['answer'] @@ -856,7 +856,7 @@ def _find_and_handle_date_question(self, job_context : JobContext, section: WebE answer_text = answer_date.strftime("%Y-%m-%d") existing_answer = None - current_question_sanitized = self._sanitize_text(question_text) + current_question_sanitized = self._sanitize_text(question_text) for item in self.all_data: if current_question_sanitized in item['question'] and item['type'] == 'date': existing_answer = item @@ -876,10 +876,24 @@ def _find_and_handle_date_question(self, job_context : JobContext, section: WebE return True return False - def _find_and_handle_dropdown_question(self,job_context : JobContext, section: WebElement) -> bool: + def _find_and_handle_dropdown_question(self, job_context : JobContext, section: WebElement) -> bool: job_application = job_context.job_application + # in the event that there is one question, the question is outside the subsection, so we need to find the + # parents' text + try: - question = section.find_element(By.CLASS_NAME, 'fb-dash-form-element__label') + try: + question = section.find_element(By.CLASS_NAME, 'fb-dash-form-element__label') + question_text = question.text.lower() + except NoSuchElementException: + logger.debug(f"Unable to find subsection question, trying parent class...") + # parent = hash, grandparent = hash + span texts + grand_parent = section.find_element(By.XPATH, "../..") + # find the elements with texts + question = grand_parent.find_elements(By.TAG_NAME, "span") + # combine the texts + question = '\n'.join([question.text for question in question]) + question_text = question.lower() dropdowns = section.find_elements(By.TAG_NAME, "select") if not dropdowns: @@ -892,14 +906,14 @@ def _find_and_handle_dropdown_question(self,job_context : JobContext, section: W logger.debug(f"Dropdown options found: {options}") - question_text = question.text.lower() + question_text = question.text.lower() logger.debug(f"Processing dropdown or combobox question: {question_text}") current_selection = select.first_selected_option.text logger.debug(f"Current selection: {current_selection}") existing_answer = None - current_question_sanitized = self._sanitize_text(question_text) + current_question_sanitized = self._sanitize_text(question_text) for item in self.all_data: if current_question_sanitized in item['question'] and item['type'] == 'dropdown': existing_answer = item['answer'] @@ -948,9 +962,9 @@ def _enter_text(self, 
element: WebElement, text: str) -> None: def _select_radio(self, radios: List[WebElement], answer: str) -> None: logger.debug(f"Selecting radio option: {answer}") for radio in radios: - try: - radio_id = radio.get_attribute('id') - label = radio.find_element(By.XPATH, f"//label[@for='{radio_id}']") + try: + radio_id = radio.get_attribute('id') + label = radio.find_element(By.XPATH, f"//label[@for='{radio_id}']") if answer in label.text.lower(): label.click() logger.debug(f"Clicked radio option: {label.text}") diff --git a/src/job_application_saver.py b/src/job_application_saver.py index a8554d2a..85f42c01 100644 --- a/src/job_application_saver.py +++ b/src/job_application_saver.py @@ -1,6 +1,7 @@ from src.logging import logger import os import json +import re import shutil from dataclasses import asdict @@ -24,7 +25,7 @@ def create_application_directory(self): job = self.job_application.job # Create a unique directory name using the application ID and company name - dir_name = f"{job.id} - {job.company} {job.title}" + dir_name = self._sanitize_filename("{job.id} - {job.company} {job.title}") dir_path = os.path.join(BASE_DIR, dir_name) # Create the directory if it doesn't exist @@ -32,6 +33,11 @@ def create_application_directory(self): self.job_application_files_path = dir_path return dir_path + @staticmethod + def _sanitize_filename(filename): + # Remove invalid characters + return re.sub(r'[<>:"/\\|?*\n]', '_', filename) + # Function to save the job application details as a JSON file def save_application_details(self): From 2e9168f6cfef3269012c375b7cfebac2757787c4 Mon Sep 17 00:00:00 2001 From: Beniamin Condrea <30630733+BeniaminC@users.noreply.github.com> Date: Fri, 29 Nov 2024 22:00:09 -0800 Subject: [PATCH 09/14] Update job_application_saver.py Changed to f-string --- src/job_application_saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/job_application_saver.py b/src/job_application_saver.py index 85f42c01..ad8408a5 100644 --- a/src/job_application_saver.py +++ b/src/job_application_saver.py @@ -25,7 +25,7 @@ def create_application_directory(self): job = self.job_application.job # Create a unique directory name using the application ID and company name - dir_name = self._sanitize_filename("{job.id} - {job.company} {job.title}") + dir_name = self._sanitize_filename(f"{job.id} - {job.company} {job.title}") dir_path = os.path.join(BASE_DIR, dir_name) # Create the directory if it doesn't exist From fa8f4ec905e4dcaac2c9b20b3208c434270945a8 Mon Sep 17 00:00:00 2001 From: Beniamin Condrea <30630733+BeniaminC@users.noreply.github.com> Date: Fri, 29 Nov 2024 22:08:01 -0800 Subject: [PATCH 10/14] Update linkedIn_easy_applier.py Fixed the question_text error --- src/ai_hawk/linkedIn_easy_applier.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ai_hawk/linkedIn_easy_applier.py b/src/ai_hawk/linkedIn_easy_applier.py index 0123a62f..d60a9a9b 100644 --- a/src/ai_hawk/linkedIn_easy_applier.py +++ b/src/ai_hawk/linkedIn_easy_applier.py @@ -395,7 +395,7 @@ def _process_form_element(self, element: WebElement, job_context : JobContext) - if self._is_upload_field(element): self._handle_upload_fields(element, job_context) elif self._is_filled(element): - self._fill_additional_questions(element,job_context) + self._fill_additional_questions(element, job_context) else: logger.debug("Element is not filled") @@ -697,16 +697,16 @@ def _fill_additional_questions(self, element: WebElement, job_context: JobContex logger.debug( f"Processing form element 
{index}/{len(input_elements)}: {section}" ) - self._process_form_section( job_context,section) + self._process_form_section(job_context, section) except Exception as section_error: logger.error(f"Error processing form element {index}: {section_error}") logger.debug(traceback.format_exc()) except Exception as e: logger.error(f"Error processing additional question element: {e}") - def _process_form_section(self,job_context : JobContext, section: WebElement) -> None: + def _process_form_section(self, job_context : JobContext, section: WebElement) -> None: logger.debug("Processing form section") - if self._handle_terms_of_service(job_context,section): + if self._handle_terms_of_service(job_context, section): logger.debug("Handled terms of service") return if self._find_and_handle_radio_question(job_context, section): @@ -906,7 +906,6 @@ def _find_and_handle_dropdown_question(self, job_context : JobContext, section: logger.debug(f"Dropdown options found: {options}") - question_text = question.text.lower() logger.debug(f"Processing dropdown or combobox question: {question_text}") current_selection = select.first_selected_option.text From 6ea2ff7f360d8582df6443b7bc9b509ac7873674 Mon Sep 17 00:00:00 2001 From: BeniaminC Date: Sat, 30 Nov 2024 02:59:40 -0800 Subject: [PATCH 11/14] combined company name and location into one find_element call --- src/ai_hawk/job_manager.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index 84d467c5..d0504cff 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -306,7 +306,6 @@ def apply_jobs(self): job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list] for job in job_list: - logger.debug(f"Starting applicant for job: {job.title} at {job.company}") # TODO fix apply threshold """ @@ -479,23 +478,22 @@ def job_tile_to_job(self, job_tile) -> Job: except NoSuchElementException: logger.warning("Job link is missing.") - # Extract Company Name + # Extract company name and location try: - job.company = job_tile.find_element( + # contains both with a delimter '·' + company_location = job_tile.find_element( By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span" - ).text.split('·')[0].strip() + ).text + company, location = company_location.split('·') + job.company = company.strip() logger.debug(f"Job company extracted: {job.company}") - except NoSuchElementException as e: - logger.warning(f"Job company is missing. {e} {traceback.format_exc()}") - - # Extract job Location - try: - job.location = job_tile.find_element( - By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span" - ).text.split('·')[1].strip() + job.location = location.strip() logger.debug(f"Job location extracted: {job.location}") + except ValueError: + logger.debug(f"Could not find the company and location, trying original method...") + except NoSuchElementException: - logger.warning("Job location is missing.") + logger.warning(f"Job comapy and location are missing. 
{e} {traceback.format.exc()}") # Extract job State try: From 363351b9f161d99bf278197d553f73f73e501aa0 Mon Sep 17 00:00:00 2001 From: BeniaminC Date: Sat, 30 Nov 2024 03:16:09 -0800 Subject: [PATCH 12/14] Changed value error to warning --- src/ai_hawk/job_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index d0504cff..6f41b27d 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -490,7 +490,7 @@ def job_tile_to_job(self, job_tile) -> Job: job.location = location.strip() logger.debug(f"Job location extracted: {job.location}") except ValueError: - logger.debug(f"Could not find the company and location, trying original method...") + logger.warning(f"Could not find the company and location. {e} {traceback.format.exc()}") except NoSuchElementException: logger.warning(f"Job comapy and location are missing. {e} {traceback.format.exc()}") From eda81447681616d5f9184555915e5c4bdece0a38 Mon Sep 17 00:00:00 2001 From: feder-cr <85809106+feder-cr@users.noreply.github.com> Date: Sat, 30 Nov 2024 18:28:47 +0100 Subject: [PATCH 13/14] Quick Fix for LinkedIn Automation + GPTParser + undetected-chromedriver --- main.py | 14 ++---- requirements.txt | 1 + src/ai_hawk/bot_facade.py | 3 +- src/ai_hawk/job_manager.py | 17 +++---- src/ai_hawk/llm/llm_manager.py | 87 ++++++++++++++++++++++++++++++---- src/utils/chrome_utils.py | 12 +++-- 6 files changed, 103 insertions(+), 31 deletions(-) diff --git a/main.py b/main.py index 6c0d98e3..da1e4b08 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,7 @@ from lib_resume_builder_AIHawk import Resume, FacadeManager, ResumeGenerator, StyleManager from typing import Optional from constants import PLAIN_TEXT_RESUME_YAML, SECRETS_YAML, WORK_PREFERENCES_YAML -from src.utils.chrome_utils import chrome_browser_options +from src.utils.chrome_utils import init_browser from src.job_application_profile import JobApplicationProfile from src.logging import logger @@ -26,6 +26,7 @@ from ai_hawk.bot_facade import AIHawkBotFacade from ai_hawk.job_manager import AIHawkJobManager from ai_hawk.llm.llm_manager import GPTAnswerer +from ai_hawk.llm.llm_manager import GPTParser class ConfigError(Exception): @@ -155,14 +156,6 @@ def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) - return result -def init_browser() -> webdriver.Chrome: - try: - options = chrome_browser_options() - service = ChromeService(ChromeDriverManager().install()) - return webdriver.Chrome(service=service, options=options) - except Exception as e: - raise RuntimeError(f"Failed to initialize browser: {str(e)}") - def create_and_run_bot(parameters, llm_api_key): try: style_manager = StyleManager() @@ -182,9 +175,10 @@ def create_and_run_bot(parameters, llm_api_key): login_component = get_authenticator(driver=browser, platform='linkedin') apply_component = AIHawkJobManager(browser) gpt_answerer_component = GPTAnswerer(parameters, llm_api_key) + gpt_parser_component = GPTParser(parameters, llm_api_key) bot = AIHawkBotFacade(login_component, apply_component) bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object) - bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager) + bot.set_gpt_answerer_and_resume_generator(gpt_parser_component, gpt_answerer_component, resume_generator_manager) bot.set_parameters(parameters) bot.start_login() if (parameters['collectMode'] == True): diff --git a/requirements.txt b/requirements.txt index 
c8bf8f1a..1097e155 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ webdriver-manager==4.0.2 pytest pytest-mock pytest-cov +undetected_chromedriver \ No newline at end of file diff --git a/src/ai_hawk/bot_facade.py b/src/ai_hawk/bot_facade.py index 1952a510..a11fbabf 100644 --- a/src/ai_hawk/bot_facade.py +++ b/src/ai_hawk/bot_facade.py @@ -46,12 +46,13 @@ def set_job_application_profile_and_resume(self, job_application_profile, resume logger.debug("Job application profile and resume set successfully") - def set_gpt_answerer_and_resume_generator(self, gpt_answerer_component, resume_generator_manager): + def set_gpt_answerer_and_resume_generator(self, gpt_parser_component, gpt_answerer_component, resume_generator_manager): logger.debug("Setting GPT answerer and resume generator") self._ensure_job_profile_and_resume_set() gpt_answerer_component.set_job_application_profile(self.job_application_profile) gpt_answerer_component.set_resume(self.resume) self.apply_component.set_gpt_answerer(gpt_answerer_component) + self.apply_component.set_gpt_parser(gpt_parser_component) self.apply_component.set_resume_generator_manager(resume_generator_manager) self.state.gpt_answerer_set = True logger.debug("GPT answerer and resume generator set successfully") diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py index 6f41b27d..f14a8c8c 100644 --- a/src/ai_hawk/job_manager.py +++ b/src/ai_hawk/job_manager.py @@ -10,7 +10,7 @@ from inputimeout import inputimeout, TimeoutOccurred from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.common.by import By - +from src.ai_hawk.llm.llm_manager import GPTAnswerer from ai_hawk.linkedIn_easy_applier import AIHawkEasyApplier from config import JOB_MAX_APPLICATIONS, JOB_MIN_APPLICATIONS, MINIMUM_WAIT_TIME_IN_SECONDS @@ -82,6 +82,10 @@ def set_parameters(self, parameters): def set_gpt_answerer(self, gpt_answerer): logger.debug("Setting GPT answerer") self.gpt_answerer = gpt_answerer + + def set_gpt_parser(self, gpt_parser): + logger.debug("Setting GPT parser") + self.gpt_parser = gpt_parser def set_resume_generator_manager(self, resume_generator_manager): logger.debug("Setting resume generator manager") @@ -168,7 +172,7 @@ def start_applying(self): try: self.apply_jobs() except Exception as e: - logger.error(f"Error during job application: {e} {traceback.format_exc()}") + logger.error(f"Error during job application: {e}") continue logger.debug("Applying to jobs on this page has been completed!") @@ -481,18 +485,15 @@ def job_tile_to_job(self, job_tile) -> Job: # Extract company name and location try: # contains both with a delimter '·' - company_location = job_tile.find_element( - By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span" - ).text - company, location = company_location.split('·') + company, location = self.gpt_parser.extract_company_and_title(job_tile.get_attribute("outerHTML")) job.company = company.strip() logger.debug(f"Job company extracted: {job.company}") job.location = location.strip() logger.debug(f"Job location extracted: {job.location}") - except ValueError: + except ValueError as e: logger.warning(f"Could not find the company and location. {e} {traceback.format.exc()}") - except NoSuchElementException: + except NoSuchElementException as e: logger.warning(f"Job comapy and location are missing. 
{e} {traceback.format.exc()}") # Extract job State diff --git a/src/ai_hawk/llm/llm_manager.py b/src/ai_hawk/llm/llm_manager.py index b18f6f98..fe94a6af 100644 --- a/src/ai_hawk/llm/llm_manager.py +++ b/src/ai_hawk/llm/llm_manager.py @@ -461,7 +461,7 @@ def parse_llmresult(self, llmresult: AIMessage) -> Dict[str, Dict]: class GPTAnswerer: def __init__(self, config, llm_api_key): self.ai_adapter = AIAdapter(config, llm_api_key) - self.llm_cheap = LoggerChatModel(self.ai_adapter) + self.llm = LoggerChatModel(self.ai_adapter) @property def job_description(self): @@ -512,7 +512,7 @@ def summarize_job_description(self, text: str) -> str: prompts.summarize_prompt_template ) prompt = ChatPromptTemplate.from_template(prompts.summarize_prompt_template) - chain = prompt | self.llm_cheap | StrOutputParser() + chain = prompt | self.llm | StrOutputParser() raw_output = chain.invoke({TEXT: text}) output = self._clean_llm_output(raw_output) logger.debug(f"Summary generated: {output}") @@ -521,7 +521,7 @@ def summarize_job_description(self, text: str) -> str: def _create_chain(self, template: str): logger.debug(f"Creating chain with template: {template}") prompt = ChatPromptTemplate.from_template(template) - return prompt | self.llm_cheap | StrOutputParser() + return prompt | self.llm | StrOutputParser() def answer_question_textual_wide_range(self, question: str) -> str: logger.debug(f"Answering textual question: {question}") @@ -558,7 +558,7 @@ def answer_question_textual_wide_range(self, question: str) -> str: } prompt = ChatPromptTemplate.from_template(prompts.determine_section_template) - chain = prompt | self.llm_cheap | StrOutputParser() + chain = prompt | self.llm | StrOutputParser() raw_output = chain.invoke({QUESTION: question}) output = self._clean_llm_output(raw_output) @@ -615,7 +615,7 @@ def answer_question_numeric( prompts.numeric_question_template ) prompt = ChatPromptTemplate.from_template(func_template) - chain = prompt | self.llm_cheap | StrOutputParser() + chain = prompt | self.llm | StrOutputParser() raw_output_str = chain.invoke( { RESUME_EDUCATIONS: self.resume.education_details, @@ -650,7 +650,7 @@ def answer_question_from_options(self, question: str, options: list[str]) -> str logger.debug(f"Answering question from options: {question}") func_template = self._preprocess_template_string(prompts.options_template) prompt = ChatPromptTemplate.from_template(func_template) - chain = prompt | self.llm_cheap | StrOutputParser() + chain = prompt | self.llm | StrOutputParser() raw_output_str = chain.invoke( { RESUME: self.resume, @@ -672,7 +672,7 @@ def resume_or_cover(self, phrase: str) -> str: prompt = ChatPromptTemplate.from_template( prompts.resume_or_cover_letter_template ) - chain = prompt | self.llm_cheap | StrOutputParser() + chain = prompt | self.llm | StrOutputParser() raw_response = chain.invoke({PHRASE: phrase}) response = self._clean_llm_output(raw_response) logger.debug(f"Response for resume_or_cover: {response}") @@ -686,7 +686,7 @@ def resume_or_cover(self, phrase: str) -> str: def is_job_suitable(self): logger.info("Checking if job is suitable") prompt = ChatPromptTemplate.from_template(prompts.is_relavant_position_template) - chain = prompt | self.llm_cheap | StrOutputParser() + chain = prompt | self.llm | StrOutputParser() raw_output = chain.invoke( { RESUME: self.resume, @@ -707,3 +707,74 @@ def is_job_suitable(self): if int(score) < JOB_SUITABILITY_SCORE: logger.debug(f"Job is not suitable: {reasoning}") return int(score) >= JOB_SUITABILITY_SCORE + +import re +import json 
+import logging + +logger = logging.getLogger(__name__) + +class GPTParser: + def __init__(self, config, llm_api_key): + self.ai_adapter = AIAdapter(config, llm_api_key) + self.llm = LoggerChatModel(self.ai_adapter) + + @staticmethod + def _clean_llm_output(output: str) -> str: + return output.replace("*", "").replace("#", "").strip() + + @staticmethod + def _preprocess_template_string(template: str) -> str: + return textwrap.dedent(template) + + def extract_company_and_title(self, html_content: str) -> dict: + """ + Uses AI to extract the company name and job title from HTML code. + + Args: + html_content (str): The HTML code to analyze. + + Returns: + dict: A dictionary with 'company' and 'title' as keys. + """ + logger.debug("Extracting company and title from HTML content.") + + # AI prompt template + extract_prompt_template = """ + You are an AI assistant extracting information from HTML code. + Extract the company name and job title from the following HTML code: + + {html_content} + + Provide the response in JSON format with keys "company" and "title", Provide only the exact JSON without any explanations or additional text and also without ```json ``` + + """ + + # Preprocess the template + extract_prompt_template = self._preprocess_template_string(extract_prompt_template) + + # Create the prompt + prompt = ChatPromptTemplate.from_template(extract_prompt_template) + + # Create the chain + chain = prompt | self.llm | StrOutputParser() + + # Invoke the chain with the HTML + raw_output = chain.invoke({"html_content": html_content}) + + # Clean the output + output = self._clean_llm_output(raw_output) + logger.debug(f"Raw output from AI: {output}") + + # Parse the JSON output + try: + result = json.loads(output) + company = result.get('company', 'Company not found') + title = result.get('title', 'Title not found') + except json.JSONDecodeError as e: + logger.error(f"JSON decoding failed: {e}") + company = 'Company not found' + title = 'Title not found' + + logger.debug(f"Extracted company: {company}, title: {title}") + return company, title diff --git a/src/utils/chrome_utils.py b/src/utils/chrome_utils.py index 3d3a84ac..612e7766 100644 --- a/src/utils/chrome_utils.py +++ b/src/utils/chrome_utils.py @@ -1,5 +1,5 @@ import os -from selenium import webdriver +import undetected_chromedriver as uc from src.logging import logger chromeProfilePath = os.path.join(os.getcwd(), "chrome_profile", "linkedin_profile") @@ -18,7 +18,7 @@ def ensure_chrome_profile(): def chrome_browser_options(): logger.debug("Setting Chrome browser options") ensure_chrome_profile() - options = webdriver.ChromeOptions() + options = uc.ChromeOptions() options.add_argument("--start-maximized") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") @@ -37,7 +37,6 @@ def chrome_browser_options(): options.add_argument("--disable-plugins") options.add_argument("--disable-animations") options.add_argument("--disable-cache") - options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"]) prefs = { "profile.default_content_setting_values.images": 2, @@ -57,4 +56,9 @@ def chrome_browser_options(): return options - +def init_browser() -> uc.Chrome: + try: + options = chrome_browser_options() + return uc.Chrome(options=options) + except Exception as e: + raise RuntimeError(f"Failed to initialize browser: {str(e)}") From 0f0a0b24b26c3471129cb9e030b642fb94a3107a Mon Sep 17 00:00:00 2001 From: BeniaminC Date: Sat, 30 Nov 2024 15:04:24 -0800 Subject: [PATCH 14/14] Removed 
imports in the middle of the code --- src/ai_hawk/llm/llm_manager.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ai_hawk/llm/llm_manager.py b/src/ai_hawk/llm/llm_manager.py index fe94a6af..192e2693 100644 --- a/src/ai_hawk/llm/llm_manager.py +++ b/src/ai_hawk/llm/llm_manager.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Dict, List, Union + import httpx from dotenv import load_dotenv from langchain_core.messages import BaseMessage @@ -505,7 +506,7 @@ def set_job_application_profile(self, job_application_profile): def _clean_llm_output(self, output: str) -> str: return output.replace("*", "").replace("#", "").strip() - + def summarize_job_description(self, text: str) -> str: logger.debug(f"Summarizing job description: {text}") prompts.summarize_prompt_template = self._preprocess_template_string( @@ -708,11 +709,7 @@ def is_job_suitable(self): logger.debug(f"Job is not suitable: {reasoning}") return int(score) >= JOB_SUITABILITY_SCORE -import re -import json -import logging -logger = logging.getLogger(__name__) class GPTParser: def __init__(self, config, llm_api_key):
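
A note on the selector churn in the patches above: several commits replace one hard-coded LinkedIn class name with another (scaffold-layout__list-container to scaffold-layout__list-detail-container, job-card-list__title to artdeco-entity-lockup__title, and so on), and each change breaks again when the markup moves. A pattern that tends to survive these rollouts is trying an ordered list of candidate locators and logging which one matched. The sketch below is illustrative only and not part of the patches; the helper name and the locator list are assumptions based on the XPaths used above.

import logging
from typing import Iterable, Optional, Tuple

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement

logger = logging.getLogger(__name__)


def find_first(driver: WebDriver, locators: Iterable[Tuple[str, str]]) -> Optional[WebElement]:
    """Try an ordered list of (By, selector) pairs and return the first match, or None."""
    for by, selector in locators:
        try:
            element = driver.find_element(by, selector)
            logger.debug("Matched locator: %s", selector)
            return element
        except NoSuchElementException:
            continue  # fall back to the next (older) selector
    return None


# Candidate locators for the jobs list container, newest LinkedIn layout first.
# Class names are taken from the XPaths in the patches above.
JOBS_LIST_LOCATORS = [
    (By.XPATH, "//div[contains(@class, 'scaffold-layout__list-detail-container')]//ul"),
    (By.XPATH, "//ul[contains(@class, 'scaffold-layout__list-container')]"),
]

With a helper like this, get_jobs_from_page and job_tile_to_job can degrade gracefully when a single class name disappears instead of failing outright.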
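
On the GPTParser added in PATCH 13: extract_company_and_title documents a dict return value but actually returns a (company, title) tuple, and job_manager.py unpacks it as company, location, so the second field ends up holding the job title rather than the location. The prompt also relies on the model not wrapping the JSON in a ```json fence; if the model does it anyway, json.loads fails and the placeholder strings are returned. A minimal sketch of a more forgiving parse step, independent of the patch and assuming the same {"company": ..., "title": ...} contract:

import json
import re
from typing import Tuple


def parse_company_and_title(raw_output: str) -> Tuple[str, str]:
    """Parse an LLM reply expected to contain {"company": ..., "title": ...}."""
    text = raw_output.strip()
    # Strip a ```json ... ``` fence if the model added one despite instructions.
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Last resort: take the first {...} block anywhere in the reply.
        block = re.search(r"\{.*\}", text, re.DOTALL)
        try:
            data = json.loads(block.group(0)) if block else {}
        except json.JSONDecodeError:
            data = {}
    if not isinstance(data, dict):
        data = {}
    return data.get("company", "Company not found"), data.get("title", "Title not found")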