Quick Fix for LinkedIn Automation + GPTParser + undetected-chromedriver #972

14 changes: 4 additions & 10 deletions main.py
@@ -11,7 +11,7 @@
 from lib_resume_builder_AIHawk import Resume, FacadeManager, ResumeGenerator, StyleManager
 from typing import Optional
 from constants import PLAIN_TEXT_RESUME_YAML, SECRETS_YAML, WORK_PREFERENCES_YAML
-from src.utils.chrome_utils import chrome_browser_options
+from src.utils.chrome_utils import init_browser

 from src.job_application_profile import JobApplicationProfile
 from src.logging import logger
@@ -26,6 +26,7 @@
 from ai_hawk.bot_facade import AIHawkBotFacade
 from ai_hawk.job_manager import AIHawkJobManager
 from ai_hawk.llm.llm_manager import GPTAnswerer
+from ai_hawk.llm.llm_manager import GPTParser


 class ConfigError(Exception):
@@ -155,14 +156,6 @@ def file_paths_to_dict(resume_file: Path | None, plain_text_resume_file: Path) -

     return result

-def init_browser() -> webdriver.Chrome:
-    try:
-        options = chrome_browser_options()
-        service = ChromeService(ChromeDriverManager().install())
-        return webdriver.Chrome(service=service, options=options)
-    except Exception as e:
-        raise RuntimeError(f"Failed to initialize browser: {str(e)}")
-
 def create_and_run_bot(parameters, llm_api_key):
     try:
         style_manager = StyleManager()
@@ -182,9 +175,10 @@ def create_and_run_bot(parameters, llm_api_key):
         login_component = get_authenticator(driver=browser, platform='linkedin')
         apply_component = AIHawkJobManager(browser)
         gpt_answerer_component = GPTAnswerer(parameters, llm_api_key)
+        gpt_parser_component = GPTParser(parameters, llm_api_key)
         bot = AIHawkBotFacade(login_component, apply_component)
         bot.set_job_application_profile_and_resume(job_application_profile_object, resume_object)
-        bot.set_gpt_answerer_and_resume_generator(gpt_answerer_component, resume_generator_manager)
+        bot.set_gpt_answerer_and_resume_generator(gpt_parser_component, gpt_answerer_component, resume_generator_manager)
         bot.set_parameters(parameters)
         bot.start_login()
         if (parameters['collectMode'] == True):
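Note that the replacement init_browser now lives in src/utils/chrome_utils.py, which this diff does not show. Given that requirements.txt gains undetected_chromedriver (below), a minimal sketch of what such a helper could look like — the option set here is an assumption, not code from this PR:

# Hypothetical sketch of the relocated helper in src/utils/chrome_utils.py;
# the real implementation is not part of this diff.
import undetected_chromedriver as uc

def init_browser() -> uc.Chrome:
    try:
        options = uc.ChromeOptions()
        options.add_argument("--start-maximized")  # assumed default, adjust as needed
        # undetected-chromedriver downloads and patches a matching chromedriver
        # on its own, so the ChromeService/ChromeDriverManager wiring deleted
        # from main.py above is no longer needed.
        return uc.Chrome(options=options)
    except Exception as e:
        raise RuntimeError(f"Failed to initialize browser: {str(e)}")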
1 change: 1 addition & 0 deletions requirements.txt
@@ -28,3 +28,4 @@ webdriver-manager==4.0.2
 pytest
 pytest-mock
 pytest-cov
+undetected_chromedriver
3 changes: 2 additions & 1 deletion src/ai_hawk/bot_facade.py
@@ -46,12 +46,13 @@ def set_job_application_profile_and_resume(self, job_application_profile, resume
         logger.debug("Job application profile and resume set successfully")


-    def set_gpt_answerer_and_resume_generator(self, gpt_answerer_component, resume_generator_manager):
+    def set_gpt_answerer_and_resume_generator(self, gpt_parser_component, gpt_answerer_component, resume_generator_manager):
         logger.debug("Setting GPT answerer and resume generator")
         self._ensure_job_profile_and_resume_set()
         gpt_answerer_component.set_job_application_profile(self.job_application_profile)
         gpt_answerer_component.set_resume(self.resume)
         self.apply_component.set_gpt_answerer(gpt_answerer_component)
+        self.apply_component.set_gpt_parser(gpt_parser_component)
         self.apply_component.set_resume_generator_manager(resume_generator_manager)
         self.state.gpt_answerer_set = True
         logger.debug("GPT answerer and resume generator set successfully")
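GPTParser itself is added in ai_hawk/llm/llm_manager.py, whose changes are not included in this diff. From its two call sites — GPTParser(parameters, llm_api_key) in main.py, and extract_company_and_title(...) in job_manager.py, which unpacks a (company, location) pair despite the method's name — its interface is roughly the following sketch; the prompt text and the LLM helper are assumptions:

# Hypothetical sketch inferred from the call sites in this PR; the real
# GPTParser in ai_hawk/llm/llm_manager.py may differ.
class GPTParser:
    def __init__(self, parameters: dict, llm_api_key: str):
        self.parameters = parameters
        self.llm_api_key = llm_api_key

    def extract_company_and_title(self, tile_html: str) -> tuple[str, str]:
        # Ask the model to read the job-card HTML and answer "company · location".
        prompt = (
            "From this LinkedIn job-card HTML, reply with the company name and "
            "the job location separated by ' · ' and nothing else:\n" + tile_html
        )
        answer = self._ask_model(prompt)  # assumed helper around the chat model
        # The caller catches ValueError, so a malformed answer should raise it;
        # unpacking a split that found no '·' does exactly that.
        company, location = answer.split("·", 1)
        return company, location

    def _ask_model(self, prompt: str) -> str:
        raise NotImplementedError  # stands in for the project's LLM plumbing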
92 changes: 52 additions & 40 deletions src/ai_hawk/job_manager.py
@@ -10,7 +10,7 @@
 from inputimeout import inputimeout, TimeoutOccurred
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
-
+from src.ai_hawk.llm.llm_manager import GPTAnswerer

 from ai_hawk.linkedIn_easy_applier import AIHawkEasyApplier
 from config import JOB_MAX_APPLICATIONS, JOB_MIN_APPLICATIONS, MINIMUM_WAIT_TIME_IN_SECONDS
@@ -82,6 +82,10 @@ def set_parameters(self, parameters):
     def set_gpt_answerer(self, gpt_answerer):
         logger.debug("Setting GPT answerer")
         self.gpt_answerer = gpt_answerer
+
+    def set_gpt_parser(self, gpt_parser):
+        logger.debug("Setting GPT parser")
+        self.gpt_parser = gpt_parser

     def set_resume_generator_manager(self, resume_generator_manager):
         logger.debug("Setting resume generator manager")
@@ -168,7 +172,7 @@ def start_applying(self):
                 try:
                     self.apply_jobs()
                 except Exception as e:
-                    logger.error(f"Error during job application: {e} {traceback.format_exc()}")
+                    logger.error(f"Error during job application: {e}")
                     continue

                 logger.debug("Applying to jobs on this page has been completed!")
@@ -253,8 +257,10 @@ def get_jobs_from_page(self, scroll=False):
             pass

         try:
-            # XPath query to find the ul tag with class scaffold-layout__list-container
-            jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
+            # XPath query to find the ul tag inside the scaffold-layout__list-detail-container div
+            jobs_xpath_query = (
+                "//div[contains(@class, 'scaffold-layout__list-detail-container')]//ul"
+            )
             jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query)

             if scroll:
@@ -264,7 +270,10 @@ def get_jobs_from_page(self, scroll=False):
                 browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement)
                 browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True)

-            job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")
+            job_element_list = jobs_container.find_elements(
+                By.XPATH,
+                ".//li[contains(@class, 'scaffold-layout__list-item') and contains(@class, 'ember-view')]",
+            )

             if not job_element_list:
                 logger.debug("No job class elements found on page, skipping.")
@@ -283,8 +292,8 @@ def read_jobs(self):
     def read_jobs(self):

         job_element_list = self.get_jobs_from_page()
-        job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]
-        for job in job_list:
+        job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]
+        for job in job_list:
             if self.is_blacklisted(job.title, job.company, job.link, job.location):
                 logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...")
                 self.write_to_file(job, "skipped")
@@ -301,9 +310,8 @@ def apply_jobs(self):
         job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]

         for job in job_list:
-
             logger.debug(f"Starting applicant for job: {job.title} at {job.company}")
-            #TODO fix apply threshold
+            # TODO fix apply threshold
             """
             # Initialize applicants_count as None
             applicants_count = None
@@ -355,7 +363,6 @@ def apply_jobs(self):

            # Continue with the job application process regardless of the applicants count check
            """
-

            if self.is_previously_failed_to_apply(job.link):
                logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...")
@@ -392,10 +399,10 @@ def write_to_file(self, job : Job, file_name, reason=None):
             "job_location": job.location,
             "pdf_path": pdf_path
         }

         if reason:
             data["reason"] = reason

         file_path = self.output_file_directory / f"{file_name}.json"
         if not file_path.exists():
             with open(file_path, 'w', encoding='utf-8') as f:
@@ -428,8 +435,7 @@ def get_base_search_url(self, parameters):
         if working_type_filter:
             url_parts.append(f"f_WT={'%2C'.join(working_type_filter)}")

-        experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if
-                             v]
+        experience_levels = [str(i + 1) for i, (level, v) in enumerate(parameters.get('experience_level', {}).items()) if v]
         if experience_levels:
             url_parts.append(f"f_E={','.join(experience_levels)}")
         url_parts.append(f"distance={parameters['distance']}")
@@ -455,29 +461,52 @@ def next_job_page(self, position, location, job_page):
         self.driver.get(
             f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={encoded_position}{location}&start={job_page * 25}")

-
     def job_tile_to_job(self, job_tile) -> Job:
         logger.debug("Extracting job information from tile")
         job = Job()

         # Extract job Title
         try:
-            job.title = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').find_element(By.TAG_NAME, 'strong').text
+            title_element = job_tile.find_element(
+                By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__title')]//a"
+            )
+            job.title = title_element.text.strip()
             logger.debug(f"Job title extracted: {job.title}")
         except NoSuchElementException:
             logger.warning("Job title is missing.")

-
         # Extract job Link
         try:
-            job.link = job_tile.find_element(By.CLASS_NAME, 'job-card-list__title').get_attribute('href').split('?')[0]
+            job.link = title_element.get_attribute("href").split("?")[0]
             logger.debug(f"Job link extracted: {job.link}")
         except NoSuchElementException:
             logger.warning("Job link is missing.")

         # Extract company name and location
         try:
-            job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
+            # The subtitle contains both, separated by the '·' delimiter
+            company, location = self.gpt_parser.extract_company_and_title(job_tile.get_attribute("outerHTML"))
+            job.company = company.strip()
             logger.debug(f"Job company extracted: {job.company}")
+            job.location = location.strip()
+            logger.debug(f"Job location extracted: {job.location}")
+        except ValueError as e:
+            logger.warning(f"Could not find the company and location. {e} {traceback.format_exc()}")
         except NoSuchElementException as e:
-            logger.warning(f'Job company is missing. {e} {traceback.format_exc()}')
+            logger.warning(f"Job company and location are missing. {e} {traceback.format_exc()}")

         # Extract job State
+        try:
+            job_state = job_tile.find_element(
+                By.XPATH,
+                ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-item')]",
+            ).text
+            logger.debug(f"Job state extracted: {job_state}")
+            job.apply_method = job_state
+        except NoSuchElementException as e:
+            logger.warning(f"Apply method and state not found. {e} {traceback.format_exc()}")

         # Extract job ID from job url
         try:
             match = re.search(r'/jobs/view/(\d+)/', job.link)
@@ -489,23 +518,6 @@ def job_tile_to_job(self, job_tile) -> Job:
         except Exception as e:
             logger.warning(f"Failed to extract job ID: {e}", exc_info=True)

-        try:
-            job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text
-        except NoSuchElementException:
-            logger.warning("Job location is missing.")
-
-
-        try:
-            job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text
-        except NoSuchElementException as e:
-            try:
-                # Fetching state when apply method is not found
-                job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text
-                job.apply_method = "Applied"
-                logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}')
-            except NoSuchElementException as e:
-                logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}')
-
         return job

     def is_blacklisted(self, job_title, company, link, job_location):
@@ -559,10 +571,10 @@ def is_previously_failed_to_apply(self, link):
            except json.JSONDecodeError:
                logger.error(f"JSON decode error in file: {file_path}")
                return False

        for data in existing_data:
            data_link = data['link']
            if data_link == link:
                return True

        return False
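One design note on job_tile_to_job: extract_company_and_title sends each job tile's entire outerHTML to the LLM, which costs a model call per card on every page. Since the new comment says the subtitle holds company and location separated by '·', a plain string split could handle the common case and leave the LLM as a fallback. A hypothetical pre-parse, not part of this PR:

# Hypothetical guard, not part of this PR: try the '·' split on the subtitle
# text first, and only pay for a GPTParser call when the cheap parse fails.
def parse_company_and_location(subtitle_text: str) -> tuple[str, str]:
    parts = [p.strip() for p in subtitle_text.split("·", 1)]
    if len(parts) != 2 or not all(parts):
        raise ValueError(f"unexpected subtitle format: {subtitle_text!r}")
    return parts[0], parts[1]

In job_tile_to_job this would wrap the self.gpt_parser call in a try/except ValueError, falling back to the model only when the delimiter is absent.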