Spaces:

drift-ai
/

recruiter-assistant

Runtime error

App Files Files Community

recruiter-assistant / test.py

FilipinosRich

Ran black

5f111f9 over 2 years ago

raw

history blame

5.27 kB

	import boto3
	import os
	import json
	import pandas as pd
	from urllib.parse import urlparse
	import random

	from langchain.chat_models import ChatOpenAI
	from langchain.prompts import ChatPromptTemplate
	from langchain.chains import LLMChain, SequentialChain

	llm = ChatOpenAI(temperature=0.0, openai_api_key=os.environ["OPENAI"])


	def generate_skills() -> list:
	    """Ask the LLM for the skills needed in a Data Scientist role.

	    Builds a one-step ``SequentialChain`` around an ``LLMChain`` that uses
	    the module-level ``llm``, runs it without inputs and parses the
	    ``"role_skills"`` output as JSON.

	    Returns:
	        The parsed skills. The prompt asks for a JSON list, but the reply
	        may also be an object wrapping that list under a ``"skills"`` key;
	        both shapes are accepted.

	    Raises:
	        json.JSONDecodeError: If the model output is not valid JSON.
	        KeyError: If the output is a JSON object without a ``"skills"`` key.
	    """
	    template_generate_skills = """
	Can you generate me a list of skills you would need to be successfully employed in a Data Scientist role?
	Return 10 skills as a JSON list.
	"""

	    prompt_generate_skills = ChatPromptTemplate.from_template(
	        template=template_generate_skills
	    )
	    role_skills = LLMChain(
	        llm=llm, prompt=prompt_generate_skills, output_key="role_skills"
	    )

	    generate_skills_chain = SequentialChain(
	        chains=[role_skills],
	        input_variables=[],
	        output_variables=["role_skills"],
	        verbose=False,
	    )

	    result = generate_skills_chain({})
	    parsed = json.loads(result["role_skills"])
	    # Only unwrap when the model actually returned an object; indexing a
	    # bare list with "skills" would raise TypeError.
	    if isinstance(parsed, dict):
	        parsed = parsed["skills"]
	    return parsed


	def generate_resume(skills: list) -> str:
	    """Generate a resume for a data scientist who has the given skills.

	    Args:
	        skills: Skills to embed in the resume; they are interpolated into
	            the prompt between triple backticks.

	    Returns:
	        The generated resume text, i.e. the ``"resume"`` entry of the chain
	        result.
	    """
	    template_generate_resume = """
	Given the following list of skills as an array delimited by three backticks, generate a resume of a data scientist with 3 years of experience.
	Make sure to include a section "skills" in the resume.

	```
	{skills}
	```
	"""

	    prompt_generate_resume = ChatPromptTemplate.from_template(
	        template=template_generate_resume
	    )
	    resume = LLMChain(llm=llm, prompt=prompt_generate_resume, output_key="resume")

	    generate_resume_chain = SequentialChain(
	        chains=[resume],
	        input_variables=["skills"],
	        output_variables=["resume"],
	        verbose=False,
	    )

	    result = generate_resume_chain({"skills": skills})

	    # The chain call yields a mapping, not a string. Return only the resume
	    # text so the value matches the declared ``str`` return type.
	    # NOTE(review): LangChain chains appear to echo their inputs in the
	    # result by default, so handing the whole mapping to a later prompt
	    # would also expose the input skills - confirm against Chain.__call__.
	    return result["resume"]


	def retrieve_skills(resume: str) -> list:
	    """Extract the skills mentioned in a resume using the LLM.

	    Args:
	        resume: Resume text; it is interpolated into the prompt between
	            triple backticks.

	    Returns:
	        The parsed JSON produced by the model. The prompt asks for a JSON
	        list; if the model instead wraps the list in an object under a
	        ``"skills"`` key, the wrapped list is returned.

	    Raises:
	        json.JSONDecodeError: If the model output is not valid JSON.
	    """
	    template_retrieve_skills = """
	Given the following resume delimited by three backticks, retrieve the skills this data scientist possesses.
	Return them as a JSON list.

	```
	{resume}
	```
	"""

	    prompt_retrieve_skills = ChatPromptTemplate.from_template(
	        template=template_retrieve_skills
	    )
	    skills = LLMChain(llm=llm, prompt=prompt_retrieve_skills, output_key="skills")

	    retrieve_skills_chain = SequentialChain(
	        chains=[skills],
	        input_variables=["resume"],
	        output_variables=["skills"],
	        verbose=False,
	    )

	    result = retrieve_skills_chain({"resume": resume})
	    parsed = json.loads(result["skills"])

	    # Accept the {"skills": [...]} shape as well as a bare list, so callers
	    # always iterate over skills rather than over dictionary keys.
	    if isinstance(parsed, dict) and "skills" in parsed:
	        parsed = parsed["skills"]

	    return parsed


	def get_score(true_values: list, predicted_values: list) -> float:
	    """Return the fraction of ``true_values`` found in ``predicted_values``.

	    Matching uses plain equality, so it is case- and whitespace-sensitive.
	    The matched values are printed before the score is returned.

	    Args:
	        true_values: The expected values.
	        predicted_values: The values to check against ``true_values``.

	    Returns:
	        Number of distinct predicted values that occur in ``true_values``
	        divided by ``len(true_values)``, or ``0.0`` when ``true_values`` is
	        empty.
	    """
	    matched = []
	    for value in predicted_values:
	        # Count each matching value once so repeated predictions cannot
	        # push the score above 1.0. A list is used instead of a set so
	        # unhashable values keep working.
	        if value in true_values and value not in matched:
	            matched.append(value)
	    print(matched)

	    # Nothing to recall: avoid dividing by zero.
	    if not true_values:
	        return 0.0
	    return len(matched) / len(true_values)


	if __name__ == "__main__":
	    # Round trip: generate role skills, pick three at random, write a
	    # resume around them, extract the skills back out of that resume and
	    # score the extraction against the three that were picked.
	    all_role_skills = generate_skills()
	    expected_skills = random.sample(all_role_skills, k=3)
	    generated_resume = generate_resume(expected_skills)
	    extracted_skills = retrieve_skills(generated_resume)
	    recall = get_score(expected_skills, extracted_skills)

	    # Report the expected skills, the extracted skills, then the score.
	    for item in (expected_skills, extracted_skills, recall):
	        print(item)

	# def get_resumes() -> str:

	# s3 = boto3.client(
	# 's3',
	# region_name='eu-west-1'
	# )

	# resumes = s3.get_object(Bucket='ausy-datalake-drift-nonprod', Key='resume-matcher/raw/resume-dataset.csv')

	# resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
	# resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
	# resumes_list = str(resumes_list).replace('. ', '.\n')
	# resumes_list = str(resumes_list).replace('â¢', '\n - ')
	# resumes_list = [s.replace('. ', '.\n') for s in resumes_list]
	# resumes_list = [s.replace('â¢', '\n - ') for s in resumes_list]
	# resume_string =''.join(resumes_list)
	# s3_uri = urlparse("s3://ausy-datalake-drift-nonprod/resume-matcher/raw/resume-dataset.csv", allow_fragments=False).geturl()
	# resumes_list = pd.read_csv(s3_uri, header=None, encoding='utf-8')[0].tolist()

	# return resumes_list

	# def get_skills(resumes: str) -> list:

	# template_resumes_get_skills = """
	# Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
	# For each domain list the skills of the resumes that are part of that domain.

	# Create a JSON object where the keys are the domains and the values are a list containing the skills.

	# Return that JSON object only.

	# <RESUMES>
	# {resumes}
	# </RESUMES>
	# """

	# prompt_vacancy_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills)
	# resume_skills = LLMChain(llm=llm, prompt=prompt_vacancy_get_skills, output_key="resume_skills")

	# get_skills_resumes_chain = SequentialChain(
	# chains=[resume_skills],
	# input_variables=["resumes"],
	# output_variables=["resume_skills"],
	# verbose=False
	# )

	# result = get_skills_resumes_chain({"resumes": resumes})
	# # print(result)
	# resume_skills = json.loads(result['resume_skills'])
	# print(resume_skills)

	# if __name__ == "__main__":
	# resumes = get_resumes()
	# print(resumes)
	# for x in resumes:
	# get_skills(x)