Spaces:
Runtime error
Runtime error
| import boto3 | |
| import os | |
| import json | |
| import pandas as pd | |
| from urllib.parse import urlparse | |
| import random | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.prompts import ChatPromptTemplate | |
| from langchain.chains import LLMChain, SequentialChain | |
# Single chat-model client shared by every chain in this module.
# The API key is read from the OPENAI environment variable, so importing the
# module raises KeyError when that variable is not set.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI"],
    temperature=0.0,
)
def generate_skills() -> list:
    """Ask the chat model for the skills needed in a Data Scientist role.

    The prompt requests a JSON list, but the reply may arrive either as a bare
    JSON array or as a JSON object that wraps the array under a ``"skills"``
    key. Both shapes are accepted.

    Returns:
        The decoded skills from the model reply.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
        KeyError: If the reply is a JSON object without a ``"skills"`` key.
    """
    template_generate_skills = """
    Can you generate me a list of skills you would need to be successfully employed in a Data Scientist role?
    Return 10 skills as a JSON list.
    """
    prompt_generate_skills = ChatPromptTemplate.from_template(
        template=template_generate_skills
    )
    role_skills = LLMChain(
        llm=llm, prompt=prompt_generate_skills, output_key="role_skills"
    )
    generate_skills_chain = SequentialChain(
        chains=[role_skills],
        input_variables=[],
        output_variables=["role_skills"],
        verbose=False,
    )
    result = generate_skills_chain({})
    parsed = json.loads(result["role_skills"])
    # Indexing ["skills"] unconditionally raised TypeError whenever the model
    # followed the prompt literally and returned a bare JSON array.
    if isinstance(parsed, dict):
        return parsed["skills"]
    return parsed
def generate_resume(skills: list) -> str:
    """Generate the resume of a data scientist who has the given skills.

    Args:
        skills: Skills to feature in the resume; the list is interpolated into
            the prompt between triple backticks.

    Returns:
        The resume text produced by the model.
    """
    template_generate_resume = """
    Given the following list of skills as an array delimited by three backticks, generate a resume of a data scientist with 3 years of experience.
    Make sure to include a section "skills" in the resume.
    ```
    {skills}
    ```
    """
    prompt_generate_resume = ChatPromptTemplate.from_template(
        template=template_generate_resume
    )
    resume = LLMChain(llm=llm, prompt=prompt_generate_resume, output_key="resume")
    generate_resume_chain = SequentialChain(
        chains=[resume],
        input_variables=["skills"],
        output_variables=["resume"],
        verbose=False,
    )
    result = generate_resume_chain({"skills": skills})
    # The chain call yields a mapping keyed by output name. Returning the whole
    # mapping (as before) contradicted the declared ``str`` return type and
    # leaked a stringified dict into any prompt built from this value.
    return result["resume"]
def retrieve_skills(resume: str) -> list:
    """Extract the skills mentioned in a resume by asking the chat model.

    The prompt requests a JSON list. If the model instead wraps the list in a
    JSON object under a ``"skills"`` key, the list is unwrapped so callers
    always iterate over skills rather than over object keys.

    Args:
        resume: Resume text; interpolated into the prompt between triple
            backticks.

    Returns:
        The decoded skills from the model reply.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    template_retrieve_skills = """
    Given the following resume delimited by three backticks, retrieve the skills this data scientist possesses.
    Return them as a JSON list.
    ```
    {resume}
    ```
    """
    prompt_retrieve_skills = ChatPromptTemplate.from_template(
        template=template_retrieve_skills
    )
    skills = LLMChain(llm=llm, prompt=prompt_retrieve_skills, output_key="skills")
    retrieve_skills_chain = SequentialChain(
        chains=[skills],
        input_variables=["resume"],
        output_variables=["skills"],
        verbose=False,
    )
    result = retrieve_skills_chain({"resume": resume})
    parsed = json.loads(result["skills"])
    # Mirror generate_skills: tolerate an object wrapper around the array.
    if isinstance(parsed, dict) and "skills" in parsed:
        return parsed["skills"]
    return parsed
def get_score(true_values: list, predicted_values: list) -> float:
    """Return the fraction of ``true_values`` that occur in ``predicted_values``.

    Matching is exact (``==``) and therefore case-sensitive. The matched values
    are printed as a diagnostic before the score is returned.

    Args:
        true_values: The reference values that should have been recovered.
        predicted_values: The values that were actually recovered.

    Returns:
        A score between 0.0 and 1.0; 0.0 when ``true_values`` is empty.
    """
    # Nothing to recover: avoid dividing by zero.
    if not true_values:
        return 0.0
    # Iterate over the true values so that a prediction repeated several times
    # is counted once and the score cannot exceed 1.0.
    intersection_list = [value for value in true_values if value in predicted_values]
    print(intersection_list)
    return len(intersection_list) / len(true_values)
def main() -> None:
    """Run one generate -> resume -> retrieve round trip and print the score.

    Generates role skills, samples up to three of them, has a resume written
    around the sample, extracts skills back out of that resume, and prints the
    sampled skills, the extracted skills and the resulting score.
    """
    role_skills = generate_skills()
    if not role_skills:
        raise SystemExit("No skills were generated; nothing to evaluate.")
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of skills actually returned.
    sample_size = min(3, len(role_skills))
    random_skills = random.sample(role_skills, sample_size)
    resume = generate_resume(random_skills)
    skills = retrieve_skills(resume)
    score = get_score(random_skills, skills)
    print(random_skills)
    print(skills)
    print(score)


if __name__ == "__main__":
    main()
| # def get_resumes() -> str: | |
| # s3 = boto3.client( | |
| # 's3', | |
| # region_name='eu-west-1' | |
| # ) | |
| # resumes = s3.get_object(Bucket='ausy-datalake-drift-nonprod', Key='resume-matcher/raw/resume-dataset.csv') | |
| # resumes_list = resumes['Body'].read().decode('utf-8').splitlines() | |
| # resumes_list = resumes['Body'].read().decode('utf-8').splitlines() | |
| # resumes_list = str(resumes_list).replace('. ', '.\n') | |
| # resumes_list = str(resumes_list).replace('â¢', '\n - ') | |
| # resumes_list = [s.replace('. ', '.\n') for s in resumes_list] | |
| # resumes_list = [s.replace('â¢', '\n - ') for s in resumes_list] | |
| # resume_string =''.join(resumes_list) | |
| # s3_uri = urlparse("s3://ausy-datalake-drift-nonprod/resume-matcher/raw/resume-dataset.csv", allow_fragments=False).geturl() | |
| # resumes_list = pd.read_csv(s3_uri, header=None, encoding='utf-8')[0].tolist() | |
| # return resumes_list | |
| # def get_skills(resumes: str) -> list: | |
| # template_resumes_get_skills = """ | |
| # Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain. | |
| # For each domain list the skills of the resumes that are part of that domain. | |
| # Create a JSON object where they keys are the domains and the values are a list containing the skills. | |
| # Return that JSON object only. | |
| # <RESUMES> | |
| # {resumes} | |
| # </RESUMES> | |
| # """ | |
| # prompt_vacancy_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills) | |
| # resume_skills = LLMChain(llm=llm, prompt=prompt_vacancy_get_skills, output_key="resume_skills") | |
| # get_skills_resumes_chain = SequentialChain( | |
| # chains=[resume_skills], | |
| # input_variables=["resumes"], | |
| # output_variables=["resume_skills"], | |
| # verbose=False | |
| # ) | |
| # result = get_skills_resumes_chain({"resumes": resumes}) | |
| # # print(result) | |
| # resume_skills = json.loads(result['resume_skills']) | |
| # print(resume_skills) | |
| # if __name__ == "__main__": | |
| # resumes = get_resumes() | |
| # print(resumes) | |
| # for x in resumes: | |
| # get_skills(x) | |