project / app.py
ArchiMathur's picture
Update app.py
ee35562 verified
# import gradio as gr
# import pandas as pd
# import numpy as np
# import pickle
# import sklearn
# from datasets import load_dataset
# import joblib
# import requests
# # Read the data
# data = pd.read_csv("mldata.csv")
# # Function to load model based on selection
# def load_model(model_choice):
# if model_choice == "Random Forest":
# with open('rfweights (1).pkl', 'rb') as pickleFile:
# return pickle.load(pickleFile)
# elif model_choice == "Decision Tree":
# with open('dtreeweights.pkl', 'rb') as pickleFile:
# return pickle.load(pickleFile)
# else:
# raise ValueError("Invalid model selection")
# # Prepare categorical data (same as original code)
# categorical_cols = data[[
# 'certifications',
# 'workshops',
# 'Interested subjects',
# 'interested career area ',
# 'Type of company want to settle in?',
# 'Interested Type of Books'
# ]]
# # Assign category codes
# for i in categorical_cols:
# data[i] = data[i].astype('category')
# data[i] = data[i].cat.codes
# # Create reference dictionaries for embeddings (same as original code)
# def create_embedding_dict(column):
# unique_names = list(categorical_cols[column].unique())
# unique_codes = list(data[column].unique())
# return dict(zip(unique_names, unique_codes))
# certificates_references = create_embedding_dict('certifications')
# workshop_references = create_embedding_dict('workshops')
# subjects_interest_references = create_embedding_dict('Interested subjects')
# career_interest_references = create_embedding_dict('interested career area ')
# company_intends_references = create_embedding_dict('Type of company want to settle in?')
# book_interest_references = create_embedding_dict('Interested Type of Books')
# # Prediction function (modified to accept model choice)
# def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
# self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
# subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
# team_player, management_technical, smart_hardworker):
# # Load the selected model
# rfmodel = load_model(model_choice)
# # Create DataFrame (same as original code)
# df = pd.DataFrame.from_dict(
# {
# "logical_thinking": [logical_thinking],
# "hackathon_attend": [hackathon_attend],
# "coding_skills": [coding_skills],
# "public_speaking_skills": [public_speaking_skills],
# "self_learning": [self_learning],
# "extra_course": [extra_course],
# "certificate": [certificate_code],
# "workshop": [worskhop_code],
# "read_writing_skills": [
# (0 if "poor" in read_writing_skill else 1 if "medium" in read_writing_skill else 2)
# ],
# "memory_capability": [
# (0 if "poor" in memory_capability else 1 if "medium" in memory_capability else 2)
# ],
# "subject_interest": [subject_interest],
# "career_interest": [career_interest],
# "company_intend": [company_intend],
# "senior_elder_advise": [senior_elder_advise],
# "book_interest": [book_interest],
# "introvert_extro": [introvert_extro],
# "team_player": [team_player],
# "management_technical":[management_technical],
# "smart_hardworker": [smart_hardworker]
# }
# )
# # Replace string values with numeric representations
# df = df.replace({
# "certificate": certificates_references,
# "workshop": workshop_references,
# "subject_interest": subjects_interest_references,
# "career_interest": career_interest_references,
# "company_intend": company_intends_references,
# "book_interest": book_interest_references
# })
# # Dummy encoding (same as original code)
# userdata_list = df.values.tolist()
# # Management-Technical dummy encoding
# if(df["management_technical"].values == "Management"):
# userdata_list[0].extend([1])
# userdata_list[0].extend([0])
# userdata_list[0].remove('Management')
# elif(df["management_technical"].values == "Technical"):
# userdata_list[0].extend([0])
# userdata_list[0].extend([1])
# userdata_list[0].remove('Technical')
# else:
# return "Error in Management-Technical encoding"
# # Smart-Hard worker dummy encoding
# if(df["smart_hardworker"].values == "smart worker"):
# userdata_list[0].extend([1])
# userdata_list[0].extend([0])
# userdata_list[0].remove('smart worker')
# elif(df["smart_hardworker"].values == "hard worker"):
# userdata_list[0].extend([0])
# userdata_list[0].extend([1])
# userdata_list[0].remove('hard worker')
# else:
# return "Error in Smart-Hard worker encoding"
# # Prediction
# prediction_result_all = rfmodel.predict_proba(userdata_list)
# # Create result dictionary
# result_list = {
# "Applications Developer": float(prediction_result_all[0][0]),
# "CRM Technical Developer": float(prediction_result_all[0][1]),
# "Database Developer": float(prediction_result_all[0][2]),
# "Mobile Applications Developer": float(prediction_result_all[0][3]),
# "Network Security Engineer": float(prediction_result_all[0][4]),
# "Software Developer": float(prediction_result_all[0][5]),
# "Software Engineer": float(prediction_result_all[0][6]),
# "Software Quality Assurance (QA)/ Testing": float(prediction_result_all[0][7]),
# "Systems Security Administrator": float(prediction_result_all[0][8]),
# "Technical Support": float(prediction_result_all[0][9]),
# "UX Designer": float(prediction_result_all[0][10]),
# "Web Developer": float(prediction_result_all[0][11]),
# }
# return result_list
# # Lists for dropdown menus (same as original code)
# cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
# workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
# skill = ["excellent", "medium", "poor"]
# subject_list = ["cloud computing", "Computer Architecture", "data engineering", "hacking", "IOT", "Management", "networks", "parallel computing", "programming", "Software Engineering"]
# career_list = ["Business process analyst", "cloud computing", "developer", "security", "system developer", "testing"]
# company_list = ["BPA", "Cloud Services", "Finance", "Product based", "product development", "SAaS services", "Sales and Marketing", "Service Based", "Testing and Maintainance Services", "Web Services"]
# book_list = ["Action and Adventure", "Anthology", "Art", "Autobiographies", "Biographies", "Childrens", "Comics","Cookbooks","Diaries","Dictionaries","Drama","Encyclopedias","Fantasy","Guide","Health","History","Horror","Journals","Math","Mystery","Poetry","Prayer books","Religion-Spirituality","Romance","Satire","Science","Science fiction","Self help","Series","Travel","Trilogy"]
# Choice_list = ["Management", "Technical"]
# worker_list = ["hard worker", "smart worker"]
# # Create Gradio interface (modified to include model selection)
# demo = gr.Interface(
# fn=rfprediction,
# inputs=[
# gr.Dropdown(["Random Forest", "Decision Tree"], label="Select Machine Learning Model"),
# gr.Textbox(placeholder="What is your name?", label="Name"),
# gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
# gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
# gr.Slider(minimum=1, maximum=9, value=5, step=1, label="How do you rate your coding skills?", info="Scale: 1 - 9"),
# gr.Slider(minimum=1, maximum=9, value=3, step=1, label="How do you rate your public speaking skills/confidency?", info="Scale: 1 - 9"),
# gr.Radio({"Yes", "No"}, type="index", label="Are you a self-learning person? *"),
# gr.Radio({"Yes", "No"}, type="index", label="Do you take extra courses in uni (other than IT)? *"),
# gr.Dropdown(cert_list, label="Select a certificate you took!"),
# gr.Dropdown(workshop_list, label="Select a workshop you attended!"),
# gr.Dropdown(skill, label="Select your read and writing skill"),
# gr.Dropdown(skill, label="Is your memory capability good?"),
# gr.Dropdown(subject_list, label="What subject you are interested in?"),
# gr.Dropdown(career_list, label="Which IT-Career do you have interests in?"),
# gr.Dropdown(company_list, label="Do you have any interested company that you intend to settle in?"),
# gr.Radio({"Yes", "No"}, type="index", label="Do you ever seek any advices from senior or elders? *"),
# gr.Dropdown(book_list, label="Select your interested genre of book!"),
# gr.Radio({"Yes", "No"}, type="index", label="Are you an Introvert?| No - extrovert *"),
# gr.Radio({"Yes", "No"}, type="index", label="Ever worked in a team? *"),
# gr.Dropdown(Choice_list, label="Which area do you prefer: Management or Technical?"),
# gr.Dropdown(worker_list, label="Are you a Smart worker or Hard worker?")
# ],
# outputs=gr.Label(num_top_classes=5),
# title="IT-Career Recommendation System: TMI4033 Colletive Intelligence, Group 12",
# description="Members: Derrick Lim Kin Yeap 74597, Jason Jong Sheng Tat 75125, Jason Ng Yong Xing 75127, Muhamad Hazrie Bin Suhkery 73555 "
# )
# url = "https://jobs-api14.p.rapidapi.com/v2/list"
# querystring = {
# "query":"Web Developer",
# "location":"India",
# "autoTranslateLocation":"false",
# "remoteOnly":"false",
# "employmentTypes":"fulltime;parttime;intern;contractor"
# }
# headers = {
# "x-rapidapi-key": "714f5a2539msh798d996c3243876p19c71ajsnfcd7ce481cb9",
# "x-rapidapi-host": "jobs-api14.p.rapidapi.com"
# }
# # Main execution
# if __name__ == "__main__":
# # Fetch job listings before launching the app
# try:
# response = requests.get(url, headers=headers, params=querystring)
# job_listings = response.json()
# print("Job Listings Retrieved Successfully")
# # You could potentially store or process job_listings here
# except requests.RequestException as e:
# print(f"Error fetching job listings: {e}")
# demo.launch(share=True)
import gradio as gr
import pandas as pd
import numpy as np
import pickle
import sklearn
from datasets import load_dataset
import joblib
import requests
# Read the data
# Loads mldata.csv (path is relative to the current working directory).
# Six of its text columns are label-encoded further down, and the same frame
# is used to build the name -> code lookups applied to user input.
data = pd.read_csv("mldata.csv")
# Function to load model based on selection
# Pickle file (relative to the working directory) for each selectable model.
_MODEL_FILES = {
    "Random Forest": 'rfweights (1).pkl',
    "Decision Tree": 'dtreeweights.pkl',
}
# Models already unpickled in this process, keyed by model name, so that a
# prediction request does not re-read the file from disk every time.
_MODEL_CACHE = {}


def load_model(model_choice):
    """Return the unpickled estimator for the selected model name.

    Args:
        model_choice: Either "Random Forest" or "Decision Tree".

    Returns:
        The object stored in the corresponding pickle file.  The file is
        read on first use only; later calls return the same cached object.

    Raises:
        ValueError: If ``model_choice`` is not one of the known names
            (including ``None``, i.e. nothing selected).
        OSError: If the pickle file cannot be opened.
    """
    path = _MODEL_FILES.get(model_choice) if isinstance(model_choice, str) else None
    if path is None:
        raise ValueError("Invalid model selection")

    if model_choice not in _MODEL_CACHE:
        # NOTE(security): unpickling can execute arbitrary code, so these
        # files must only ever come from a trusted source.
        with open(path, 'rb') as pickleFile:
            _MODEL_CACHE[model_choice] = pickle.load(pickleFile)
    return _MODEL_CACHE[model_choice]
# Prepare categorical data
# Text columns of `data` that are converted to integer category codes below.
_CATEGORICAL_COLUMN_NAMES = [
    'certifications',
    'workshops',
    'Interested subjects',
    'interested career area ',
    'Type of company want to settle in?',
    'Interested Type of Books',
]

# Selection taken BEFORE encoding: create_embedding_dict reads the original
# string labels from here.  NOTE(review): this relies on data[[...]] returning
# a separate frame rather than a view of `data` - confirm for the pandas
# version in use.
categorical_cols = data[_CATEGORICAL_COLUMN_NAMES]

# Assign category codes
for column_name in _CATEGORICAL_COLUMN_NAMES:
    data[column_name] = data[column_name].astype('category').cat.codes
# Create reference dictionaries for embeddings
def create_embedding_dict(column):
    """Map each original label of ``column`` to its integer category code.

    The distinct labels are read from ``categorical_cols`` and the distinct
    codes from the already-encoded ``data``; the two sequences are paired
    position by position.

    Args:
        column: Name of a column present in both ``categorical_cols`` and
            ``data``.

    Returns:
        dict: original label -> category code.
    """
    labels = categorical_cols[column].unique()
    codes = data[column].unique()
    return {label: code for label, code in zip(labels, codes)}
# One label -> code lookup per encoded column, built in the same order as
# the column names listed on the right-hand side.
(
    certificates_references,
    workshop_references,
    subjects_interest_references,
    career_interest_references,
    company_intends_references,
    book_interest_references,
) = (
    create_embedding_dict(column_name)
    for column_name in (
        'certifications',
        'workshops',
        'Interested subjects',
        'interested career area ',
        'Type of company want to settle in?',
        'Interested Type of Books',
    )
)
# Function to fetch job listings
def fetch_job_listings(job_title):
    """Look up job openings for ``job_title`` through the RapidAPI jobs API.

    Args:
        job_title: Search phrase sent as the ``query`` parameter.

    Returns:
        list[list]: Up to five rows of ``[title, company, location, salary]``.
        If the response contains no jobs, or the request fails, a single
        four-item placeholder row describing the situation is returned, so
        the result always fits a four-column table.
    """
    import os  # local import so this function does not depend on file-level edits

    url = "https://jobs-api14.p.rapidapi.com/v2/list"
    querystring = {
        "query": job_title,
        "location": "India",
        "autoTranslateLocation": "false",
        "remoteOnly": "false",
        "employmentTypes": "fulltime;parttime;intern;contractor"
    }
    headers = {
        # SECURITY NOTE(review): the fallback key below is committed to the
        # repository and should be rotated.  Set the RAPIDAPI_KEY environment
        # variable to supply a key without touching the code.
        "x-rapidapi-key": os.environ.get(
            "RAPIDAPI_KEY",
            "714f5a2539msh798d996c3243876p19c71ajsnfcd7ce481cb9",
        ),
        "x-rapidapi-host": "jobs-api14.p.rapidapi.com"
    }

    try:
        # A timeout keeps a stalled API from blocking the request forever.
        response = requests.get(url, headers=headers, params=querystring, timeout=10)
        # Surface HTTP errors (bad key, rate limit, ...) as errors instead of
        # letting them fall through to the "no job listings" message.
        response.raise_for_status()
        job_data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return [['Error', 'fetching', 'job listings', str(e)]]

    jobs = job_data.get('jobs') if isinstance(job_data, dict) else None
    if not jobs or not isinstance(jobs, list):
        return [['No job listings', 'found', 'for this', 'career path']]

    # (response key, value shown when the key is absent) for each table column.
    columns = (
        ('title', 'N/A'),
        ('company', 'N/A'),
        ('location', 'N/A'),
        ('salary', 'Not specified'),
    )
    # Limit to 5 job listings; entries that are not mappings are skipped.
    return [
        [job.get(key, default) for key, default in columns]
        for job in jobs[:5]
        if isinstance(job, dict)
    ] or [['No job listings', 'found', 'for this', 'career path']]
# Prediction function (modified to return job suggestions)
# Career name reported for each column of the model's predict_proba output.
# NOTE(review): this order is taken from the index -> name mapping hard-coded
# in this function; confirm it matches the trained model's class order.
_CAREER_LABELS = (
    "Applications Developer",
    "CRM Technical Developer",
    "Database Developer",
    "Mobile Applications Developer",
    "Network Security Engineer",
    "Software Developer",
    "Software Engineer",
    "Software Quality Assurance (QA)/ Testing",
    "Systems Security Administrator",
    "Technical Support",
    "UX Designer",
    "Web Developer",
)


def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
                 self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
                 subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
                 team_player, management_technical, smart_hardworker):
    """Predict career probabilities from the form answers and fetch matching jobs.

    The parameters arrive positionally from the Gradio interface, in the
    order of its ``inputs`` list.  ``name`` is accepted but not used.

    Returns:
        tuple: ``(probabilities, job_rows)`` where ``probabilities`` maps
        each career name to its predicted probability and ``job_rows`` is the
        result of ``fetch_job_listings`` for the most probable career.

    Raises:
        gr.Error: If a question was left unanswered or one of the two
            two-way choices has an unexpected value.  (Previously these paths
            either crashed on ``None`` or returned a single string although
            two outputs are expected.)
        ValueError: Propagated from ``load_model`` for an unknown model name.
    """
    # Column order matters: it is the feature order handed to the model, with
    # the two string-valued choice columns deliberately last.
    answers = {
        "logical_thinking": logical_thinking,
        "hackathon_attend": hackathon_attend,
        "coding_skills": coding_skills,
        "public_speaking_skills": public_speaking_skills,
        "self_learning": self_learning,
        "extra_course": extra_course,
        "certificate": certificate_code,
        "workshop": worskhop_code,
        "read_writing_skills": read_writing_skill,
        "memory_capability": memory_capability,
        "subject_interest": subject_interest,
        "career_interest": career_interest,
        "company_intend": company_intend,
        "senior_elder_advise": senior_elder_advise,
        "book_interest": book_interest,
        "introvert_extro": introvert_extro,
        "team_player": team_player,
        "management_technical": management_technical,
        "smart_hardworker": smart_hardworker,
    }

    # Inputs without a default value arrive as None when left untouched.
    unanswered = [field for field, value in answers.items() if value is None]
    if model_choice is None:
        unanswered.insert(0, "model_choice")
    if unanswered:
        raise gr.Error(
            "Please answer every question before submitting. Missing: "
            + ", ".join(unanswered)
        )

    # Two-column dummy encodings appended after the numeric features.
    management_dummies = {"Management": [1, 0], "Technical": [0, 1]}
    worker_dummies = {"smart worker": [1, 0], "hard worker": [0, 1]}
    if management_technical not in management_dummies:
        raise gr.Error("Error in Management-Technical encoding")
    if smart_hardworker not in worker_dummies:
        raise gr.Error("Error in Smart-Hard worker encoding")

    # Load the selected model
    rfmodel = load_model(model_choice)

    # Ordinal encoding of the two skill answers: poor=0, medium=1, otherwise 2.
    for field in ("read_writing_skills", "memory_capability"):
        level = answers[field]
        answers[field] = 0 if "poor" in level else 1 if "medium" in level else 2

    # Create DataFrame
    df = pd.DataFrame.from_dict({field: [value] for field, value in answers.items()})

    # Replace string values with numeric representations
    df = df.replace({
        "certificate": certificates_references,
        "workshop": workshop_references,
        "subject_interest": subjects_interest_references,
        "career_interest": career_interest_references,
        "company_intend": company_intends_references,
        "book_interest": book_interest_references
    })

    # Dummy encoding: drop the two trailing string columns by position and
    # append their one-hot pairs (management/technical, then smart/hard).
    features = df.values.tolist()[0][:-2]
    features.extend(management_dummies[management_technical])
    features.extend(worker_dummies[smart_hardworker])

    # Prediction
    prediction_result_all = rfmodel.predict_proba([features])
    probabilities = prediction_result_all[0]

    # Create result dictionary with probabilities
    result_list = {
        label: float(probabilities[index])
        for index, label in enumerate(_CAREER_LABELS)
    }

    # Find the top predicted career
    top_career = max(result_list, key=result_list.get)

    # Fetch job listings for the top predicted career
    job_suggestions = fetch_job_listings(top_career)
    return result_list, job_suggestions
# Lists for dropdown menus
# Each list holds the options offered by one form question.
cert_list = [
    "app development",
    "distro making",
    "full stack",
    "hadoop",
    "information security",
    "machine learning",
    "python",
    "r programming",
    "shell programming",
]
workshop_list = [
    "cloud computing",
    "data science",
    "database security",
    "game development",
    "hacking",
    "system designing",
    "testing",
    "web technologies",
]
# Shared by the read/write-skill and memory-capability questions.
skill = ["excellent", "medium", "poor"]
subject_list = [
    "cloud computing",
    "Computer Architecture",
    "data engineering",
    "hacking",
    "IOT",
    "Management",
    "networks",
    "parallel computing",
    "programming",
    "Software Engineering",
]
career_list = [
    "Business process analyst",
    "cloud computing",
    "developer",
    "security",
    "system developer",
    "testing",
]
company_list = [
    "BPA",
    "Cloud Services",
    "Finance",
    "Product based",
    "product development",
    "SAaS services",
    "Sales and Marketing",
    "Service Based",
    "Testing and Maintainance Services",
    "Web Services",
]
book_list = [
    "Action and Adventure",
    "Anthology",
    "Art",
    "Autobiographies",
    "Biographies",
    "Childrens",
    "Comics",
    "Cookbooks",
    "Diaries",
    "Dictionaries",
    "Drama",
    "Encyclopedias",
    "Fantasy",
    "Guide",
    "Health",
    "History",
    "Horror",
    "Journals",
    "Math",
    "Mystery",
    "Poetry",
    "Prayer books",
    "Religion-Spirituality",
    "Romance",
    "Satire",
    "Science",
    "Science fiction",
    "Self help",
    "Series",
    "Travel",
    "Trilogy",
]
Choice_list = ["Management", "Technical"]
worker_list = ["hard worker", "smart worker"]
# Create Gradio interface
def create_output_component():
    """Build the output widgets for the interface.

    Returns:
        list: A ``gr.Label`` titled "Career Probabilities" followed by a
        ``gr.Dataframe`` titled "Job Suggestions" with the columns
        Job Title, Company, Location and Salary.
    """
    probabilities_view = gr.Label(label="Career Probabilities")
    jobs_table = gr.Dataframe(
        label="Job Suggestions",
        headers=["Job Title", "Company", "Location", "Salary"],
    )
    return [probabilities_view, jobs_table]
# Options for the yes/no questions.  These radios use type="index", so the
# POSITION of the chosen option is what reaches the model.  The options were
# previously given as a set ({"Yes", "No"}), whose iteration order is not
# fixed between interpreter runs, so the number sent for "Yes" could change
# from one launch to the next.  An ordered list pins it: No -> 0, Yes -> 1.
# NOTE(review): confirm that 0 = No / 1 = Yes matches the encoding used when
# the models were trained.
YES_NO_CHOICES = ["No", "Yes"]

# The order of `inputs` must match the positional parameters of rfprediction.
demo = gr.Interface(
    fn=rfprediction,
    inputs=[
        gr.Dropdown(["Random Forest", "Decision Tree"], label="Select Machine Learning Model"),
        gr.Textbox(placeholder="What is your name?", label="Name"),
        gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
        gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
        gr.Slider(minimum=1, maximum=9, value=5, step=1, label="How do you rate your coding skills?", info="Scale: 1 - 9"),
        gr.Slider(minimum=1, maximum=9, value=3, step=1, label="How do you rate your public speaking skills/confidency?", info="Scale: 1 - 9"),
        gr.Radio(YES_NO_CHOICES, type="index", label="Are you a self-learning person? *"),
        gr.Radio(YES_NO_CHOICES, type="index", label="Do you take extra courses in uni (other than IT)? *"),
        gr.Dropdown(cert_list, label="Select a certificate you took!"),
        gr.Dropdown(workshop_list, label="Select a workshop you attended!"),
        gr.Dropdown(skill, label="Select your read and writing skill"),
        gr.Dropdown(skill, label="Is your memory capability good?"),
        gr.Dropdown(subject_list, label="What subject you are interested in?"),
        gr.Dropdown(career_list, label="Which IT-Career do you have interests in?"),
        gr.Dropdown(company_list, label="Do you have any interested company that you intend to settle in?"),
        gr.Radio(YES_NO_CHOICES, type="index", label="Do you ever seek any advices from senior or elders? *"),
        gr.Dropdown(book_list, label="Select your interested genre of book!"),
        gr.Radio(YES_NO_CHOICES, type="index", label="Are you an Introvert?| No - extrovert *"),
        gr.Radio(YES_NO_CHOICES, type="index", label="Ever worked in a team? *"),
        gr.Dropdown(Choice_list, label="Which area do you prefer: Management or Technical?"),
        gr.Dropdown(worker_list, label="Are you a Smart worker or Hard worker?"),
    ],
    outputs=create_output_component(),
    title="AI-Enhanced Career guidance System",
)
# Main execution
# Start the web UI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)