usmanyousaf committed on
Commit
24d0209
·
verified ·
1 Parent(s): b8a4c6a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end for an AI-assisted web scraper.

The page works in two steps:

1. The user enters a URL and clicks "Scrape Website". The page is fetched
   with ``scrape_website``, reduced with ``extract_body_content`` and
   ``clean_body_content``, and the result is kept in
   ``st.session_state.dom_content`` so it survives Streamlit reruns.
2. Once scraped content exists, the user describes what to extract and
   clicks "Parse Content". The stored content is split with
   ``split_dom_content`` and handed to ``parse_with_groq`` together with
   the description and the model chosen in the sidebar.
"""
import streamlit as st
from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
from parse import parse_with_groq

# Model identifiers offered in the sidebar; the selected one is forwarded to
# parse_with_groq as ``model``.
# NOTE(review): "distil-whisper-large-v3-en" and "llava-v1.5-7b-4096-preview"
# look like speech / vision models rather than text models — confirm that
# parse_with_groq can actually use them, otherwise drop them from the list.
MODEL_CHOICES = [
    "llama3-8b-8192",
    "distil-whisper-large-v3-en",
    "llama3-groq-70b-8192-tool-use-preview",
    "llama-3.1-8b-instant",
    "llava-v1.5-7b-4096-preview",
    "mixtral-8x7b-32768",
]

# Footer markup and its CSS. ``position: fixed; bottom: 0; left: 0;
# width: 100%`` pins it to the bottom of the viewport; it is rendered from the
# main page body (st.markdown), not from inside the sidebar.
FOOTER_HTML = """
<style>
.footer {
position: fixed;
bottom: 0;
left: 0;
width: 100%;
background-color: #272432; /* Dark background for visibility */
color: white;
text-align: center;
padding: 10px;
font-size: 14px;
}
.sidebar .footer {
position: fixed;
bottom: 0;
}
</style>

<div class="footer">
Made with ❤️ by Usman Yousaf 🚀<br>
Feel free to improve and expand this app for more powerful insights! 🔥
</div>
"""

# Page configuration is issued before any other Streamlit command.
st.set_page_config(page_title="Web Scraping App 🧠", page_icon="🌐")

# Sidebar: model selection
st.sidebar.title("🚀 Model Selection")
selected_model = st.sidebar.selectbox(
    "Choose a Model for Parsing:",
    MODEL_CHOICES,
)

# Application title
st.title("AI Web Scraper App 🌐")
st.write("Easily scrape and analyze web content using advanced AI models. 🌟")

# Input for website URL; stripped so whitespace-only input counts as empty.
url = st.text_input("Enter Website URL 🔗").strip()

# Step 1: Scrape the website
if st.button("Scrape Website"):
    if url:
        st.write("🕵️‍♂️ Scraping the website...")

        dom_content = scrape_website(url)
        body_content = extract_body_content(dom_content)
        cleaned_content = clean_body_content(body_content)

        # Keep the cleaned content in session state so step 2 can still see
        # it on later reruns (every widget interaction reruns the script).
        st.session_state.dom_content = cleaned_content

        # Display the cleaned content in an expandable text box
        with st.expander("View DOM Content"):
            st.text_area("DOM Content", cleaned_content, height=300)
    else:
        # Previously the click was silently ignored when no URL was given.
        st.warning("Please enter a website URL before scraping.")

# Step 2: Parse the content (only offered once something has been scraped)
if "dom_content" in st.session_state:
    parse_description = st.text_area("Describe what you want to parse 📝").strip()

    if st.button("Parse Content"):
        if parse_description:
            st.write(f"🤖 Parsing the content with {selected_model}...")

            # Split the stored content into chunks and parse them using Groq
            dom_chunks = split_dom_content(st.session_state.dom_content)
            parsed_result = parse_with_groq(dom_chunks, parse_description, model=selected_model)
            st.write(parsed_result)
        else:
            # Previously the click was silently ignored without a description.
            st.warning("Please describe what you want to parse.")

# Footer: raw HTML/CSS, hence unsafe_allow_html.
st.markdown(FOOTER_HTML, unsafe_allow_html=True)