UmaKumpatla committed on
Commit
be5957e
·
verified ·
1 Parent(s): 0feaf12

Update app.py

Files changed (1)
  1. app.py +77 -0
app.py CHANGED
@@ -0,0 +1,77 @@
+ import os
+ import asyncio
+ import streamlit as st
+
+ from crawl4ai import AsyncWebCrawler
+ from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
+
+ from langchain_core.documents import Document
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+
+ # Load the Hugging Face token from the Space secret named "hf"
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf"]
+ os.environ["HF_TOKEN"] = st.secrets["hf"]
+
+ async def run_pipeline(url: str, query: str):
+     # 1️⃣ Crawl the page and capture its content as Markdown
+     browser_config = BrowserConfig()
+     run_config = CrawlerRunConfig()
+
+     async with AsyncWebCrawler(config=browser_config) as crawler:
+         result = await crawler.arun(url=url, config=run_config)
+
+     # 2️⃣ Wrap the Markdown in a LangChain Document and split it into chunks
+     doc = Document(page_content=result.markdown.raw_markdown)
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+     chunks = text_splitter.split_documents([doc])
+
+     # 3️⃣ Embed the chunks and index them in an in-memory Chroma store
+     emb = HuggingFaceEmbeddings(model_name="avsolatorio/GIST-small-Embedding-v0")
+     cb = Chroma(embedding_function=emb)
+     cb.add_documents(chunks)
+
+     # 4️⃣ Retrieve the chunks most similar to the question
+     docs = cb.similarity_search(query, k=3)
+
+     # 5️⃣ Answer with Llama 3.1 8B Instruct served through the Nebius provider
+     llama_model = HuggingFaceEndpoint(
+         repo_id="meta-llama/Llama-3.1-8B-Instruct",
+         provider="nebius",
+         temperature=0.7,
+         max_new_tokens=300,
+         task="conversational"
+     )
+     llama = ChatHuggingFace(llm=llama_model)
+
+     # Use all retrieved chunks as context, not just the top hit
+     context = "\n\n".join(d.page_content for d in docs)
+     response = llama.invoke(
+         f"Context: {context}\n\nQuestion: {query}"
+     )
+     return response.content
+
+ # Streamlit UI
+ st.title("🌐🔍 Ask Any Website with Llama3")
+ st.write("Enter a URL and your question, and this app will crawl the site and answer using Llama3!")
+
+ url = st.text_input("📌 Website URL", placeholder="https://www.example.com")
+ query = st.text_input("💬 Your Question", placeholder="What is this website about?")
+
+ if st.button("🔎 Crawl & Answer"):
+     if not url.strip() or not query.strip():
+         st.warning("❗ Please enter both a URL and a question.")
+     else:
+         with st.spinner("🕸️ Crawling website and generating answer..."):
+             result = asyncio.run(run_pipeline(url, query))
+             st.success(f"✅ **Answer:** {result}")
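
Note that `st.secrets["hf"]` only resolves when a Space secret (or a local `.streamlit/secrets.toml` entry) named `hf` exists; anywhere else the script raises before the UI renders. Below is a minimal sketch of a more forgiving loader, assuming a fallback to an already-exported `HF_TOKEN` is acceptable; the `load_hf_token` helper is hypothetical and not part of this commit.

# Hedged sketch (not part of this commit): token loading that works both on the
# Space, where st.secrets["hf"] is set, and locally, where only HF_TOKEN may exist.
import os
import streamlit as st

def load_hf_token():
    # st.secrets raises if no secrets store is configured, and a missing key
    # raises KeyError, so fall back to the environment in either case.
    try:
        return st.secrets["hf"]
    except Exception:
        return os.environ.get("HF_TOKEN")

token = load_hf_token()
if token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = token
    os.environ["HF_TOKEN"] = token

Swapping this in for the two hard-coded os.environ lines would likely let the same app.py run both on the Space and on a local machine without edits.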