devaldicaliesta committed on
Commit
e32cea3
·
1 Parent(s): 150fd2f

first commit

Browse files
Files changed (2) hide show
  1. app.py +62 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import tempfile
import time
from pathlib import Path

import streamlit as st
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
10
+
11
# Show the banner at the top of the page.
# PIL's Image.open() only accepts a local path or an open file object, so
# handing it an http(s) URL fails when the script starts. st.image() accepts
# an image URL directly, so the URL string is passed through as-is.
banner_image = "https://huggingface.co/spaces/wiwaaw/summary/resolve/main/banner.png"
st.image(banner_image, caption="Hugging Face LaMDA Mini Summary")
14
+
15
# Model and tokenizer
model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint):
    """Load the T5 tokenizer and model for *checkpoint*.

    Streamlit re-executes this script from the top on every user
    interaction. Wrapping the load in ``st.cache_resource`` keeps one shared
    copy of the weights instead of re-reading the checkpoint on each rerun.

    Args:
        checkpoint: Hugging Face model id or local path of the checkpoint.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    seq2seq_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return tokenizer, seq2seq_model


# Module-level names are kept so the rest of the script keeps working.
model_tokenizer, model = _load_model_and_tokenizer(model_checkpoint)
19
+
20
+ # File loader and preprocessing
21
def preprocess_pdf(file, chunk_size=170, chunk_overlap=70):
    """Extract the text of a PDF as a single string.

    The PDF is loaded with ``PyPDFLoader``, split into chunks with
    ``RecursiveCharacterTextSplitter``, and the chunk texts are concatenated
    back together in order.

    Args:
        file: Path of the PDF file to read.
        chunk_size: Chunk size passed to the text splitter.
        chunk_overlap: Overlap between neighbouring chunks passed to the
            text splitter.

    Returns:
        The concatenated ``page_content`` of every chunk (empty string when
        the splitter yields no chunks).
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(pages)
    # NOTE(review): with a non-zero chunk_overlap, neighbouring chunks may
    # share text, so the joined string can contain repeated passages --
    # confirm this is intended before changing the defaults.
    return "".join(chunk.page_content for chunk in chunks)
30
+
31
@st.cache_data
def _summarize_text(input_text):
    """Summarize *input_text* with the module-level model and tokenizer.

    Cached on the text itself, so identical documents reuse the stored
    summary while different documents never collide.
    """
    summarization_pipeline = pipeline(
        'summarization',
        model=model,
        tokenizer=model_tokenizer,
        max_length=500,
        min_length=32,
    )
    summary_result = summarization_pipeline(input_text)
    return summary_result[0]['summary_text']


def language_model_pipeline(filepath):
    """Return a summary of the PDF stored at *filepath*.

    The cache used to be keyed on the path alone, so uploading a different
    PDF under the same file name returned the previous file's summary. The
    text is now extracted on every call and the cache is keyed on that text.

    Args:
        filepath: Path of the PDF file to summarize.

    Returns:
        The generated summary text.
    """
    return _summarize_text(preprocess_pdf(filepath))
44
+
45
# User interface
title = st.title("PDF Summarization using LaMini")
uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])

if uploaded_file is not None:
    st.success("File uploaded")

    if st.button("Summarize"):
        # The spinner now wraps the real work; previously it only covered a
        # fixed 10-second sleep and the summarization ran after it closed.
        with st.spinner("Summarizing..."):
            # The PDF loader needs a filesystem path. Write the upload into
            # a private temporary directory (removed on exit) instead of the
            # working directory under the client-supplied file name.
            with tempfile.TemporaryDirectory() as tmp_dir:
                pdf_path = Path(tmp_dir) / "upload.pdf"
                # getvalue() returns the whole upload regardless of the
                # current read position of the buffer.
                pdf_path.write_bytes(uploaded_file.getvalue())
                summarized_result = language_model_pipeline(str(pdf_path))

        st.success("Summary:")
        st.write(summarized_result)
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ sentence_transformers
3
+ torch
4
+ sentencepiece
5
+ transformers
6
+ accelerate
7
+ chromadb
8
+ pypdf
9
+ tiktoken
10
+ streamlit
11
+ fastapi
12
+ uvicorn
13
+ python-multipart
14
+ aiofiles