ohmygod0193 committed
Commit eafcc47 · verified · 1 Parent(s): 541dd53

Update app.py

Files changed (1):
  1. app.py (+10 -81)
app.py CHANGED
@@ -60,89 +60,18 @@ def transcribe_youtube_video(url, force_transcribe=False,use_api=False,api_token
 
 def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_sample=False):
 
-    from langchain.chains.llm import LLMChain
+    from langchain_google_genai import ChatGoogleGenerativeAI
     from langchain.prompts import PromptTemplate
-    from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
-    from langchain.chains.combine_documents.stuff import StuffDocumentsChain
-    import torch
-    import transformers
-    from transformers import BitsAndBytesConfig
-    from transformers import AutoTokenizer, AutoModelForCausalLM
-
-    from langchain import HuggingFacePipeline
-    import torch
-
-    model_kwargs1 = {"temperature":temperature,
-                     "do_sample":do_sample,
-                     "min_new_tokens":300-25,
-                     "max_new_tokens":300+25,
-                     'repetition_penalty':20.0
-                     }
-    model_kwargs2 = {"temperature":temperature,
-                     "do_sample":do_sample,
-                     "min_new_tokens":words,
-                     "max_new_tokens":words+100,
-                     'repetition_penalty':20.0
-                     }
-    if not do_sample:
-        del model_kwargs1["temperature"]
-        del model_kwargs2["temperature"]
-
-    if use_api:
-
-        from langchain import HuggingFaceHub
-
-        # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
-        llm=HuggingFaceHub(
-            repo_id=llm_model_id, model_kwargs=model_kwargs1,
-            huggingfacehub_api_token=api_token
-        )
-        llm2=HuggingFaceHub(
-            repo_id=llm_model_id, model_kwargs=model_kwargs2,
-            huggingfacehub_api_token=api_token
-        )
-        summary_source = 'The summary was generated using {} via Hugging Face API.'.format(llm_model_id)
-
-    else:
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_use_double_quant=True,
-        )
-
-        tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
-        model = AutoModelForCausalLM.from_pretrained(llm_model_id,
-                                                     # quantization_config=quantization_config
-                                                     )
-        model.to_bettertransformer()
-
-        pipeline = transformers.pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            pad_token_id=tokenizer.eos_token_id,
-            **model_kwargs1,
-        )
-        pipeline2 = transformers.pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            pad_token_id=tokenizer.eos_token_id,
-            **model_kwargs2,
-        )
-        llm = HuggingFacePipeline(pipeline=pipeline)
-        llm2 = HuggingFacePipeline(pipeline=pipeline2)
-
-        summary_source = 'The summary was generated using {} hosted locally.'.format(llm_model_id)
+    from langchain.chains import LLMChain
+    GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
+    genai.configure()
+
+    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
+    llm_model_id = 'Gemini-Pro'
+    summary_source = 'The summary was generated using {} via Hugging Face API.'.format(llm_model_id)
 
     # Map templates
-    map_template = """
+    prompt_template = """
    As an AI tasked with summarizing a video, your objective is to distill the key insights without introducing new information. This prompt aims to provide a concise summary.\n
    ----------------------- \n
    TITLE: `{title}`\n
@@ -200,7 +129,7 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
         template = combine_template,
         input_variables = ['title','doc_summaries','words']
     )
-    combine_chain = LLMChain(llm=llm2, prompt=combine_prompt)
+    combine_chain = LLMChain(llm=llm, prompt=combine_prompt)
 
     # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
     combine_documents_chain = StuffDocumentsChain(
@@ -242,7 +171,7 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
     summary = map_reduce_chain.run({'input_documents':docs, 'title':title, 'words':words})
 
     try:
-        del(map_reduce_chain,reduce_documents_chain,combine_chain,collapse_documents_chain,map_chain,collapse_chain,llm,llm2,pipeline,pipeline2,model,tokenizer)
+        del(map_reduce_chain,reduce_documents_chain,combine_chain,collapse_documents_chain,map_chain,collapse_chain,llm)
    except:
        pass
    torch.cuda.empty_cache()
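For reference while reviewing, here is a minimal, self-contained sketch of the Gemini-backed path this commit switches to. It is an illustration, not the committed code: it makes explicit the `import os` and `google.generativeai` imports that the added lines rely on, and the combine prompt is a hypothetical stand-in for the repo's `combine_template` (which sits between the hunks above).

# Sketch only: the committed hunk assumes `os` and `genai` are imported elsewhere in app.py.
import os
import google.generativeai as genai

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
# The commit calls genai.configure() with no arguments, which falls back to the
# same GOOGLE_API_KEY environment variable; passing it explicitly is equivalent.
genai.configure(api_key=GOOGLE_API_KEY)

llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)

# Hypothetical stand-in for the repo's combine_template.
combine_prompt = PromptTemplate(
    template="Summarize the video `{title}` in about {words} words:\n{doc_summaries}",
    input_variables=["title", "doc_summaries", "words"],
)
combine_chain = LLMChain(llm=llm, prompt=combine_prompt)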
 
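The map-reduce scaffolding around these hunks is unchanged by the commit. Continuing the sketch above (reusing `llm` and `combine_chain`), here is a hedged reconstruction of how the names referenced in the later hunks fit together, following the standard LangChain map-reduce summarization pattern; the map template and `token_max` value are assumptions, since the real ones are elided between the hunks.

# Hedged reconstruction of the unchanged map-reduce wiring; names follow the diff context.
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.docstore.document import Document

# Hypothetical map prompt; the repo builds its own from the map template shown above.
map_prompt = PromptTemplate(
    template="Summarize this part of the video `{title}`:\n{text}\nCONCISE SUMMARY:",
    input_variables=["title", "text"],
)
map_chain = LLMChain(llm=llm, prompt=map_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
combine_documents_chain = StuffDocumentsChain(
    llm_chain=combine_chain,
    document_variable_name="doc_summaries",
)

# Iteratively collapses the mapped summaries until they fit the model's context window
reduce_documents_chain = ReduceDocumentsChain(
    combine_documents_chain=combine_documents_chain,
    collapse_documents_chain=combine_documents_chain,
    token_max=4000,  # assumption; the committed value is not shown in the diff
)

map_reduce_chain = MapReduceDocumentsChain(
    llm_chain=map_chain,
    reduce_documents_chain=reduce_documents_chain,
    document_variable_name="text",  # assumption: the variable the map template expects
)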
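And a hypothetical end-to-end invocation, mirroring the `run` call in the last hunk; in app.py the documents come from the chunked transcript rather than placeholder strings:

docs = [Document(page_content="first chunk of the transcript ..."),
        Document(page_content="second chunk of the transcript ...")]
summary = map_reduce_chain.run({'input_documents': docs, 'title': 'Demo video', 'words': 100})
print(summary)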