whisper-jax-yt-summary

Build error

App Files Files Community

ohmygod0193 committed on Feb 16, 2024

Commit

eafcc47

verified ·

1 Parent(s): 541dd53

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -81

app.py CHANGED Viewed

@@ -60,89 +60,18 @@ def transcribe_youtube_video(url, force_transcribe=False,use_api=False,api_token
 def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_sample=False):
-    from langchain.chains.llm import LLMChain
     from langchain.prompts import PromptTemplate
-    from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
-    from langchain.chains.combine_documents.stuff import StuffDocumentsChain
-    import torch
-    import transformers
-    from transformers import BitsAndBytesConfig
-    from transformers import AutoTokenizer, AutoModelForCausalLM
-    from langchain import HuggingFacePipeline
-    import torch
-    model_kwargs1 = {"temperature":temperature ,
-                    "do_sample":do_sample,
-                    "min_new_tokens":300-25,
-                    "max_new_tokens":300+25,
-                    'repetition_penalty':20.0
-                    }
-    model_kwargs2 = {"temperature":temperature ,
-                    "do_sample":do_sample,
-                    "min_new_tokens":words,
-                    "max_new_tokens":words+100,
-                    'repetition_penalty':20.0
-                    }
-    if not do_sample:
-        del model_kwargs1["temperature"]
-        del model_kwargs2["temperature"]
-    if use_api:
-        from langchain import HuggingFaceHub
-        # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
-        llm=HuggingFaceHub(
-            repo_id=llm_model_id, model_kwargs=model_kwargs1,
-            huggingfacehub_api_token=api_token
-            )
-        llm2=HuggingFaceHub(
-            repo_id=llm_model_id, model_kwargs=model_kwargs2,
-            huggingfacehub_api_token=api_token
-            )
-        summary_source = 'The summary was generated using {} via Hugging Face API.'.format(llm_model_id)
-    else:
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_use_double_quant=True,
-            )
-        tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
-        model = AutoModelForCausalLM.from_pretrained(llm_model_id,
-                                                    # quantization_config=quantization_config
-                                                    )
-        model.to_bettertransformer()
-        pipeline = transformers.pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            pad_token_id=tokenizer.eos_token_id,
-            **model_kwargs1,
-        )
-        pipeline2 = transformers.pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            pad_token_id=tokenizer.eos_token_id,
-            **model_kwargs2,
-        )
-        llm = HuggingFacePipeline(pipeline=pipeline)
-        llm2 = HuggingFacePipeline(pipeline=pipeline2)
-        summary_source = 'The summary was generated using {} hosted locally.'.format(llm_model_id)
     # Map templates
-    map_template = """
     As an AI tasked with summarizing a video, your objective is to distill the key insights without introducing new information. This prompt aims to provide a concise summary.\n
     ----------------------- \n
     TITLE: `{title}`\n
@@ -200,7 +129,7 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
         template = combine_template,
         input_variables = ['title','doc_summaries','words']
         )
-    combine_chain = LLMChain(llm=llm2, prompt=combine_prompt)
     # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
     combine_documents_chain = StuffDocumentsChain(
@@ -242,7 +171,7 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
     summary = map_reduce_chain.run({'input_documents':docs, 'title':title, 'words':words})
     try:
-        del(map_reduce_chain,reduce_documents_chain,combine_chain,collapse_documents_chain,map_chain,collapse_chain,llm,llm2,pipeline,pipeline2,model,tokenizer)
     except:
         pass
     torch.cuda.empty_cache()

 def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_sample=False):
+    from langchain_google_genai import ChatGoogleGenerativeAI
     from langchain.prompts import PromptTemplate
+    from langchain.chains import LLMChain
+    GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
+    genai.configure()
+    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
+    llm_model_id = 'Gemini-Pro'
+    summary_source = 'The summary was generated using {} via Hugging Face API.'.format(llm_model_id)
     # Map templates
+    prompt_template = """
     As an AI tasked with summarizing a video, your objective is to distill the key insights without introducing new information. This prompt aims to provide a concise summary.\n
     ----------------------- \n
     TITLE: `{title}`\n
         template = combine_template,
         input_variables = ['title','doc_summaries','words']
         )
+    combine_chain = LLMChain(llm=llm, prompt=combine_prompt)
     # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
     combine_documents_chain = StuffDocumentsChain(
     summary = map_reduce_chain.run({'input_documents':docs, 'title':title, 'words':words})
     try:
+        del(map_reduce_chain,reduce_documents_chain,combine_chain,collapse_documents_chain,map_chain,collapse_chain,llm)
     except:
         pass
     torch.cuda.empty_cache()