Spaces:

Sasiraj01
/

RagSkincancersass

Running

App Files Files Community

RagSkincancersass / app-2.py

Sasiraj01

Upload app-2.py

7c5b479 verified 7 months ago

raw

history blame contribute delete

1.97 kB


	import gradio as gr
	from transformers import AutoProcessor, LlavaForConditionalGeneration
	from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.vector_stores.faiss import FaissVectorStore
	from llama_index.storage.storage_context import StorageContext
	import torch
	from PIL import Image
	import os

	# Load the LLaVA model and its processor.
	model_id = "llava-hf/llava-1.5-7b-hf"
	# Half precision is only used on GPU: float16 inference on CPU is slow and
	# some ops are not implemented for it, so fall back to float32 there.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	model_dtype = torch.float16 if device == "cuda" else torch.float32
	processor = AutoProcessor.from_pretrained(model_id)
	model = LlavaForConditionalGeneration.from_pretrained(
	    model_id, torch_dtype=model_dtype, low_cpu_mem_usage=True
	)
	model.to(device)

	# Load every document under ./docs and build the vector index over them,
	# embedding with the BAAI/bge-small-en model.
	# NOTE(review): no FaissVectorStore / StorageContext is passed below, so the
	# index presumably uses llama_index's default vector store rather than
	# FAISS -- confirm whether FAISS was intended.
	documents = SimpleDirectoryReader("docs").load_data()
	embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")
	service_context = ServiceContext.from_defaults(embed_model=embed_model)
	set_global_service_context(service_context)

	index = VectorStoreIndex.from_documents(documents, service_context=service_context)
	query_engine = index.as_query_engine()

	def multimodal_rag(image, question):
	    """Answer a question about an image using retrieved document context.

	    Args:
	        image: The uploaded image (a PIL image from the Gradio input), or
	            ``None`` when nothing was uploaded.
	        question: The user's question as text.

	    Returns:
	        The text generated by LLaVA for the question, or a short message
	        asking for the missing input.
	    """
	    # Gradio passes None / "" when the user submits without filling a field.
	    if image is None:
	        return "Please upload an image."
	    if not question or not question.strip():
	        return "Please enter a question."

	    # Step 1: RAG to retrieve context
	    context = str(query_engine.query(question))

	    # Step 2: Process with LLaVA
	    # LLaVA-1.5 expects the "USER: <image>\n... ASSISTANT:" chat format; the
	    # <image> placeholder marks where the image features are inserted.
	    prompt = (
	        "USER: <image>\n"
	        f"Context: {context}\n\n"
	        f"Question: {question}\n"
	        "ASSISTANT:"
	    )
	    # Keyword arguments keep this independent of the processor's positional
	    # order; floating-point inputs are cast to the model's dtype.
	    inputs = processor(text=prompt, images=image, return_tensors="pt").to(
	        model.device, model.dtype
	    )
	    output = model.generate(**inputs, max_new_tokens=100)

	    # generate() returns prompt + completion; decode only the new tokens so
	    # the answer does not echo the prompt and retrieved context.
	    prompt_len = inputs["input_ids"].shape[1]
	    answer = processor.decode(output[0][prompt_len:], skip_special_tokens=True)
	    return answer.strip()

	# Gradio UI: an image upload and a question box are passed to
	# multimodal_rag, and its return value is displayed as plain text.
	image_input = gr.Image(type="pil", label="Upload Image")
	question_input = gr.Textbox(label="Enter your question")

	demo = gr.Interface(
	    fn=multimodal_rag,
	    inputs=[image_input, question_input],
	    outputs="text",
	    title="Multimodal RAG with LLaVA and FAISS",
	    description="Upload an image and ask a question. The system retrieves relevant text using FAISS and answers using LLaVA.",
	)

	if __name__ == "__main__":
	    # Start the Gradio server only when run as a script, not on import.
	    demo.launch()