Spaces:

asasasaasasa
/

translate_tl

Sleeping

translate_tl / utils /file_readers.py

init

da8d2e4 4 months ago

875 Bytes

	# utils/file_readers.py

	import docx
	import PyPDF2

	def read_txt(file_path):
	    """Return the full contents of a UTF-8 encoded text file.

	    Args:
	        file_path: Path of the text file to read.

	    Returns:
	        The decoded contents of the file as a single string, without a
	        leading byte-order mark.
	    """
	    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
	    # leading BOM, which would otherwise show up as '\ufeff' in the text.
	    with open(file_path, 'r', encoding='utf-8-sig') as f:
	        return f.read()

	def read_docx(file_path):
	    """Return the paragraph text of a .docx document.

	    Args:
	        file_path: Location of the document, passed to ``docx.Document``.

	    Returns:
	        The text of every entry in ``doc.paragraphs``, in order, joined
	        with newline characters. Only ``doc.paragraphs`` is read here.
	    """
	    doc = docx.Document(file_path)
	    return '\n'.join(para.text for para in doc.paragraphs)

	def read_pdf(file_path):
	    """Return the extracted text of every page of a PDF, concatenated.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        The text returned by ``extract_text()`` for each page, in page
	        order, concatenated with no separator. Pages for which
	        ``extract_text()`` yields a falsy value (empty string or None)
	        contribute nothing.
	    """
	    with open(file_path, 'rb') as f:
	        reader = PyPDF2.PdfReader(f)
	        page_texts = (page.extract_text() for page in reader.pages)
	        # Join while the file is still open: the generator only pulls page
	        # data when consumed. A single join also avoids repeated string +=.
	        return ''.join(chunk for chunk in page_texts if chunk)

	def read_file(file_path):
	    """Read a file with the reader that matches its extension.

	    The extension check is case-insensitive, and ``file_path`` may be a
	    string or any ``os.PathLike`` object such as ``pathlib.Path``.

	    Args:
	        file_path: Path of the file to read.

	    Returns:
	        The text produced by ``read_txt``, ``read_docx`` or ``read_pdf``
	        for ``.txt``, ``.docx`` and ``.pdf`` files respectively, or an
	        empty string for any other extension.
	    """
	    import os

	    # Normalise to a plain path string so PathLike inputs work, and compare
	    # a lower-cased copy so 'REPORT.PDF' is handled like 'report.pdf'.
	    path = os.fspath(file_path)
	    lowered = path.lower()

	    if lowered.endswith('.txt'):
	        return read_txt(path)
	    if lowered.endswith('.docx'):
	        return read_docx(path)
	    if lowered.endswith('.pdf'):
	        return read_pdf(path)
	    # Unsupported types keep yielding an empty string so existing callers
	    # that rely on this fallback are unaffected.
	    return ""