Spaces:
Sleeping
Sleeping
| import os | |
| from importlib.metadata import version | |
| from inspect import currentframe, getframeinfo | |
| from pathlib import Path | |
| from decouple import config | |
| from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP | |
| from theflow.settings.default import * # noqa | |
# Resolve the path of this settings module through frame introspection.
cur_frame = currentframe()
if cur_frame is None:
    raise ValueError("Cannot get the current frame.")
this_file = getframeinfo(cur_frame).filename

# Root directory used by the path settings in this module. It is a fixed
# absolute path; the file-relative alternative would be:
#   this_dir = Path(this_file).parent
this_dir = Path("/data/kotaemon/ktem_app_data")

# Distribution name used for the installed-version lookup.
# Change this if your app uses a different name.
KH_PACKAGE_NAME = "kotaemon_app"


def _installed_version(package_name):
    """Return the installed version of *package_name*, or "local" on any failure.

    Caution: the metadata lookup might produce the wrong version, see
    https://stackoverflow.com/a/59533071
    """
    try:
        return version(package_name)
    except Exception:
        # Best effort: every lookup failure falls back to the literal "local".
        return "local"


# An explicit, non-empty KH_APP_VERSION setting wins; otherwise fall back to the
# installed package metadata.
KH_APP_VERSION = config("KH_APP_VERSION", None) or _installed_version(
    KH_PACKAGE_NAME
)

# Basic runtime switches, each overridable through the configuration source.
KH_GRADIO_SHARE = config("KH_GRADIO_SHARE", cast=bool, default=False)
KH_ENABLE_FIRST_SETUP = config("KH_ENABLE_FIRST_SETUP", cast=bool, default=True)
KH_DEMO_MODE = config("KH_DEMO_MODE", cast=bool, default=False)
KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")
# The app can be run from anywhere, so app data is anchored at ``this_dir``
# instead of the current working directory.
# NOTE(review): ``this_dir`` is a hard-coded path that already ends in
# "ktem_app_data", so this resolves to ".../ktem_app_data/ktem_app_data" —
# confirm the doubled segment is intended.
KH_APP_DATA_DIR = this_dir / "ktem_app_data"
# Captured before any directory is created below, so it reflects whether the
# app data directory existed prior to this import.
KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()

# User data directory
KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data"
# Markdown output directory
KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir"
# Chunks output directory
KH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / "chunks_cache_dir"
# Zip output directory
KH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir"
# Zip input directory
KH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir_in"

# Create the whole directory layout up front (no error if it already exists).
for _data_dir in (
    KH_APP_DATA_DIR,
    KH_USER_DATA_DIR,
    KH_MARKDOWN_OUTPUT_DIR,
    KH_CHUNKS_OUTPUT_DIR,
    KH_ZIP_OUTPUT_DIR,
    KH_ZIP_INPUT_DIR,
):
    _data_dir.mkdir(parents=True, exist_ok=True)

# HF models can be big, so store them in the app data directory where it is
# easier for users to manage their storage.
# ref: https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache
_hf_cache_dir = str(KH_APP_DATA_DIR / "huggingface")
os.environ["HF_HOME"] = _hf_cache_dir
os.environ["HF_HUB_CACHE"] = _hf_cache_dir

# Doc directory
# NOTE(review): this also hangs off the hard-coded ``this_dir``, not the
# directory containing this file — confirm docs are expected there.
KH_DOC_DIR = this_dir / "docs"
KH_MODE = "dev"

# Feature switches read from the configuration source.
KH_SSO_ENABLED = config("KH_SSO_ENABLED", cast=bool, default=False)
KH_FEATURE_CHAT_SUGGESTION = config(
    "KH_FEATURE_CHAT_SUGGESTION",
    cast=bool,
    default=False,
)
KH_FEATURE_USER_MANAGEMENT = config(
    "KH_FEATURE_USER_MANAGEMENT",
    cast=bool,
    default=True,
)
KH_USER_CAN_SEE_PUBLIC = None

# Admin credentials for user management.
# NOTE(review): both values default to "admin" when not configured — make sure
# real deployments override them.
KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
    config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
)
KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
    config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin")
)

KH_ENABLE_ALEMBIC = False

# Database URL and uploaded-file storage, both under the user data directory.
KH_DATABASE = "sqlite:///" + str(KH_USER_DATA_DIR / "sql.db")
KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files")

# Web search backend. Alternative:
#   "kotaemon.indices.retrievers.jina_web_search.WebSearch"
KH_WEB_SEARCH_BACKEND = "kotaemon.indices.retrievers.tavily_web_search.WebSearch"

# Document store. Alternatives for "__type__":
#   "kotaemon.storages.ElasticsearchDocumentStore"
#   "kotaemon.storages.SimpleFileDocumentStore"
KH_DOCSTORE = {
    "__type__": "kotaemon.storages.LanceDBDocumentStore",
    "path": str(KH_USER_DATA_DIR / "docstore"),
}

# Vector store. Alternatives for "__type__":
#   "kotaemon.storages.LanceDBVectorStore"
#   "kotaemon.storages.MilvusVectorStore"
#   "kotaemon.storages.QdrantVectorStore"
KH_VECTORSTORE = {
    "__type__": "kotaemon.storages.ChromaVectorStore",
    "path": str(KH_USER_DATA_DIR / "vectorstore"),
}

# Model registries, filled in by the provider sections that follow.
KH_LLMS = {}
KH_EMBEDDINGS = {}
KH_RERANKINGS = {}
# Populate options from config.
#
# Azure OpenAI entries are registered only when both the API key and the
# endpoint are configured.
# NOTE(review): nesting was reconstructed from a copy of this file that had lost
# its indentation; the embeddings entry is assumed to sit under the same
# key/endpoint guard as the chat entry — confirm.
_azure_api_key = config("AZURE_OPENAI_API_KEY", default="")
_azure_endpoint = config("AZURE_OPENAI_ENDPOINT", default="")

if _azure_api_key and _azure_endpoint:
    # An unset or empty OPENAI_API_VERSION falls back to a fixed preview version.
    _azure_api_version = (
        config("OPENAI_API_VERSION", default="") or "2024-02-15-preview"
    )
    _azure_chat_deployment = config("AZURE_OPENAI_CHAT_DEPLOYMENT", default="")
    _azure_embeddings_deployment = config(
        "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""
    )

    if _azure_chat_deployment:
        KH_LLMS["azure"] = {
            "spec": {
                "__type__": "kotaemon.llms.AzureChatOpenAI",
                "temperature": 0,
                "azure_endpoint": _azure_endpoint,
                "api_key": _azure_api_key,
                "api_version": _azure_api_version,
                "azure_deployment": _azure_chat_deployment,
                "timeout": 20,
            },
            "default": False,
        }

    if _azure_embeddings_deployment:
        KH_EMBEDDINGS["azure"] = {
            "spec": {
                "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
                "azure_endpoint": _azure_endpoint,
                "api_key": _azure_api_key,
                "api_version": _azure_api_version,
                "azure_deployment": _azure_embeddings_deployment,
                "timeout": 10,
            },
            "default": False,
        }
# Placeholder used when no OpenAI key is configured.
OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT)
GOOGLE_API_KEY = config("GOOGLE_API_KEY", default="your-key")

# True only when a non-empty key other than the placeholder is configured.
# It decides whether the OpenAI entries below are marked as the default models.
IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT

# The placeholder key is truthy too, so the OpenAI entries are registered unless
# OPENAI_API_KEY is explicitly set to an empty value.
# NOTE(review): nesting was reconstructed from a copy of this file that had lost
# its indentation; both entries are assumed to sit under this guard — confirm.
if OPENAI_API_KEY:
    # Resolve the base URL once so chat and embeddings always agree. An unset
    # OR empty OPENAI_API_BASE falls back to the public endpoint; previously the
    # embeddings entry received "" when the variable was defined but empty.
    _openai_base_url = (
        config("OPENAI_API_BASE", default="") or "https://api.openai.com/v1"
    )

    KH_LLMS["openai"] = {
        "spec": {
            "__type__": "kotaemon.llms.ChatOpenAI",
            "temperature": 0,
            "base_url": _openai_base_url,
            "api_key": OPENAI_API_KEY,
            "model": config("OPENAI_CHAT_MODEL", default="gpt-4o-mini"),
            "timeout": 20,
        },
        "default": IS_OPENAI_DEFAULT,
    }
    KH_EMBEDDINGS["openai"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
            "base_url": _openai_base_url,
            "api_key": OPENAI_API_KEY,
            "model": config(
                "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-3-large"
            ),
            "timeout": 10,
            "context_length": 8191,
        },
        "default": IS_OPENAI_DEFAULT,
    }
# Local models are registered only when LOCAL_MODEL is configured.
# NOTE(review): nesting was reconstructed from a copy of this file that had lost
# its indentation; all four entries below (including "fast_embed") are assumed
# to sit under this guard — confirm.
if config("LOCAL_MODEL", default=""):
    _local_model = config("LOCAL_MODEL", default="qwen2.5:7b")

    # The long-context entry is given the Ollama URL without its "v1/" part.
    # A plain replace("v1/", "") silently does nothing when KH_OLLAMA_URL is
    # configured without a trailing slash (".../v1"), so that suffix is
    # stripped as well. The trailing "/" is kept so both spellings produce the
    # same root URL.
    _ollama_root_url = KH_OLLAMA_URL.replace("v1/", "")
    if _ollama_root_url.endswith("/v1"):
        _ollama_root_url = _ollama_root_url[: -len("v1")]

    KH_LLMS["ollama"] = {
        "spec": {
            "__type__": "kotaemon.llms.ChatOpenAI",
            "base_url": KH_OLLAMA_URL,
            "model": _local_model,
            "api_key": "ollama",
        },
        "default": False,
    }
    KH_LLMS["ollama-long-context"] = {
        "spec": {
            "__type__": "kotaemon.llms.LCOllamaChat",
            "base_url": _ollama_root_url,
            "model": _local_model,
            "num_ctx": 8192,
        },
        "default": False,
    }

    KH_EMBEDDINGS["ollama"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
            "base_url": KH_OLLAMA_URL,
            "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
            "api_key": "ollama",
        },
        "default": False,
    }
    KH_EMBEDDINGS["fast_embed"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
            "model_name": "BAAI/bge-base-en-v1.5",
        },
        "default": False,
    }
# Cohere key shared by the chat and embeddings entries below; it falls back to
# the "your-key" placeholder when COHERE_API_KEY is not configured.
_cohere_api_key = config("COHERE_API_KEY", default="your-key")

# Additional LLM configurations.
# The "claude" and "groq" entries carry a literal "your-key" placeholder.
# The "google" entry is the default model whenever OpenAI is not.
KH_LLMS.update(
    {
        "claude": {
            "spec": {
                "__type__": "kotaemon.llms.chats.LCAnthropicChat",
                "model_name": "claude-3-5-sonnet-20240620",
                "api_key": "your-key",
            },
            "default": False,
        },
        "google": {
            "spec": {
                "__type__": "kotaemon.llms.chats.LCGeminiChat",
                "model_name": "gemini-1.5-flash",
                "api_key": GOOGLE_API_KEY,
            },
            "default": not IS_OPENAI_DEFAULT,
        },
        "groq": {
            "spec": {
                "__type__": "kotaemon.llms.ChatOpenAI",
                "base_url": "https://api.groq.com/openai/v1",
                "model": "llama-3.1-8b-instant",
                "api_key": "your-key",
            },
            "default": False,
        },
        "cohere": {
            "spec": {
                "__type__": "kotaemon.llms.chats.LCCohereChat",
                "model_name": "command-r-plus-08-2024",
                "api_key": _cohere_api_key,
            },
            "default": False,
        },
    }
)

# Additional embeddings configurations.
KH_EMBEDDINGS.update(
    {
        "cohere": {
            "spec": {
                "__type__": "kotaemon.embeddings.LCCohereEmbeddings",
                "model": "embed-multilingual-v3.0",
                "cohere_api_key": _cohere_api_key,
                "user_agent": "default",
            },
            "default": False,
        },
        "google": {
            "spec": {
                "__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
                "model": "models/text-embedding-004",
                "google_api_key": GOOGLE_API_KEY,
            },
            "default": not IS_OPENAI_DEFAULT,
        },
    }
)
# Disabled example of a HuggingFace embeddings entry:
# KH_EMBEDDINGS["huggingface"] = {
#     "spec": {
#         "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
#         "model_name": "sentence-transformers/all-mpnet-base-v2",
#     },
#     "default": False,
# }

# Default reranking models. Unlike the entries above, the key here falls back
# to an empty string rather than the "your-key" placeholder.
KH_RERANKINGS.update(
    {
        "cohere": {
            "spec": {
                "__type__": "kotaemon.rerankings.CohereReranking",
                "model_name": "rerank-multilingual-v2.0",
                "cohere_api_key": config("COHERE_API_KEY", default=""),
            },
            "default": True,
        },
    }
)
# Dotted paths of the reasoning pipelines.
KH_REASONINGS = [
    "ktem.reasoning.simple.FullQAPipeline",
    "ktem.reasoning.simple.FullDecomposeQAPipeline",
    "ktem.reasoning.react.ReactAgentPipeline",
    "ktem.reasoning.rewoo.RewooAgentPipeline",
]
KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", cast=bool, default=False)

# Vision endpoint URL assembled from the Azure OpenAI settings. The URL is
# built even when the endpoint or API version is unset, in which case those
# parts are simply empty.
_vlm_endpoint_base = config("AZURE_OPENAI_ENDPOINT", default="")
_vlm_deployment = config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o")
_vlm_api_version = config("OPENAI_API_VERSION", default="")
KH_VLM_ENDPOINT = (
    f"{_vlm_endpoint_base}/openai/deployments/{_vlm_deployment}"
    f"/chat/completions?api-version={_vlm_api_version}"
)

SETTINGS_APP: dict[str, dict] = {}

# Reasoning-related settings. The language choices are (name, code) pairs built
# from SUPPORTED_LANGUAGE_MAP, which maps code -> name.
_language_choices = [
    (language_name, language_code)
    for language_code, language_name in SUPPORTED_LANGUAGE_MAP.items()
]
SETTINGS_REASONING = {
    "use": {
        "name": "Reasoning options",
        "value": None,
        "choices": [],
        "component": "radio",
    },
    "lang": {
        "name": "Language",
        "value": "en",
        "choices": _language_choices,
        "component": "dropdown",
    },
    "max_context_length": {
        "name": "Max context length (LLM)",
        "value": 32000,
        "component": "number",
    },
}
# GraphRAG feature toggles.
USE_GLOBAL_GRAPHRAG = config("USE_GLOBAL_GRAPHRAG", cast=bool, default=True)
USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", cast=bool, default=False)
USE_LIGHTRAG = config("USE_LIGHTRAG", cast=bool, default=True)
USE_MS_GRAPHRAG = config("USE_MS_GRAPHRAG", cast=bool, default=True)

# Enabled graph index classes, in a fixed order: MS GraphRAG, nano, LightRAG.
GRAPHRAG_INDEX_TYPES = [
    index_type
    for enabled, index_type in (
        (USE_MS_GRAPHRAG, "ktem.index.file.graph.GraphRAGIndex"),
        (USE_NANO_GRAPHRAG, "ktem.index.file.graph.NanoGraphRAGIndex"),
        (USE_LIGHTRAG, "ktem.index.file.graph.LightRAGIndex"),
    )
    if enabled
]

# The plain file index always comes first, followed by the enabled graph ones.
KH_INDEX_TYPES = ["ktem.index.file.FileIndex"] + GRAPHRAG_INDEX_TYPES

# File extensions accepted by every index defined below.
_SUPPORTED_FILE_TYPES = (
    ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
    ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
)


def _collection_name(index_type: str) -> str:
    """Return the display name for a dotted index class path.

    Takes the last dotted segment, drops every "Index" occurrence in it, and
    appends " Collection" (e.g. "a.b.LightRAGIndex" -> "LightRAG Collection").
    """
    class_name = index_type.split(".")[-1]
    return class_name.replace("Index", "") + " Collection"


# One private collection per enabled graph index type.
GRAPHRAG_INDICES = [
    {
        "name": _collection_name(index_type),
        "config": {
            "supported_file_types": _SUPPORTED_FILE_TYPES,
            "private": True,
        },
        "index_type": index_type,
    }
    for index_type in GRAPHRAG_INDEX_TYPES
]

# The default file collection, followed by the graph collections.
KH_INDICES = [
    {
        "name": "File Collection",
        "config": {
            "supported_file_types": _SUPPORTED_FILE_TYPES,
            "private": True,
        },
        "index_type": "ktem.index.file.FileIndex",
    },
    *GRAPHRAG_INDICES,
]