Spaces:

brickfrog
/

ankigen

Running

App Files Files Community

brickfrog committed about 7 hours ago

Commit

d6f5eba

verified ·

1 Parent(s): 993126d

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +626 -335
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -20,6 +20,8 @@ import json
 import tempfile
 from pathlib import Path
 import pandas as pd
 class Step(BaseModel):
@@ -194,6 +196,76 @@ def structured_output_completion(
         raise
 def generate_cards_batch(
     client, model, topic, num_cards, system_prompt, generate_cloze=False, batch_size=3
 ):
@@ -319,30 +391,29 @@ GENERATION_MODES = [
 def generate_cards(
     api_key_input,
     subject,
-    model_name="gpt-4.1-mini",
     topic_number=1,
     cards_per_topic=2,
     preference_prompt="assume I'm a beginner",
     generate_cloze=False,
 ):
-    logger.info(f"Starting card generation for subject: {subject}")
     logger.debug(
-        f"Parameters: topics={topic_number}, cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
     )
-    # Input validation
     if not api_key_input:
         logger.warning("No API key provided")
         raise gr.Error("OpenAI API key is required")
     if not api_key_input.startswith("sk-"):
         logger.warning("Invalid API key format")
         raise gr.Error("Invalid API key format. OpenAI keys should start with 'sk-'")
-    if not subject.strip():
-        logger.warning("No subject provided")
-        raise gr.Error("Subject is required")
-    gr.Info("🚀 Starting card generation...")
     try:
         logger.debug("Initializing OpenAI client")
         client = OpenAI(api_key=api_key_input)
@@ -353,101 +424,268 @@ def generate_cards(
     model = model_name
     flattened_data = []
     total = 0
     progress_tracker = gr.Progress(track_tqdm=True)
-    system_prompt = f"""
-    You are an expert educator in {subject}, creating an optimized learning sequence.
-    Your goal is to:
-    1. Break down the subject into logical concepts
-    2. Identify prerequisites and learning outcomes
-    3. Generate cards that build upon each other
-    4. Address and correct common misconceptions
-    5. Include verification steps to minimize hallucinations
-    6. Provide a recommended study order
-    For explanations and examples:
-    - Keep explanations in plain text
-    - Format code examples with triple backticks (```)
-    - Separate conceptual examples from code examples
-    - Use clear, concise language
-    Keep in mind the user's preferences: {preference_prompt}
-    """
-    topic_prompt = f"""
-    Generate the top {topic_number} important subjects to know about {subject} in
-    order of ascending difficulty. Return your response as a JSON object with the following structure:
-    {{
-        "topics": [
-            {{
-                "name": "topic name",
-                "difficulty": "beginner/intermediate/advanced",
-                "description": "brief description"
-            }}
-        ]
-    }}
-    """
     try:
-        logger.info("Generating topics...")
-        topics_response = structured_output_completion(
-            client, model, {"type": "json_object"}, system_prompt, topic_prompt
-        )
-        if not topics_response or "topics" not in topics_response:
-            logger.error("Invalid topics response format")
-            raise gr.Error("Failed to generate topics. Please try again.")
-        topics = topics_response["topics"]
-        gr.Info(f"✨ Generated {len(topics)} topics successfully!")
-        # Generate cards for each topic
-        for i, topic in enumerate(
-            progress_tracker.tqdm(topics, desc="Generating cards")
-        ):
             try:
-                cards = generate_cards_batch(
-                    client,
-                    model,
-                    topic["name"],
-                    cards_per_topic,
-                    system_prompt,
-                    generate_cloze=generate_cloze,
-                    batch_size=3,
                 )
-                if cards:
-                    for card_index, card in enumerate(cards, start=1):
-                        index = f"{i + 1}.{card_index}"
-                        metadata = card.metadata or {}
-                        row = [
-                            index,
-                            topic["name"],
-                            card.card_type,
-                            card.front.question,
-                            card.back.answer,
-                            card.back.explanation,
-                            card.back.example,
-                            metadata.get("prerequisites", []),
-                            metadata.get("learning_outcomes", []),
-                            metadata.get("misconceptions", []),
-                            metadata.get("difficulty", "beginner"),
-                        ]
-                        flattened_data.append(row)
-                        total += 1
-                    gr.Info(f"✅ Generated {len(cards)} cards for {topic['name']}")
-            except Exception as e:
                 logger.error(
-                    f"Failed to generate cards for topic {topic['name']}: {str(e)}"
                 )
-                gr.Warning(f"Failed to generate cards for '{topic['name']}'")
-                continue
         final_html = f"""
         <div style="text-align: center">
             <p>✅ Generation complete!</p>
@@ -455,7 +693,6 @@ def generate_cards(
         </div>
         """
-        # Convert to DataFrame with all columns
         df = pd.DataFrame(
             flattened_data,
             columns=[
@@ -472,12 +709,15 @@ def generate_cards(
                 "Difficulty",
             ],
         )
         return df, final_html, total
     except Exception as e:
         logger.error(f"Card generation failed: {str(e)}", exc_info=True)
-        raise gr.Error(f"Card generation failed: {str(e)}")
 # Update the BASIC_MODEL definition with enhanced CSS/HTML
@@ -1059,7 +1299,7 @@ with gr.Blocks(
     css="""
         #footer {display:none !important}
         .tall-dataframe {min-height: 500px !important}
-        .contain {max-width: 95% !important; margin: auto;}
         .output-cards {border-radius: 8px; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);}
         .hint-text {font-size: 0.9em; color: #666; margin-top: 4px;}
         .export-group > .gradio-group { margin-bottom: 0 !important; padding-bottom: 5px !important; }
@@ -1072,146 +1312,175 @@ with gr.Blocks(
         #### Generate comprehensive Anki flashcards using AI.
         """)
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### Configuration")
-                # Add mode selection
-                generation_mode = gr.Radio(
-                    choices=["subject", "path"],
-                    value="subject",
-                    label="Generation Mode",
-                    info="Choose how you want to generate content",
-                )
-                # Create containers for different modes
-                with gr.Group() as subject_mode:
-                    subject = gr.Textbox(
-                        label="Subject",
-                        placeholder="Enter the subject, e.g., 'Basic SQL Concepts'",
-                        info="The topic you want to generate flashcards for",
-                    )
-                with gr.Group(visible=False) as path_mode:
-                    description = gr.Textbox(
-                        label="Learning Goal",
-                        placeholder="Paste a job description or describe what you want to learn...",
-                        info="We'll break this down into learnable subjects",
-                        lines=5,
-                    )
-                    analyze_button = gr.Button(
-                        "Analyze & Break Down", variant="secondary"
-                    )
-                # Common settings
-                api_key_input = gr.Textbox(
-                    label="OpenAI API Key",
-                    type="password",
-                    placeholder="Enter your OpenAI API key",
-                    value=os.getenv("OPENAI_API_KEY", ""),
-                    info="Your OpenAI API key starting with 'sk-'",
-                )
-                # Generation Button
-                generate_button = gr.Button("Generate Cards", variant="primary")
-                # Advanced Settings in Accordion
-                with gr.Accordion("Advanced Settings", open=False):
-                    model_choice = gr.Dropdown(
-                        choices=["gpt-4.1", "gpt-4.1-nano"],  # Corrected choices
-                        value="gpt-4.1-nano",  # Changed default to nano as it's faster/cheaper
-                        label="Model Selection",
-                        info="Select the AI model to use for generation",
-                    )
-                    # Add tooltip/description for models
-                    model_info = gr.Markdown("""
-                    **Model Information:**
-                    - **gpt-4.1**: Highest quality, slower generation
-                    - **gpt-4.1-nano**: Optimized for speed and lower cost
-                    """)  # Corrected descriptions
-                    topic_number = gr.Slider(
-                        label="Number of Topics",
-                        minimum=2,
-                        maximum=20,
-                        step=1,
-                        value=2,
-                        info="How many distinct topics to cover within the subject",
-                    )
-                    cards_per_topic = gr.Slider(
-                        label="Cards per Topic",
-                        minimum=2,
-                        maximum=30,
-                        step=1,
-                        value=3,
-                        info="How many flashcards to generate for each topic",
-                    )
-                    preference_prompt = gr.Textbox(
-                        label="Learning Preferences",
-                        placeholder="e.g., 'Assume I'm a beginner' or 'Focus on practical examples'",
-                        info="Customize how the content is presented",
-                        lines=3,
-                    )
-                    generate_cloze_checkbox = gr.Checkbox(
-                        label="Generate Cloze Cards (Experimental)",
-                        value=False,
-                        info="Allow the AI to generate fill-in-the-blank style cards where appropriate.",
                     )
-            # Right column - add a new container for learning path results
-            with gr.Column(scale=2):
-                with gr.Group(visible=False) as path_results:
-                    gr.Markdown("### Learning Path Analysis")
-                    subjects_list = gr.Dataframe(
-                        headers=["Subject", "Prerequisites", "Time Estimate"],
-                        label="Recommended Subjects",
-                        interactive=False,
                     )
-                    learning_order = gr.Markdown("### Recommended Learning Order")
-                    projects = gr.Markdown("### Suggested Projects")
-                    # Replace generate_selected with use_subjects
-                    use_subjects = gr.Button(
-                        "Use These Subjects ℹ️",  # Added info emoji to button text
-                        variant="primary",
-                    )
-                    gr.Markdown(
-                        "*Click to copy subjects to main input for card generation*",
-                        elem_classes="hint-text",
-                    )
-                # Existing output components
-                with gr.Group() as cards_output:
-                    gr.Markdown("### Generated Cards")
-                    # Output Format Documentation
-                    with gr.Accordion("Output Format", open=False):
-                        gr.Markdown("""
-                        The generated cards include:
-                        * **Index**: Unique identifier for each card
-                        * **Topic**: The specific subtopic within your subject
-                        * **Card_Type**: Type of card (basic or cloze)
-                        * **Question**: Clear, focused question for the flashcard front
-                        * **Answer**: Concise core answer
-                        * **Explanation**: Detailed conceptual explanation
-                        * **Example**: Practical implementation or code example
-                        * **Prerequisites**: Required knowledge for this concept
-                        * **Learning Outcomes**: What you should understand after mastering this card
-                        * **Common Misconceptions**: Incorrect assumptions debunked with explanations
-                        * **Difficulty**: Concept complexity level for optimal study sequencing
-                        Export options:
-                        - **CSV**: Raw data for custom processing
-                        - **Anki Deck**: Ready-to-use deck with formatted cards and metadata
-                        """)
-                        # Add near the output format documentation
-                        with gr.Accordion("Example Card Format", open=False):
-                            gr.Code(
-                                label="Example Card",
-                                value="""
 {
     "front": {
         "question": "What is a PRIMARY KEY constraint in SQL?"
@@ -1231,64 +1500,56 @@ with gr.Blocks(
         "difficulty": "beginner"
     }
 }
-                                """,
-                                language="json",
-                            )
-                    # Dataframe Output
-                    output = gr.Dataframe(
-                        value=example_data,
-                        headers=[
-                            "Index",
-                            "Topic",
-                            "Card_Type",
-                            "Question",
-                            "Answer",
-                            "Explanation",
-                            "Example",
-                            "Prerequisites",
-                            "Learning_Outcomes",
-                            "Common_Misconceptions",
-                            "Difficulty",
-                        ],
-                        interactive=True,
-                        elem_classes="tall-dataframe",
-                        wrap=True,
-                        column_widths=[
-                            50,
-                            100,
-                            80,
-                            200,
-                            200,
-                            250,
-                            200,
-                            150,
-                            150,
-                            150,
-                            100,
-                        ],
                     )
-                    # Export Controls
-                    with gr.Group(elem_classes="export-group"):
-                        gr.Markdown("#### Export Generated Cards")
-                        with gr.Row():
-                            export_csv_button = gr.Button(
-                                "Export to CSV", variant="secondary"
-                            )
-                            export_anki_button = gr.Button(
-                                "Export to Anki Deck (.apkg)", variant="secondary"
-                            )
-                        # Re-wrap File components in an invisible Row
-                        with gr.Row(visible=False):
-                            download_csv = gr.File(
-                                label="Download CSV", interactive=False, visible=False
-                            )
-                            download_anki = gr.File(
-                                label="Download Anki Deck",
-                                interactive=False,
-                                visible=False,
-                            )
         # Add near the top of the Blocks
         with gr.Row():
@@ -1297,62 +1558,69 @@ with gr.Blocks(
                 label="Total Cards Generated", value=0, visible=False
             )
-        # Add JavaScript to handle mode switching
         def update_mode_visibility(mode):
-            """Update component visibility based on selected mode and clear values"""
             is_subject = mode == "subject"
             is_path = mode == "path"
-            # Clear values when switching modes
-            if is_path:
-                subject.value = ""  # Clear subject when switching to path mode
-            else:
-                description.value = (
-                    ""  # Clear description when switching to subject mode
-                )
             return {
                 subject_mode: gr.update(visible=is_subject),
                 path_mode: gr.update(visible=is_path),
                 path_results: gr.update(visible=is_path),
-                cards_output: gr.update(visible=not is_path),
-                subject: gr.update(value="") if is_path else gr.update(),
-                description: gr.update(value="") if not is_path else gr.update(),
-                output: gr.update(value=None),  # Clear previous output
                 progress: gr.update(value="", visible=False),
                 total_cards: gr.update(value=0, visible=False),
             }
-        # Update the mode switching handler to include all components that need clearing
         generation_mode.change(
             fn=update_mode_visibility,
             inputs=[generation_mode],
             outputs=[
                 subject_mode,
                 path_mode,
                 path_results,
                 cards_output,
                 subject,
                 description,
                 output,
                 progress,
                 total_cards,
             ],
         )
-        # Add handler for path analysis
         analyze_button.click(
             fn=analyze_learning_path,
             inputs=[api_key_input, description, model_choice],
             outputs=[subjects_list, learning_order, projects],
         )
-        # Add this function to handle copying subjects to main input
         def use_selected_subjects(subjects_df):
-            """Copy selected subjects to main input and switch to subject mode"""
             if subjects_df is None or subjects_df.empty:
                 gr.Warning("No subjects available to copy from Learning Path analysis.")
-                # Return updates for all relevant output components to avoid errors
                 return (
                     gr.update(),
                     gr.update(),
@@ -1363,51 +1631,74 @@ with gr.Blocks(
                     gr.update(),
                     gr.update(),
                     gr.update(),
                 )
             subjects = subjects_df["Subject"].tolist()
             combined_subject = ", ".join(subjects)
-            suggested_topics = min(
-                len(subjects) + 1, 20
-            )  # Suggest topics = num subjects + 1
-            # Return updates for relevant components
-            return (
-                "subject",  # Set mode to subject
-                gr.update(visible=True),  # Show subject_mode group
-                gr.update(visible=False),  # Hide path_mode group
-                gr.update(visible=False),  # Hide path_results group
-                gr.update(visible=True),  # Show cards_output group
-                combined_subject,  # Update subject textbox value
-                suggested_topics,  # Update topic_number slider value
-                # Update preference prompt
-                "Focus on connections between these subjects and their practical applications.",
-                example_data,  # Reset output to example data - THIS NOW WORKS
-            )
-        # Correct the outputs for the use_subjects click handler
         use_subjects.click(
             fn=use_selected_subjects,
-            inputs=[subjects_list],  # Only needs the dataframe
-            outputs=[  # Match the return tuple of the function
                 generation_mode,
-                subject_mode,  # Group visibility
-                path_mode,  # Group visibility
-                path_results,  # Group visibility
-                cards_output,  # Group visibility
-                subject,  # Component value
-                topic_number,  # Component value
-                preference_prompt,  # Component value
-                output,  # Component value
             ],
         )
-        # Simplified event handlers
         generate_button.click(
             fn=generate_cards,
             inputs=[
                 api_key_input,
                 subject,
                 model_choice,
                 topic_number,
                 cards_per_topic,

 import tempfile
 from pathlib import Path
 import pandas as pd
+import requests
+from bs4 import BeautifulSoup
 class Step(BaseModel):
         raise
+def fetch_webpage_text(url: str) -> str:
+    """Fetch a web page and return its main text content.
+    The page is downloaded with a browser-like User-Agent and a 15 second
+    timeout, <script>/<style> elements are dropped, and text is taken from
+    the first <main> element, else the first <article>, else <body>.
+    The text is then split into stripped, non-empty chunks joined by newlines.
+    Args:
+        url: Address of the page to download.
+    Returns:
+        The extracted text, one non-empty chunk per line.
+    Raises:
+        ConnectionError: The HTTP request failed (any requests
+            RequestException, including non-2xx statuses and timeouts).
+        ValueError: No text could be extracted from the page.
+        RuntimeError: Any other unexpected failure while processing the page.
+    """
+    # Local import: Beautiful Soup signals a missing parser with
+    # FeatureNotFound, which the file-level import does not bring in.
+    from bs4 import FeatureNotFound
+    # NOTE(review): generate_cards passes the user-supplied url_input straight
+    # through; consider restricting schemes/hosts if this runs on a shared server.
+    try:
+        logger.info(f"Fetching content from URL: {url}")
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        response = requests.get(url, headers=headers, timeout=15)
+        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+        logger.debug(f"Parsing HTML content for {url}")
+        # Prefer lxml for speed. When it is not installed Beautiful Soup raises
+        # FeatureNotFound (a ValueError), not ImportError, so catch that to make
+        # the html.parser fallback actually reachable.
+        try:
+            soup = BeautifulSoup(response.text, "lxml")
+        except (FeatureNotFound, ImportError):
+            logger.warning("lxml not found, using html.parser instead.")
+            soup = BeautifulSoup(response.text, "html.parser")
+        # Remove script and style elements so their source is not treated as text
+        for script_or_style in soup(["script", "style"]):
+            script_or_style.extract()
+        # Prefer semantic containers; fall back to the whole body
+        main_content = soup.find("main") or soup.find("article")
+        if main_content:
+            text = main_content.get_text()
+            logger.debug(f"Extracted text from <{main_content.name}> tag.")
+        else:
+            body = soup.find("body")
+            if body:
+                text = body.get_text()
+                logger.debug("Extracted text from <body> tag (fallback).")
+            else:
+                text = ""  # Nothing to extract; rejected by the emptiness check below
+                logger.warning(f"Could not find <body> tag in {url}")
+        # Strip each line, split on double spaces (multi-headline rows), and
+        # drop the blank pieces
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        text = "\n".join(chunk for chunk in chunks if chunk)
+        if not text:
+            logger.warning(f"Could not extract meaningful text from {url}")
+            raise ValueError("Could not extract text content from the URL.")
+        logger.info(
+            f"Successfully extracted text from {url} (Length: {len(text)} chars)"
+        )
+        return text
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Network error fetching URL {url}: {e}")
+        # Chain the cause so the underlying requests error stays in the traceback
+        raise ConnectionError(f"Could not fetch URL: {e}") from e
+    except Exception as e:
+        logger.error(f"Error processing URL {url}: {e}", exc_info=True)
+        # Let the documented error types through unchanged; wrap everything else
+        if isinstance(e, (ValueError, ConnectionError)):
+            raise
+        raise RuntimeError(
+            f"An unexpected error occurred while processing the URL: {e}"
+        ) from e
 def generate_cards_batch(
     client, model, topic, num_cards, system_prompt, generate_cloze=False, batch_size=3
 ):
 def generate_cards(
     api_key_input,
     subject,
+    generation_mode,
+    source_text,
+    url_input,
+    model_name="gpt-4.1-nano",
     topic_number=1,
     cards_per_topic=2,
     preference_prompt="assume I'm a beginner",
     generate_cloze=False,
 ):
+    logger.info(f"Starting card generation in {generation_mode} mode")
     logger.debug(
+        f"Parameters: mode={generation_mode}, topics={topic_number}, cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
     )
+    # --- Common Setup ---
     if not api_key_input:
         logger.warning("No API key provided")
         raise gr.Error("OpenAI API key is required")
     if not api_key_input.startswith("sk-"):
         logger.warning("Invalid API key format")
         raise gr.Error("Invalid API key format. OpenAI keys should start with 'sk-'")
+    # Moved client initialization up
     try:
         logger.debug("Initializing OpenAI client")
         client = OpenAI(api_key=api_key_input)
     model = model_name
     flattened_data = []
     total = 0
     progress_tracker = gr.Progress(track_tqdm=True)
+    # ---------------------
     try:
+        page_text_for_generation = ""  # Initialize variable to hold text for AI
+        # --- Web Mode --- (Fetch text first)
+        if generation_mode == "web":
+            logger.info("Generation mode: Web")
+            if not url_input or not url_input.strip():
+                logger.warning("No URL provided for web generation mode.")
+                raise gr.Error("URL is required for 'From Web' mode.")
+            gr.Info(f"🕸️ Fetching content from {url_input}...")
             try:
+                page_text_for_generation = fetch_webpage_text(url_input)
+                gr.Info(
+                    f"✅ Successfully fetched text (approx. {len(page_text_for_generation)} chars). Starting AI generation..."
                 )
+            except (ConnectionError, ValueError, RuntimeError) as e:
+                logger.error(f"Failed to fetch or process URL {url_input}: {e}")
+                raise gr.Error(
+                    f"Failed to get content from URL: {e}"
+                )  # Display fetch error to user
+            except Exception as e:  # Catch any other unexpected errors during fetch
                 logger.error(
+                    f"Unexpected error fetching URL {url_input}: {e}", exc_info=True
                 )
+                raise gr.Error(f"An unexpected error occurred fetching the URL.")
+        # --- Text Mode --- (Use provided text)
+        elif generation_mode == "text":
+            logger.info("Generation mode: Text Input")
+            if not source_text or not source_text.strip():
+                logger.warning("No source text provided for text generation mode.")
+                raise gr.Error("Source text is required for 'From Text' mode.")
+            page_text_for_generation = source_text  # Use the input text directly
+            gr.Info("🚀 Starting card generation from text...")
+        # --- Generation from Text/Web Content ---
+        if generation_mode == "text" or generation_mode == "web":
+            # Shared logic for generating cards from fetched/provided text
+            text_system_prompt = f"""
+            You are an expert educator specializing in extracting key information and creating flashcards from provided text.
+            Your goal is to generate clear, concise, and accurate flashcards based *only* on the text given by the user.
+            Focus on the most important concepts, definitions, facts, or processes mentioned.
+            Generate {cards_per_topic} cards.
+            Adhere to the user's learning preferences: {preference_prompt}
+            Use the specified JSON output format.
+            For explanations and examples:
+            - Keep explanations in plain text
+            - Format code examples with triple backticks (```)
+            - Separate conceptual examples from code examples
+            - Use clear, concise language
+            """
+            json_structure_prompt = """
+            Return your response as a JSON object with the following structure:
+            {
+                "cards": [
+                    {
+                        "card_type": "basic or cloze",
+                        "front": {
+                            "question": "question text (potentially with {{c1::cloze syntax}})"
+                        },
+                        "back": {
+                            "answer": "concise answer or full text for cloze",
+                            "explanation": "detailed explanation",
+                            "example": "practical example"
+                        },
+                        "metadata": {
+                            "prerequisites": ["list", "of", "prerequisites"],
+                            "learning_outcomes": ["list", "of", "outcomes"],
+                            "misconceptions": ["list", "of", "misconceptions"],
+                            "difficulty": "beginner/intermediate/advanced"
+                        }
+                    }
+                    // ... more cards
+                ]
+            }
+            """
+            cloze_instruction = ""
+            if generate_cloze:
+                cloze_instruction = """
+                Where appropriate, generate Cloze deletion cards.
+                - For Cloze cards, set "card_type" to "cloze".
+                - Format the question field using Anki's cloze syntax (e.g., "The capital of France is {{{{c1::Paris}}}}.").
+                - The "answer" field should contain the full, non-cloze text or specific context for the cloze.
+                - For standard question/answer cards, set "card_type" to "basic".
+                """
+            text_user_prompt = f"""
+            Generate {cards_per_topic} flashcards based *only* on the following text:
+            --- TEXT START ---
+            {page_text_for_generation}
+            --- TEXT END ---
+            {cloze_instruction}
+            {json_structure_prompt}
+            """
+            response = structured_output_completion(
+                client,
+                model,
+                {"type": "json_object"},
+                text_system_prompt,
+                text_user_prompt,
+            )
+            if not response or "cards" not in response:
+                logger.error("Invalid cards response format from text generation.")
+                raise gr.Error("Failed to generate cards from text. Please try again.")
+            # Process the cards (similar to generate_cards_batch processing)
+            cards_data = response["cards"]
+            topic_name = "From Web" if generation_mode == "web" else "From Text"
+            for card_index, card_data in enumerate(cards_data, start=1):
+                if "front" not in card_data or "back" not in card_data:
+                    logger.warning(
+                        f"Skipping card due to missing front/back data: {card_data}"
+                    )
+                    continue
+                if "question" not in card_data["front"]:
+                    logger.warning(
+                        f"Skipping card due to missing question: {card_data}"
+                    )
+                    continue
+                if (
+                    "answer" not in card_data["back"]
+                    or "explanation" not in card_data["back"]
+                    or "example" not in card_data["back"]
+                ):
+                    logger.warning(
+                        f"Skipping card due to missing answer/explanation/example: {card_data}"
+                    )
+                    continue
+                card = Card(
+                    card_type=card_data.get("card_type", "basic"),
+                    front=CardFront(**card_data["front"]),
+                    back=CardBack(**card_data["back"]),
+                    metadata=card_data.get("metadata", {}),
+                )
+                metadata = card.metadata or {}
+                row = [
+                    f"1.{card_index}",
+                    topic_name,  # Use dynamic topic name
+                    card.card_type,
+                    card.front.question,
+                    card.back.answer,
+                    card.back.explanation,
+                    card.back.example,
+                    metadata.get("prerequisites", []),
+                    metadata.get("learning_outcomes", []),
+                    metadata.get("misconceptions", []),
+                    metadata.get("difficulty", "beginner"),
+                ]
+                flattened_data.append(row)
+                total += 1
+            gr.Info(f"✅ Generated {total} cards from the provided content.")
+        # --- Subject Mode --- (Existing logic)
+        elif generation_mode == "subject":
+            logger.info(f"Generating cards for subject: {subject}")
+            if not subject or not subject.strip():
+                logger.warning("No subject provided for subject generation mode.")
+                raise gr.Error("Subject is required for 'Single Subject' mode.")
+            gr.Info("🚀 Starting card generation for subject...")
+            # Note: system_prompt uses subject variable
+            system_prompt = f"""
+            You are an expert educator in {subject}, creating an optimized learning sequence.
+            Your goal is to:
+            1. Break down the subject into logical concepts
+            2. Identify prerequisites and learning outcomes
+            3. Generate cards that build upon each other
+            4. Address and correct common misconceptions
+            5. Include verification steps to minimize hallucinations
+            6. Provide a recommended study order
+            For explanations and examples:
+            - Keep explanations in plain text
+            - Format code examples with triple backticks (```)
+            - Separate conceptual examples from code examples
+            - Use clear, concise language
+            Keep in mind the user's preferences: {preference_prompt}
+            """
+            topic_prompt = f"""
+            Generate the top {topic_number} important subjects to know about {subject} in
+            order of ascending difficulty. Return your response as a JSON object with the following structure:
+            {{
+                "topics": [
+                    {{
+                        "name": "topic name",
+                        "difficulty": "beginner/intermediate/advanced",
+                        "description": "brief description"
+                    }}
+                ]
+            }}
+            """
+            logger.info("Generating topics...")
+            topics_response = structured_output_completion(
+                client, model, {"type": "json_object"}, system_prompt, topic_prompt
+            )
+            if not topics_response or "topics" not in topics_response:
+                logger.error("Invalid topics response format")
+                raise gr.Error("Failed to generate topics. Please try again.")
+            topics = topics_response["topics"]
+            gr.Info(f"✨ Generated {len(topics)} topics successfully!")
+            # Generate cards for each topic
+            for i, topic in enumerate(
+                progress_tracker.tqdm(topics, desc="Generating cards")
+            ):
+                try:
+                    # Re-use the system_prompt defined above for topic generation
+                    cards = generate_cards_batch(
+                        client,
+                        model,
+                        topic["name"],
+                        cards_per_topic,
+                        system_prompt,  # Use the same system prompt
+                        generate_cloze=generate_cloze,
+                        batch_size=3,
+                    )
+                    if cards:
+                        for card_index, card in enumerate(cards, start=1):
+                            index = f"{i + 1}.{card_index}"
+                            metadata = card.metadata or {}
+                            row = [
+                                index,
+                                topic["name"],
+                                card.card_type,
+                                card.front.question,
+                                card.back.answer,
+                                card.back.explanation,
+                                card.back.example,
+                                metadata.get("prerequisites", []),
+                                metadata.get("learning_outcomes", []),
+                                metadata.get("misconceptions", []),
+                                metadata.get("difficulty", "beginner"),
+                            ]
+                            flattened_data.append(row)
+                            total += 1
+                        gr.Info(f"✅ Generated {len(cards)} cards for {topic['name']}")
+                except Exception as e:
+                    logger.error(
+                        f"Failed to generate cards for topic {topic['name']}: {str(e)}"
+                    )
+                    gr.Warning(f"Failed to generate cards for '{topic['name']}'")
+                    continue
+        else:
+            # Handle other modes or invalid mode if necessary
+            logger.error(f"Invalid generation mode: {generation_mode}")
+            raise gr.Error(f"Unsupported generation mode: {generation_mode}")
+        # --- Common Completion Logic ---
         final_html = f"""
         <div style="text-align: center">
             <p>✅ Generation complete!</p>
         </div>
         """
         df = pd.DataFrame(
             flattened_data,
             columns=[
                 "Difficulty",
             ],
         )
         return df, final_html, total
     except Exception as e:
         logger.error(f"Card generation failed: {str(e)}", exc_info=True)
+        # Check if e is already a gr.Error
+        if isinstance(e, gr.Error):
+            raise e
+        else:
+            raise gr.Error(f"Card generation failed: {str(e)}")
 # Update the BASIC_MODEL definition with enhanced CSS/HTML
     css="""
         #footer {display:none !important}
         .tall-dataframe {min-height: 500px !important}
+        .contain {max-width: 100% !important; margin: auto;}
         .output-cards {border-radius: 8px; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);}
         .hint-text {font-size: 0.9em; color: #666; margin-top: 4px;}
         .export-group > .gradio-group { margin-bottom: 0 !important; padding-bottom: 5px !important; }
         #### Generate comprehensive Anki flashcards using AI.
         """)
+        # Configuration Section in an Accordion
+        with gr.Accordion("Configuration Settings", open=True):
+            # Create a row to hold two columns for settings
+            with gr.Row():
+                # Column 1: Basic settings
+                with gr.Column(scale=1):
+                    # Add mode selection
+                    generation_mode = gr.Radio(
+                        choices=[
+                            ("Single Subject", "subject"),
+                            ("Learning Path", "path"),
+                            ("From Text", "text"),
+                            ("From Web", "web"),
+                        ],
+                        value="subject",
+                        label="Generation Mode",
+                        info="Choose how you want to generate content",
                     )
+                    # Create containers for different modes
+                    with gr.Group() as subject_mode:
+                        subject = gr.Textbox(
+                            label="Subject",
+                            placeholder="Enter the subject, e.g., 'Basic SQL Concepts'",
+                            info="The topic you want to generate flashcards for",
+                        )
+                    with gr.Group(visible=False) as path_mode:
+                        description = gr.Textbox(
+                            label="Learning Goal",
+                            placeholder="Paste a job description or describe what you want to learn...",
+                            info="We'll break this down into learnable subjects",
+                            lines=5,
+                        )
+                        analyze_button = gr.Button(
+                            "Analyze & Break Down", variant="secondary"
+                        )
+                    # Add group for text input mode
+                    with gr.Group(visible=False) as text_mode:
+                        source_text = gr.Textbox(
+                            label="Source Text",
+                            placeholder="Paste the text you want to generate cards from here...",
+                            info="The AI will extract key information from this text to create cards.",
+                            lines=15,
+                        )
+                    # Add group for web input mode
+                    with gr.Group(visible=False) as web_mode:
+                        url_input = gr.Textbox(
+                            label="Web Page URL",
+                            placeholder="Paste the URL of the page you want to generate cards from...",
+                            info="The AI will attempt to extract content from this URL.",
+                        )
+                    # Common settings moved inside the accordion, in column 1
+                    api_key_input = gr.Textbox(
+                        label="OpenAI API Key",
+                        type="password",
+                        placeholder="Enter your OpenAI API key",
+                        value=os.getenv("OPENAI_API_KEY", ""),
+                        info="Your OpenAI API key starting with 'sk-'",
                     )
+                # Column 2: Advanced settings accordion
+                with gr.Column(scale=1):
+                    # Advanced Settings Accordion moved inside the main accordion, in column 2
+                    with gr.Accordion("Advanced Settings", open=False):
+                        model_choice = gr.Dropdown(
+                            choices=["gpt-4.1", "gpt-4.1-nano"],  # Corrected choices
+                            value="gpt-4.1-nano",  # Changed default to nano as it's faster/cheaper
+                            label="Model Selection",
+                            info="Select the AI model to use for generation",
+                        )
+                        # Add tooltip/description for models
+                        model_info = gr.Markdown(
+                            """
+                        **Model Information:**
+                        - **gpt-4.1**: Highest quality, slower generation
+                        - **gpt-4.1-nano**: Optimized for speed and lower cost
+                        """  # Corrected descriptions
+                        )
+                        topic_number = gr.Slider(
+                            label="Number of Topics",
+                            minimum=2,
+                            maximum=20,
+                            step=1,
+                            value=2,
+                            info="How many distinct topics to cover within the subject",
+                        )
+                        cards_per_topic = gr.Slider(
+                            label="Cards per Topic",
+                            minimum=2,
+                            maximum=30,
+                            step=1,
+                            value=3,
+                            info="How many flashcards to generate for each topic",
+                        )
+                        preference_prompt = gr.Textbox(
+                            label="Learning Preferences",
+                            placeholder="e.g., 'Assume I'm a beginner' or 'Focus on practical examples'",
+                            info="Customize how the content is presented",
+                            lines=3,
+                        )
+                        generate_cloze_checkbox = gr.Checkbox(
+                            label="Generate Cloze Cards (Experimental)",
+                            value=False,
+                            info="Allow the AI to generate fill-in-the-blank style cards where appropriate.",
+                        )
+                    # End of Advanced Settings Accordion
+            # End of Row containing settings columns
+        # End of Configuration Settings Accordion
+        # Generation Button moved outside the Accordion
+        generate_button = gr.Button("Generate Cards", variant="primary")
+        # Output Area remains below the button
+        with gr.Group(
+            visible=False
+        ) as path_results:  # Initial visibility controlled by mode
+            gr.Markdown("### Learning Path Analysis")
+            subjects_list = gr.Dataframe(
+                headers=["Subject", "Prerequisites", "Time Estimate"],
+                label="Recommended Subjects",
+                interactive=False,
+            )
+            learning_order = gr.Markdown("### Recommended Learning Order")
+            projects = gr.Markdown("### Suggested Projects")
+            use_subjects = gr.Button(
+                "Use These Subjects ℹ️",
+                variant="primary",
+            )
+            gr.Markdown(
+                "*Click to copy subjects to main input for card generation*",
+                elem_classes="hint-text",
+            )
+        with gr.Group() as cards_output:  # Initial visibility controlled by mode
+            gr.Markdown("### Generated Cards")
+            # Output Format Documentation (can stay here)
+            with gr.Accordion("Output Format", open=False):
+                gr.Markdown("""
+                The generated cards include:
+                * **Index**: Unique identifier for each card
+                * **Topic**: The specific subtopic within your subject
+                * **Card_Type**: Type of card (basic or cloze)
+                * **Question**: Clear, focused question for the flashcard front
+                * **Answer**: Concise core answer
+                * **Explanation**: Detailed conceptual explanation
+                * **Example**: Practical implementation or code example
+                * **Prerequisites**: Required knowledge for this concept
+                * **Learning Outcomes**: What you should understand after mastering this card
+                * **Common Misconceptions**: Incorrect assumptions debunked with explanations
+                * **Difficulty**: Concept complexity level for optimal study sequencing
+                Export options:
+                - **CSV**: Raw data for custom processing
+                - **Anki Deck**: Ready-to-use deck with formatted cards and metadata
+                """)
+                with gr.Accordion("Example Card Format", open=False):
+                    gr.Code(
+                        label="Example Card",
+                        value="""
 {
     "front": {
         "question": "What is a PRIMARY KEY constraint in SQL?"
         "difficulty": "beginner"
     }
 }
+                        """,
+                        language="json",
                     )
+            output = gr.Dataframe(
+                value=example_data,
+                headers=[
+                    "Index",
+                    "Topic",
+                    "Card_Type",
+                    "Question",
+                    "Answer",
+                    "Explanation",
+                    "Example",
+                    "Prerequisites",
+                    "Learning_Outcomes",
+                    "Common_Misconceptions",
+                    "Difficulty",
+                ],
+                interactive=True,
+                elem_classes="tall-dataframe",
+                wrap=True,
+                column_widths=[
+                    50,
+                    100,
+                    80,
+                    200,
+                    200,
+                    250,
+                    200,
+                    150,
+                    150,
+                    150,
+                    100,
+                ],
+            )
+            with gr.Group(elem_classes="export-group"):
+                gr.Markdown("#### Export Generated Cards")
+                with gr.Row():
+                    export_csv_button = gr.Button("Export to CSV", variant="secondary")
+                    export_anki_button = gr.Button(
+                        "Export to Anki Deck (.apkg)", variant="secondary"
+                    )
+                with gr.Row():  # Row containing File components is now visible
+                    download_csv = gr.File(label="Download CSV", interactive=False)
+                    download_anki = gr.File(
+                        label="Download Anki Deck",
+                        interactive=False,
+                    )
         # Add near the top of the Blocks
         with gr.Row():
                 label="Total Cards Generated", value=0, visible=False
             )
+        # Python callback for mode switching: shows the selected mode's inputs and resets stale outputs
         def update_mode_visibility(mode):
             """Show the widgets for the selected generation mode and reset the rest.

             Args:
                 mode: Value of the ``generation_mode`` radio; compared against
                     ``"subject"``, ``"path"``, ``"text"`` and ``"web"``.

             Returns:
                 A dict mapping output components to their updates.
             """
             is_subject = mode == "subject"
             is_path = mode == "path"
             is_text = mode == "text"
             is_web = mode == "web"

             def _keep_or_clear(is_active):
                 # ``component.value`` is the value the component was built with,
                 # not what is currently shown in the browser, so it must not be
                 # written back here. Leaving the active field untouched keeps
                 # text that another callback has just placed in it (e.g. the
                 # subject copied over by "Use These Subjects"); inactive fields
                 # are emptied.
                 return gr.update() if is_active else gr.update(value="")

             return {
                 # Input groups: exactly one is visible at a time.
                 subject_mode: gr.update(visible=is_subject),
                 path_mode: gr.update(visible=is_path),
                 text_mode: gr.update(visible=is_text),
                 web_mode: gr.update(visible=is_web),
                 # Result areas: learning-path analysis vs. generated cards.
                 path_results: gr.update(visible=is_path),
                 cards_output: gr.update(visible=is_subject or is_text or is_web),
                 # Input fields.
                 subject: _keep_or_clear(is_subject),
                 description: _keep_or_clear(is_path),
                 source_text: _keep_or_clear(is_text),
                 url_input: _keep_or_clear(is_web),
                 # Drop results produced under the previous mode.
                 output: gr.update(value=None),
                 subjects_list: gr.update(value=None),
                 learning_order: gr.update(value=""),
                 projects: gr.update(value=""),
                 progress: gr.update(value="", visible=False),
                 total_cards: gr.update(value=0, visible=False),
             }
         generation_mode.change(
             fn=update_mode_visibility,
             inputs=[generation_mode],
             outputs=[
                 subject_mode,
                 path_mode,
+                text_mode,
+                web_mode,
                 path_results,
                 cards_output,
                 subject,
                 description,
+                source_text,
+                url_input,
                 output,
+                subjects_list,
+                learning_order,
+                projects,
                 progress,
                 total_cards,
             ],
         )
         analyze_button.click(
             fn=analyze_learning_path,
             inputs=[api_key_input, description, model_choice],
             outputs=[subjects_list, learning_order, projects],
         )
         def use_selected_subjects(subjects_df):
             """Copy the learning-path subjects into "Single Subject" mode.

             Args:
                 subjects_df: Contents of the ``subjects_list`` dataframe; its
                     "Subject" column is read.

             Returns:
                 A dict mapping output components to their new values/updates.
                 When there is nothing to copy, a warning is shown and the dict
                 holds a single no-op update, leaving the UI unchanged.
             """
             subjects = []
             if subjects_df is not None and not subjects_df.empty:
                 # Skip blank or missing cells so an all-blank table is treated
                 # the same as an empty one.
                 cleaned = subjects_df["Subject"].dropna().astype(str).str.strip()
                 subjects = [name for name in cleaned.tolist() if name]

             if not subjects:
                 gr.Warning("No subjects available to copy from Learning Path analysis.")
                 # A component-keyed dict leaves every component it omits
                 # unchanged, so this stays valid however many outputs the
                 # listener registers (a fixed-length tuple of updates does not).
                 return {subjects_list: gr.update()}

             combined_subject = ", ".join(subjects)
             # One topic more than the number of subjects, capped at 20.
             suggested_topics = min(len(subjects) + 1, 20)
             return {
                 generation_mode: "subject",
                 subject_mode: gr.update(visible=True),
                 path_mode: gr.update(visible=False),
                 text_mode: gr.update(visible=False),
                 web_mode: gr.update(visible=False),
                 path_results: gr.update(visible=False),
                 cards_output: gr.update(visible=True),
                 subject: combined_subject,
                 description: "",
                 source_text: "",
                 url_input: "",
                 topic_number: suggested_topics,
                 preference_prompt: "Focus on connections between these subjects and their practical applications.",
                 output: example_data,
                 subjects_list: subjects_df,
                 learning_order: gr.update(),
                 projects: gr.update(),
                 progress: gr.update(visible=False),
                 total_cards: gr.update(visible=False),
             }
         use_subjects.click(
             fn=use_selected_subjects,
+            inputs=[subjects_list],
+            outputs=[
                 generation_mode,
+                subject_mode,
+                path_mode,
+                text_mode,
+                web_mode,
+                path_results,
+                cards_output,
+                subject,
+                description,
+                source_text,
+                url_input,
+                topic_number,
+                preference_prompt,
+                output,
+                subjects_list,
+                learning_order,
+                projects,
+                progress,
+                total_cards,
             ],
         )
         generate_button.click(
             fn=generate_cards,
             inputs=[
                 api_key_input,
                 subject,
+                generation_mode,
+                source_text,
+                url_input,
                 model_choice,
                 topic_number,
                 cards_per_topic,

requirements.txt CHANGED Viewed

@@ -1,6 +1,7 @@
 aiofiles==23.2.1
 annotated-types==0.7.0
 anyio==4.9.0
 cached-property==2.0.1
 certifi==2025.1.31
 charset-normalizer==3.4.1
@@ -22,6 +23,7 @@ huggingface-hub==0.30.2
 idna==3.10
 jinja2==3.1.6
 jiter==0.9.0
 markdown-it-py==3.0.0
 markupsafe==2.1.5
 mdurl==0.1.2

 aiofiles==23.2.1
 annotated-types==0.7.0
 anyio==4.9.0
+beautifulsoup4==4.12.3
 cached-property==2.0.1
 certifi==2025.1.31
 charset-normalizer==3.4.1
 idna==3.10
 jinja2==3.1.6
 jiter==0.9.0
+lxml==5.2.2
 markdown-it-py==3.0.0
 markupsafe==2.1.5
 mdurl==0.1.2