Extract code fence from markdown

#1
by linux-china - opened

Has anyone had any luck extracting code from Markdown? I've found it hard to get the model to extract the code from a fenced code block in Markdown.


# Structured-output schema for the extraction test: one fenced code block is
# represented by its language tag plus the code text itself.
#
# NOTE(review): test_code_extract embeds CodeExtractor.model_json_schema() in
# the system prompt and also passes it as the `format` argument. Presumably
# (pydantic behaviour — confirm) the class docstring and the Field
# descriptions below are copied into that schema, so treat them as prompt
# text: rewording them changes what the model is sent.
class CodeExtractor(BaseModel):
    """Extract language and script from Markdown."""
    # Language of the extracted code; both fields are required (no defaults).
    language: str = Field(description="development language of the script")
    # The code text taken from the fence.
    script: str = Field(description="The script from code fence")


def test_code_extract():
    """Ask the model to turn a Markdown sample into a ``CodeExtractor`` record.

    A short Markdown document containing one fenced Java block is sent as the
    user message. The system message carries the ``CodeExtractor`` JSON schema,
    and the same schema is passed as ``format``. The reply content is validated
    into a ``CodeExtractor`` instance and printed.

    No assertion is made on the extracted values; the result is only printed.
    ``client`` is expected to be defined elsewhere in the module.
    """
    markdown_sample = """
This is generated code to run hello world program:

```java
public class HelloWorld {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
}
```
"""
    # The schema is interpolated with the dict's default string form, exactly
    # as in the original single f-string.
    system_prompt = (
        "You are a helpful assistant that understands and translates markdown to JSON format according to the following schema. "
        f"{CodeExtractor.model_json_schema()}"
    )
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": markdown_sample}

    reply = client.chat(
        messages=[system_message, user_message],
        model="Osmosis/Osmosis-Structure-0.6B:latest",
        format=CodeExtractor.model_json_schema(),
    )

    # Raises a validation error if the reply is not JSON matching the schema.
    extracted = CodeExtractor.model_validate_json(reply.message.content)
    print(extracted)

Sign up or log in to comment