Raihan Hidayatullah Djunaedi committed
Commit 94028e0 · 1 Parent(s): 099e31f

Update README.md to enhance model documentation and examples for zero-shot classification

Files changed (5):
  1. README.md +114 -39
  2. UPLOAD_INSTRUCTIONS.md +112 -0
  3. example_usage.py +95 -0
  4. requirements.txt +4 -0
  5. tokenizer_config.json +17 -1
README.md CHANGED
@@ -1,47 +1,90 @@
  ---
- language:
- - multilingual
- - en
- - fr
- - es
- - de
- - el
- - bg
- - ru
- - tr
- - ar
- - vi
- - th
- - zh
- - hi
- - sw
- - ur
  tags:
- - text-classification
- - pytorch
- - tensorflow
  datasets:
- - multi_nli
- - xnli
  license: mit
  pipeline_tag: zero-shot-classification
  widget:
- - text: "За кого вы голосуете в 2020 году?"
-   candidate_labels: "politique étrangère, Europe, élections, affaires, politique"
-   multi_class: true
- - text: "لمن تصوت في 2020؟"
-   candidate_labels: "السياسة الخارجية, أوروبا, الانتخابات, الأعمال, السياسة"
-   multi_class: true
- - text: "2020'de kime oy vereceksiniz?"
-   candidate_labels: "dış politika, Avrupa, seçimler, ticaret, siyaset"
-   multi_class: true
  ---

- # xlm-roberta-large-xnli

  ## Model Description

- This model takes [xlm-roberta-large](https://huggingface.co/xlm-roberta-large) and fine-tunes it on a combination of NLI data in 15 languages. It is intended to be used for zero-shot text classification, such as with the Hugging Face [ZeroShotClassificationPipeline](https://huggingface.co/transformers/master/main_classes/pipelines.html#transformers.ZeroShotClassificationPipeline).

  ## Intended Usage

@@ -72,14 +115,14 @@ For English-only classification, it is recommended to use
  [bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli) or
  [a distilled bart MNLI model](https://huggingface.co/models?filter=pipeline_tag%3Azero-shot-classification&search=valhalla).

- #### With the zero-shot classification pipeline

  The model can be loaded with the `zero-shot-classification` pipeline like so:

  ```python
  from transformers import pipeline
  classifier = pipeline("zero-shot-classification",
-                       model="joeddav/xlm-roberta-large-xnli")
  ```

  You can then classify in any of the above languages. You can even pass the labels in one language and the sequence to
@@ -109,13 +152,13 @@ classifier(sequence_to_classify, candidate_labels, hypothesis_template=hypothesis_template)
  # 'sequence': '¿A quién vas a votar en 2020?'}
  ```

- #### With manual PyTorch

  ```python
  # pose sequence as a NLI premise and label as a hypothesis
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
- nli_model = AutoModelForSequenceClassification.from_pretrained('joeddav/xlm-roberta-large-xnli')
- tokenizer = AutoTokenizer.from_pretrained('joeddav/xlm-roberta-large-xnli')

  premise = sequence
  hypothesis = f'This example is {label}.'
@@ -126,7 +169,7 @@ x = tokenizer.encode(premise, hypothesis, return_tensors='pt',
  logits = nli_model(x.to(device))[0]

  # we throw away "neutral" (dim 1) and take the probability of
- # "entailment" (2) as the probability of the label being true
  entail_contradiction_logits = logits[:,[0,2]]
  probs = entail_contradiction_logits.softmax(dim=1)
  prob_label_is_true = probs[:,1]
@@ -139,3 +182,35 @@ This model was pre-trained on set of 100 languages, as described in
  MNLI train set and the XNLI validation and test sets. Finally, it was trained for one additional epoch on only XNLI
  data where the translations for the premise and hypothesis are shuffled such that the premise and hypothesis for
  each example come from the same original English example but the premise and hypothesis are of different languages.
  ---
+ language:
+ - multilingual
+ - en
+ - fr
+ - es
+ - de
+ - el
+ - bg
+ - ru
+ - tr
+ - ar
+ - vi
+ - th
+ - zh
+ - hi
+ - sw
+ - ur
  tags:
+ - text-classification
+ - pytorch
+ - tensorflow
+ - zero-shot-classification
+ - xlm-roberta
+ - multilingual
+ - nli
+ - natural-language-inference
  datasets:
+ - multi_nli
+ - xnli
  license: mit
  pipeline_tag: zero-shot-classification
+ library_name: transformers
+ model-index:
+ - name: xlm-roberta-large-xnli
+   results:
+   - task:
+       type: zero-shot-classification
+       name: Zero-Shot Classification
+     dataset:
+       name: XNLI
+       type: xnli
  widget:
+ - text: "За кого вы голосуете в 2020 году?"
+   candidate_labels: "politique étrangère, Europe, élections, affaires, politique"
+   multi_class: true
+   example_title: "Russian Political Classification"
+ - text: "لمن تصوت في 2020؟"
+   candidate_labels: "السياسة الخارجية, أوروبا, الانتخابات, الأعمال, السياسة"
+   multi_class: true
+   example_title: "Arabic Political Classification"
+ - text: "2020'de kime oy vereceksiniz?"
+   candidate_labels: "dış politika, Avrupa, seçimler, ticaret, siyaset"
+   multi_class: true
+   example_title: "Turkish Political Classification"
+ - text: "I love this movie"
+   candidate_labels: "positive, negative, neutral"
+   multi_class: false
+   example_title: "English Sentiment Analysis"
  ---

+ # XLM-RoBERTa Large for Zero-Shot Classification (XNLI)

  ## Model Description

+ This model is based on the excellent work by [joeddav/xlm-roberta-large-xnli](https://huggingface.co/joeddav/xlm-roberta-large-xnli). It takes [xlm-roberta-large](https://huggingface.co/xlm-roberta-large) and fine-tunes it on a combination of NLI data in 15 languages.
+
+ **Original Model Credit**: This model is a copy of [joeddav/xlm-roberta-large-xnli](https://huggingface.co/joeddav/xlm-roberta-large-xnli) by Joe Davison. All credit for the training and development goes to the original author.
+
+ This model is intended to be used for zero-shot text classification, such as with the Hugging Face [ZeroShotClassificationPipeline](https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.ZeroShotClassificationPipeline).
+
+ ## Quick Start
+
+ ```python
+ from transformers import pipeline
+
+ # Load the zero-shot classification pipeline
+ classifier = pipeline("zero-shot-classification",
+                       model="YOUR_USERNAME/zero-shot-classification")
+
+ # Example usage
+ text = "I love this new smartphone, it's amazing!"
+ candidate_labels = ["technology", "sports", "politics", "entertainment"]
+
+ result = classifier(text, candidate_labels)
+ print(result)
+ ```

  ## Intended Usage

  [bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli) or
  [a distilled bart MNLI model](https://huggingface.co/models?filter=pipeline_tag%3Azero-shot-classification&search=valhalla).

+ ### Using the zero-shot classification pipeline

  The model can be loaded with the `zero-shot-classification` pipeline like so:

  ```python
  from transformers import pipeline
  classifier = pipeline("zero-shot-classification",
+                       model="YOUR_USERNAME/zero-shot-classification")
  ```

  You can then classify in any of the above languages. You can even pass the labels in one language and the sequence to

  # 'sequence': '¿A quién vas a votar en 2020?'}
  ```
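The diff cuts away the cross-lingual example between the snippet above and its output line; the following is a minimal sketch of the usage it describes, assuming the original upstream checkpoint rather than the placeholder id:

```python
# Not part of the commit: a sketch of the cross-lingual usage described above,
# assuming the original upstream checkpoint instead of the placeholder id.
from transformers import pipeline

classifier = pipeline("zero-shot-classification",
                      model="joeddav/xlm-roberta-large-xnli")

# Russian sequence scored against French candidate labels
result = classifier("За кого вы голосуете в 2020 году?",
                    ["Europe", "élections", "politique"])
print(result["labels"][0], round(result["scores"][0], 4))
```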
+ ### Using with manual PyTorch

  ```python
  # pose sequence as a NLI premise and label as a hypothesis
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ nli_model = AutoModelForSequenceClassification.from_pretrained('YOUR_USERNAME/zero-shot-classification')
+ tokenizer = AutoTokenizer.from_pretrained('YOUR_USERNAME/zero-shot-classification')

  premise = sequence
  hypothesis = f'This example is {label}.'

  logits = nli_model(x.to(device))[0]

  # we throw away "neutral" (dim 1) and take the probability of
+ # "entailment" (2) as the probability of the label being true
  entail_contradiction_logits = logits[:,[0,2]]
  probs = entail_contradiction_logits.softmax(dim=1)
  prob_label_is_true = probs[:,1]

  MNLI train set and the XNLI validation and test sets. Finally, it was trained for one additional epoch on only XNLI
  data where the translations for the premise and hypothesis are shuffled such that the premise and hypothesis for
  each example come from the same original English example but the premise and hypothesis are of different languages.
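The commit does not include the shuffling code this paragraph describes; a hypothetical sketch of the idea, with invented helper and data names:

```python
# Hypothetical illustration of the cross-lingual shuffling described above;
# data and helper names are invented, this is not the original training code.
import random

def shuffle_language_pair(translations):
    """translations: {lang: (premise, hypothesis)} for one original English example."""
    premise_lang, hypothesis_lang = random.sample(list(translations), 2)
    premise = translations[premise_lang][0]        # premise in one language
    hypothesis = translations[hypothesis_lang][1]  # hypothesis in another
    return premise, hypothesis

print(shuffle_language_pair({
    "en": ("The man is eating.", "A person eats."),
    "es": ("El hombre está comiendo.", "Una persona come."),
    "fr": ("L'homme mange.", "Une personne mange."),
}))
```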
+
+ ## Model Performance
+
+ For performance metrics on multilingual zero-shot classification tasks, refer to the [original model](https://huggingface.co/joeddav/xlm-roberta-large-xnli).
+
+ ## Limitations and Bias
+
+ - The model may have biases inherited from the training data (MNLI and XNLI datasets)
+ - Performance may vary across languages and domains
+ - The model works best with the 15 languages explicitly included in the XNLI training data
+ - For English-only tasks, consider using a specialized English model such as `facebook/bart-large-mnli`
+
+ ## Citation
+
+ If you use this model, please cite the original work:
+
+ ```bibtex
+ @misc{davison2020zero,
+   title={Zero-Shot Learning in Modern NLP},
+   author={Joe Davison},
+   year={2020},
+   howpublished={\url{https://joeddav.github.io/blog/2020/05/29/ZSL.html}},
+ }
+ ```
+
+ ## License
+
+ This model is released under the MIT License, following the original model's licensing.
+
+ ## Contact
+
+ This is a copy of the original model by Joe Davison. For questions about the model architecture and training, please refer to the [original repository](https://huggingface.co/joeddav/xlm-roberta-large-xnli).
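The manual-PyTorch snippet appears only in fragments in the hunks above; below is a self-contained sketch of the same scoring procedure, assuming the upstream checkpoint and the current `truncation` keyword in place of the older `truncation_strategy`:

```python
# Self-contained version of the manual NLI scoring sketched in the README.
# Checkpoint id and example strings are assumptions, not part of the commit.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "joeddav/xlm-roberta-large-xnli"
device = "cuda" if torch.cuda.is_available() else "cpu"
nli_model = AutoModelForSequenceClassification.from_pretrained(model_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

sequence = "¿A quién vas a votar en 2020?"   # the text to classify (NLI premise)
label = "elections"                          # candidate label to test
premise = sequence
hypothesis = f"This example is {label}."

# encode the pair, truncating only the premise if it is too long
x = tokenizer.encode(premise, hypothesis, return_tensors="pt",
                     truncation="only_first")
logits = nli_model(x.to(device))[0]

# drop "neutral" (index 1); softmax over contradiction (0) vs entailment (2)
entail_contradiction_logits = logits[:, [0, 2]]
probs = entail_contradiction_logits.softmax(dim=1)
prob_label_is_true = probs[:, 1]
print(float(prob_label_is_true))
```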
UPLOAD_INSTRUCTIONS.md ADDED
@@ -0,0 +1,112 @@
+ # How to Upload Your Model to Hugging Face
+
+ Follow these steps to upload your zero-shot classification model to Hugging Face and make it available through the transformers library.
+
+ ## Prerequisites
+
+ 1. Install required packages:
+
+ ```bash
+ pip install huggingface_hub transformers
+ ```
+
+ 2. Create a Hugging Face account at https://huggingface.co/
+
+ 3. Get your access token:
+    - Go to https://huggingface.co/settings/tokens
+    - Create a new token with "Write" permissions
+    - Copy the token (keep it secure!)
+
+ ## Upload Steps
+
+ ### Method 1: Using the Web Interface (Recommended for beginners)
+
+ 1. Go to https://huggingface.co/new
+ 2. Choose "Model" and give it a name (e.g., `zero-shot-classification`)
+ 3. Set visibility (Public/Private)
+ 4. Click "Create model repository"
+ 5. Upload files using the web interface:
+    - Drag and drop all files from your model directory
+    - Or use git (see Method 2)
+
+ ### Method 2: Using Git/Command Line
+
+ 1. Log in to the Hugging Face CLI:
+
+ ```bash
+ huggingface-cli login
+ # Enter your token when prompted
+ ```
+
+ 2. Clone your repository:
+
+ ```bash
+ git clone https://huggingface.co/YOUR_USERNAME/zero-shot-classification
+ cd zero-shot-classification
+ ```
+
+ 3. Copy your model files:
+
+ ```bash
+ # Copy all files from your model directory to the cloned repository
+ cp /path/to/your/model/* .
+ ```
+
+ 4. Upload to Hugging Face:
+
+ ```bash
+ git add .
+ git commit -m "Upload XLM-RoBERTa zero-shot classification model"
+ git push
+ ```
+
+ ### Method 3: Using the Python API
+
+ ```python
+ from huggingface_hub import HfApi, create_repo
+
+ # Initialize API
+ api = HfApi()
+
+ # Create the repository (skip this call if it already exists)
+ repo_id = "YOUR_USERNAME/zero-shot-classification"
+ create_repo(repo_id, repo_type="model", private=False)
+
+ # Upload files
+ api.upload_folder(
+     folder_path="/path/to/your/model/directory",
+     repo_id=repo_id,
+     repo_type="model"
+ )
+ ```
+
+ ## Important Notes
+
+ 1. **Replace placeholders**: Before uploading, replace `YOUR_USERNAME` in the README.md and example files with your actual Hugging Face username.
+
+ 2. **Model card**: The README.md serves as your model card. Make sure it is complete and accurate.
+
+ 3. **File size**: Large files (>10MB) are automatically handled by Git LFS, which is already configured in .gitattributes.
+
+ 4. **Testing**: After upload, test your model:
+
+ ```python
+ from transformers import pipeline
+ classifier = pipeline("zero-shot-classification", model="YOUR_USERNAME/zero-shot-classification")
+ ```
+
+ ## Making Your Model Discoverable
+
+ 1. Add relevant tags in your README.md frontmatter
+ 2. Add a good description
+ 3. Include example usage
+ 4. Consider adding a model card with performance metrics
+
+ ## Troubleshooting
+
+ - **Authentication errors**: Make sure your token has write permissions
+ - **Large file errors**: Ensure Git LFS is properly configured
+ - **Model loading errors**: Check that all required files are present (config.json, model files, tokenizer files)
+
+ After a successful upload, your model will be available at:
+ `https://huggingface.co/YOUR_USERNAME/zero-shot-classification`
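Beyond loading the pipeline, the upload can also be verified programmatically; a hedged sketch using `huggingface_hub` calls from its public API (`model_info`, `list_repo_files`), with the guide's placeholder repo id:

```python
# Confirm the repository exists and the expected files landed after upload.
from huggingface_hub import HfApi

api = HfApi()
repo_id = "YOUR_USERNAME/zero-shot-classification"  # placeholder from this guide

info = api.model_info(repo_id)
print(info.id, info.pipeline_tag)

files = set(api.list_repo_files(repo_id))
required = {"config.json", "tokenizer_config.json", "README.md"}
print("missing files:", required - files or "none")
```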
example_usage.py ADDED
@@ -0,0 +1,95 @@
+ #!/usr/bin/env python3
+ """
+ Example script demonstrating how to use the XLM-RoBERTa Zero-Shot Classification model.
+ This script shows various use cases including multilingual classification.
+ """
+
+ import torch
+ from transformers import pipeline
+
+
+ def main():
+     print("Loading XLM-RoBERTa Zero-Shot Classification model...")
+
+     # Initialize the zero-shot classification pipeline
+     # Replace 'YOUR_USERNAME/zero-shot-classification' with your actual model path
+     classifier = pipeline(
+         "zero-shot-classification",
+         model="YOUR_USERNAME/zero-shot-classification",
+         device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
+     )
+
+     print("Model loaded successfully!\n")
+
+     # Example 1: English sentiment analysis
+     print("Example 1: English Sentiment Analysis")
+     text_en = "I love this new smartphone, it's absolutely amazing!"
+     labels_en = ["positive", "negative", "neutral"]
+
+     result = classifier(text_en, labels_en)
+     print(f"Text: {text_en}")
+     print(f"Predicted label: {result['labels'][0]} (score: {result['scores'][0]:.4f})")
+     print()
+
+     # Example 2: Multilingual topic classification
+     print("Example 2: Multilingual Topic Classification")
+     texts = [
+         (
+             "English",
+             "The government announced new economic policies today.",
+             ["politics", "sports", "technology", "entertainment"],
+         ),
+         (
+             "Spanish",
+             "El nuevo iPhone tiene características increíbles.",
+             ["tecnología", "deportes", "política", "entretenimiento"],
+         ),
+         (
+             "French",
+             "Le match de football était très excitant hier soir.",
+             ["sport", "politique", "technologie", "divertissement"],
+         ),
+         (
+             "German",
+             "Die neue KI-Technologie wird die Zukunft verändern.",
+             ["Technologie", "Sport", "Politik", "Unterhaltung"],
+         ),
+     ]
+
+     for language, text, labels in texts:
+         result = classifier(text, labels)
+         print(f"{language}: {text}")
+         print(f"Predicted: {result['labels'][0]} (score: {result['scores'][0]:.4f})")
+         print()
+
+     # Example 3: Multi-label classification
+     print("Example 3: Multi-label Classification")
+     text_multi = "This movie has great action scenes and amazing special effects, but the story is quite boring."
+     labels_multi = ["action", "drama", "comedy", "boring", "exciting", "visual effects"]
+
+     result = classifier(text_multi, labels_multi, multi_label=True)
+     print(f"Text: {text_multi}")
+     print("All predictions:")
+     for label, score in zip(result["labels"], result["scores"]):
+         print(f"  {label}: {score:.4f}")
+     print()
+
+     # Example 4: Custom hypothesis template
+     print("Example 4: Custom Hypothesis Template (Spanish)")
+     text_es = "Esta película es realmente fantástica y emocionante."
+     labels_es = ["positivo", "negativo", "neutro"]
+     hypothesis_template = "Este texto es {}."
+
+     result = classifier(text_es, labels_es, hypothesis_template=hypothesis_template)
+     print(f"Text: {text_es}")
+     print(f"Predicted: {result['labels'][0]} (score: {result['scores'][0]:.4f})")
+     print(f"Using custom template: '{hypothesis_template}'")
+     print()
+
+     print(
+         "Demo completed! You can now use this model for your own zero-shot classification tasks."
+     )
+
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch>=1.9.0
+ transformers>=4.21.0
+ tokenizers>=0.13.0
+ numpy>=1.21.0
tokenizer_config.json CHANGED
@@ -1 +1,17 @@
- {"model_max_length": 512}
+ {
+   "model_max_length": 512,
+   "tokenizer_class": "XLMRobertaTokenizer",
+   "do_lower_case": false,
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "sep_token": "</s>",
+   "cls_token": "<s>",
+   "unk_token": "<unk>",
+   "pad_token": "<pad>",
+   "mask_token": "<mask>",
+   "special_tokens_map_file": null,
+   "name_or_path": "xlm-roberta-large",
+   "tokenize_chinese_chars": true,
+   "strip_accents": null,
+   "do_basic_tokenize": true
+ }
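A small hedged check, assuming the repo has been published under the guide's placeholder name, that the tokenizer picks up these settings. Note that BERT-style keys such as `do_lower_case`, `tokenize_chinese_chars`, and `do_basic_tokenize` are not used by the sentencepiece-based `XLMRobertaTokenizer` and should simply be ignored:

```python
# Load the tokenizer and confirm tokenizer_config.json values are applied.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("YOUR_USERNAME/zero-shot-classification")
print(tokenizer.model_max_length)                # expect 512
print(tokenizer.cls_token, tokenizer.sep_token,
      tokenizer.pad_token, tokenizer.mask_token)  # <s> </s> <pad> <mask>
```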