|
""" |
|
Script to prepare and package the B2B Ecommerce NER model for Hugging Face upload |
|
""" |
|
|
|
import os |
|
import shutil |
|
import json |
|
from pathlib import Path |
|
import sys |
|
|
|
|
|
# Add the directory two levels above this file to the import search path.
# resolve() makes the entry absolute, so it stays valid after main() changes
# the working directory even when __file__ was given as a relative path.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.append(str(_PROJECT_ROOT))
|
|
|
|
|
def prepare_huggingface_model():
    """Stage the trained spaCy model and product catalog for Hugging Face upload.

    Paths are derived from this file's location: the directory containing it
    is the staging ("HuggingFace") directory and its parent is the base
    directory. The function:

    1. verifies that ``<base>/models/food_ner_model``,
       ``<base>/data/product_catalog.csv`` and ``<staging>/config.json`` exist;
    2. replaces ``<staging>/spacy_model`` with a copy of the model directory;
    3. copies the catalog to ``<staging>/product_catalog.csv``;
    4. sets ``spacy_model_path``, ``catalog_path`` and ``prepared_for_upload``
       in ``config.json``.

    Returns:
        bool: True when staging completed; False when a required input is
        missing (a message naming it is printed and nothing is modified).

    Raises:
        json.JSONDecodeError: If ``config.json`` is not valid JSON. The config
            is parsed before any file is copied or removed.
    """
    print("Preparing B2B Ecommerce NER model for Hugging Face...")

    # resolve() keeps these paths correct even if __file__ is relative and the
    # working directory has been changed since the interpreter started.
    hf_dir = Path(__file__).resolve().parent
    base_dir = hf_dir.parent
    spacy_model_path = base_dir / "models" / "food_ner_model"
    catalog_path = base_dir / "data" / "product_catalog.csv"
    config_path = hf_dir / "config.json"

    print(f"Base directory: {base_dir}")
    print(f"HuggingFace directory: {hf_dir}")
    print(f"spaCy model path: {spacy_model_path}")
    print(f"Catalog path: {catalog_path}")

    # Validate every input up front so a failure never leaves the staging
    # directory half-updated (e.g. old model deleted, config untouched).
    if not spacy_model_path.exists():
        print(f"❌ spaCy model not found at {spacy_model_path}")
        print("Please train the model first using: python src/train_model.py")
        return False

    if not catalog_path.exists():
        print(f"❌ Product catalog not found at {catalog_path}")
        print("Please ensure product_catalog.csv exists in the data directory")
        return False

    if not config_path.exists():
        print(f"❌ Config file not found at {config_path}")
        return False

    print("✅ Required files found")

    # Parse the config before touching the filesystem; invalid JSON raises here.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    # copytree() requires that the destination does not exist yet.
    target_spacy_path = hf_dir / "spacy_model"
    if target_spacy_path.exists():
        shutil.rmtree(target_spacy_path)

    print(f"Copying spaCy model to {target_spacy_path}")
    shutil.copytree(spacy_model_path, target_spacy_path)

    target_catalog_path = hf_dir / "product_catalog.csv"
    print(f"Copying product catalog to {target_catalog_path}")
    shutil.copy(catalog_path, target_catalog_path)

    # Record locations relative to the staging directory.
    config["spacy_model_path"] = "spacy_model"
    config["catalog_path"] = "product_catalog.csv"
    config["prepared_for_upload"] = True

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    print("✅ Model prepared successfully!")
    print("\nNext steps:")
    print("1. Test the model using: python huggingface_model/example.py")
    print("2. Upload to Hugging Face using the upload script")

    return True
|
|
|
|
|
def test_prepared_model():
    """Smoke-test the staged model by loading it and running one prediction.

    Imports ``B2BEcommerceNER`` from the ``model`` module, constructs it with
    the relative paths ``spacy_model`` and ``product_catalog.csv``, and calls
    ``predict`` on a single sample order. Because the paths are relative, the
    working directory must be the staging directory; ``main()`` changes into
    it before calling this function.

    Returns:
        bool: True if the import, construction and prediction all succeed;
        False if any of them raises (the error is printed).
    """
    print("\nTesting prepared model...")

    try:
        # Imported lazily so a missing or broken model module is reported as a
        # failed test rather than crashing the script at import time.
        from model import B2BEcommerceNER

        model = B2BEcommerceNER(
            model_path="spacy_model",
            catalog_path="product_catalog.csv",
        )

        test_texts = ["Order 5 Coke Zero 650ML"]
        results = model.predict(test_texts)

        print("✅ Model test successful!")
        # default=str lets values json cannot encode natively be shown as text.
        print("Sample result:", json.dumps(results[0], indent=2, default=str))

        return True

    except Exception as e:
        # Deliberately broad: any failure here means the package is not ready.
        print(f"❌ Model test failed: {e}")
        return False
|
|
|
|
|
def create_upload_script():
    """Write an executable ``upload.py`` next to this file.

    The generated script defines ``upload_to_huggingface(repo_name, token)``,
    which creates the repository (if needed) and uploads the directory that
    contains ``upload.py`` via ``HfApi.upload_folder``. Run as
    ``python upload.py <repo_name>`` it reads the token from ``HF_TOKEN``.

    The file is written as UTF-8 (it contains emoji) and its mode is set to
    ``0o755``.
    """
    # The template is kept at column 0 because it is written out verbatim.
    # "\\n" below is intentional: it must reach upload.py as the two
    # characters backslash + n, not as a real newline.
    upload_script = '''#!/usr/bin/env python3
"""
Upload the B2B Ecommerce NER model to Hugging Face Hub
"""

from huggingface_hub import HfApi, create_repo
import os
from pathlib import Path


def upload_to_huggingface(repo_name: str, token: str = None):
    """
    Upload the model to Hugging Face Hub

    Args:
        repo_name: Name of the repository (e.g., "username/b2b-ecommerce-ner")
        token: Hugging Face token (or set HF_TOKEN environment variable)
    """

    if token is None:
        token = os.getenv("HF_TOKEN")
    if not token:
        print("Please provide a Hugging Face token or set HF_TOKEN environment variable")
        return False

    api = HfApi()

    try:
        # Create repository
        print(f"Creating repository: {repo_name}")
        create_repo(repo_name, token=token, exist_ok=True)

        # Upload all files in the current directory
        model_dir = Path(__file__).parent

        print("Uploading files...")
        api.upload_folder(
            folder_path=model_dir,
            repo_id=repo_name,
            token=token,
            repo_type="model"
        )

        print(f"✅ Model uploaded successfully to: https://huggingface.co/{repo_name}")
        return True

    except Exception as e:
        print(f"❌ Upload failed: {e}")
        return False


if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        print("Usage: python upload.py <repo_name>")
        print("Example: python upload.py username/b2b-ecommerce-ner")
        sys.exit(1)

    repo_name = sys.argv[1]
    success = upload_to_huggingface(repo_name)

    if success:
        print("\\nYour model is now available on Hugging Face!")
        print(f"You can use it with: B2BEcommerceNER.from_pretrained('{repo_name}')")
    else:
        print("\\nUpload failed. Please check your token and try again.")
'''

    upload_script_path = Path(__file__).resolve().parent / "upload.py"

    # Explicit UTF-8: the template contains emoji, and Python reads source
    # files as UTF-8 regardless of the platform's locale encoding.
    with open(upload_script_path, "w", encoding="utf-8") as f:
        f.write(upload_script)

    # Make the script directly executable (rwxr-xr-x).
    os.chmod(upload_script_path, 0o755)

    print(f"✅ Upload script created at {upload_script_path}")
|
|
|
|
|
def main():
    """Run the full preparation pipeline.

    Changes the working directory to the directory containing this file, then
    stages the model (``prepare_huggingface_model``), smoke-tests it
    (``test_prepared_model``), writes the upload script
    (``create_upload_script``) and prints the directory contents and usage
    instructions.

    Returns:
        bool: True if every step succeeded; False as soon as staging or the
        smoke test reports failure (later steps are skipped).
    """
    print("B2B Ecommerce NER - Hugging Face Preparation")
    print("=" * 50)

    # Resolve before chdir so the path is absolute regardless of how the
    # script was invoked; later steps use paths relative to this directory.
    script_dir = Path(__file__).resolve().parent
    os.chdir(script_dir)

    if not prepare_huggingface_model():
        return False

    if not test_prepared_model():
        return False

    create_upload_script()

    # NOTE(review): the emoji in the messages below replace mojibake found in
    # the source; the exact original glyphs are inferred - confirm.
    print("\n🎉 Model preparation complete!")
    print("\nFiles in huggingface_model directory:")
    # Sorted so the listing is stable across runs and filesystems.
    for entry in sorted(script_dir.iterdir()):
        if entry.is_file():
            print(f" 📄 {entry.name}")
        elif entry.is_dir():
            print(f" 📁 {entry.name}/")

    print("\n📖 Usage instructions:")
    print("1. Test locally: python example.py")
    print("2. Upload to HF: python upload.py username/model-name")
    print("3. Use remotely: B2BEcommerceNER.from_pretrained('username/model-name')")

    return True
|
|
|
|
|
if __name__ == "__main__":
    # main() signals failure by returning False; turn that into a non-zero
    # exit status so shells and CI pipelines can detect it.
    sys.exit(0 if main() else 1)
|
|