|
|
|
""" |
|
Sync BitTransformerLM repository to HuggingFace Hub for OS launch. |
|
Uploads all cleaned documentation and code with proper commit message. |
|
""" |
|
|
|
import os |
|
import logging |
|
from pathlib import Path |
|
from huggingface_hub import HfApi, login |
|
from typing import Optional, List |
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') |
|
logger = logging.getLogger(__name__) |
|
|
|
def sync_repository_to_hf( |
|
repo_id: str = "WCNegentropy/BitTransformerLM", |
|
token: Optional[str] = None, |
|
commit_message: str = "π OS Launch: Clean documentation and refined licensing" |
|
): |
|
""" |
|
Sync the entire cleaned BitTransformerLM repository to HuggingFace Hub. |
|
|
|
Args: |
|
repo_id: HuggingFace repository ID |
|
token: HF token (defaults to HF_TOKEN environment variable) |
|
commit_message: Commit message for the upload |
|
""" |
|
|
|
|
|
if token is None: |
|
token = os.environ.get('HF_TOKEN') |
|
if not token: |
|
logger.error("HF_TOKEN environment variable not set and no token provided") |
|
return False |
|
|
|
try: |
|
|
|
login(token=token) |
|
api = HfApi() |
|
logger.info("Successfully authenticated with HuggingFace Hub") |
|
|
|
|
|
repo_root = Path(__file__).parent |
|
logger.info(f"Repository root: {repo_root}") |
|
|
|
|
|
include_patterns = [ |
|
|
|
"bit_transformer/**/*.py", |
|
"tests/**/*.py", |
|
"*.py", |
|
|
|
|
|
"README.md", |
|
"MODEL_CARD.md", |
|
"RESEARCH_STATUS.md", |
|
"EMPIRICAL_VALIDATION.md", |
|
"OPEN_SOURCE_LAUNCH.md", |
|
"AGENTS.md", |
|
|
|
|
|
"requirements.txt", |
|
"pyproject.toml", |
|
"Dockerfile", |
|
"start.sh", |
|
|
|
|
|
"LICENSE/**/*.txt", |
|
] |
|
|
|
|
|
exclude_patterns = [ |
|
"__pycache__/**", |
|
"*.pyc", |
|
".git/**", |
|
".pytest_cache/**", |
|
"weights/**", |
|
"checkpoints/**", |
|
"*.log", |
|
|
|
"BitTransformerLM_full_assessment.md", |
|
"FORENSIC_*.md", |
|
"state_of_the_repo_audit.md", |
|
|
|
"upload_to_hf.py", |
|
] |
|
|
|
|
|
files_to_upload = [] |
|
for pattern in include_patterns: |
|
for file_path in repo_root.glob(pattern): |
|
if file_path.is_file(): |
|
|
|
relative_path = file_path.relative_to(repo_root) |
|
should_exclude = any( |
|
relative_path.match(exclude) |
|
for exclude in exclude_patterns |
|
) |
|
if not should_exclude: |
|
files_to_upload.append(file_path) |
|
|
|
logger.info(f"Found {len(files_to_upload)} files to upload") |
|
|
|
|
|
uploaded_count = 0 |
|
for file_path in files_to_upload: |
|
try: |
|
relative_path = file_path.relative_to(repo_root) |
|
logger.info(f"Uploading: {relative_path}") |
|
|
|
api.upload_file( |
|
path_or_fileobj=str(file_path), |
|
path_in_repo=str(relative_path), |
|
repo_id=repo_id, |
|
repo_type="model", |
|
commit_message=commit_message, |
|
commit_description=""" |
|
This OS launch commit includes: |
|
|
|
β
**Cleaned Documentation** |
|
- Removed inflated claims and marketing language |
|
- Added honest research status and limitations |
|
- Created professional model card and validation reports |
|
- Streamlined licensing to AGPLv3 + commercial contact |
|
|
|
β
**Refined Codebase** |
|
- Complete experimental bit-native transformer implementation |
|
- 57 Python files with comprehensive research framework |
|
- Safety telemetry and monitoring systems |
|
- Distributed training and development tools |
|
|
|
β
**Professional Standards** |
|
- Empirical validation of all claims |
|
- Clear experimental vs production distinctions |
|
- Rigorous research methodology requirements |
|
- Community contribution framework |
|
|
|
Ready for serious research evaluation and academic investigation. |
|
""".strip() |
|
) |
|
|
|
uploaded_count += 1 |
|
if uploaded_count % 10 == 0: |
|
logger.info(f"Progress: {uploaded_count}/{len(files_to_upload)} files uploaded") |
|
|
|
except Exception as e: |
|
logger.warning(f"Failed to upload {relative_path}: {e}") |
|
continue |
|
|
|
logger.info(f"β
Successfully uploaded {uploaded_count}/{len(files_to_upload)} files") |
|
logger.info(f"π Repository synced to: https://huggingface.co/{repo_id}") |
|
|
|
return True |
|
|
|
except Exception as e: |
|
logger.error(f"β Failed to sync repository: {e}") |
|
return False |
|
|
|
def create_release_info(): |
|
"""Create a release information file for the OS launch.""" |
|
release_info = """# BitTransformerLM v0.1.0 - Experimental Research Release |
|
|
|
**Release Date:** August 2025 |
|
**Status:** Open Source Research Implementation |
|
**License:** AGPLv3 + Commercial Licensing Available |
|
|
|
## What's Included |
|
|
|
This release provides a complete experimental framework for bit-native language modeling research: |
|
|
|
- **Core Architecture:** 57 Python files implementing bit-native transformer with reversible layers |
|
- **Safety Systems:** Real-time K/C/S telemetry and monitoring |
|
- **Research Tools:** Interactive dashboard, distributed training, comprehensive testing |
|
- **Documentation:** Professional model card, research status, and validation reports |
|
|
|
## Important Notes |
|
|
|
β οΈ **Experimental Status:** This is research code requiring rigorous baseline validation |
|
β οΈ **Not Production Ready:** Needs extensive evaluation vs standard transformers |
|
β οΈ **Research Use Only:** Intended for academic investigation and experimentation |
|
|
|
## Licensing |
|
|
|
- **Open Source:** AGPLv3 for research and open source use |
|
- **Commercial:** Contact [email protected] for commercial licensing |
|
|
|
## Next Steps |
|
|
|
The research community is invited to: |
|
1. Conduct rigorous baseline comparisons vs standard transformers |
|
2. Evaluate on established language modeling benchmarks |
|
3. Validate (or refute) claimed memory efficiency benefits |
|
4. Share findings openly to advance the field |
|
|
|
**Research responsibly. Validate rigorously. Share openly.** |
|
""" |
|
|
|
release_file = Path(__file__).parent / "RELEASE_INFO.md" |
|
with open(release_file, 'w') as f: |
|
f.write(release_info) |
|
|
|
logger.info("Created RELEASE_INFO.md") |
|
return release_file |
|
|
|
if __name__ == "__main__": |
|
|
|
create_release_info() |
|
|
|
|
|
success = sync_repository_to_hf() |
|
|
|
if success: |
|
print("\nπ BitTransformerLM OS Launch Sync Complete!") |
|
print("π Repository: https://huggingface.co/WCNegentropy/BitTransformerLM") |
|
print("π§ Commercial inquiries: [email protected]") |
|
print("\nReady for research community evaluation! π§ͺβ¨") |
|
else: |
|
print("\nβ Sync failed. Please check logs and try again.") |