import os
import shutil
import sys
import zipfile
from urllib.request import urlretrieve

import requests
from tqdm import tqdm
from wasabi import msg


# This is used to show progress when downloading.
# see here: https://github.com/tqdm/tqdm#hooks-and-callbacks
class TqdmUpTo(tqdm):
    """Provides `update_to(n)` which uses `tqdm.update(delta_n)`."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position ``b * bsize``.

        The (b, bsize, tsize) signature is what `urlretrieve` passes to its
        ``reporthook`` callback, so this method can be used as that hook.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        if tsize is not None:
            self.total = tsize
        # tqdm.update() takes a delta, so convert the absolute position.
        self.update(b * bsize - self.n)  # will also set self.n = b * bsize


def download_file(url, fname):
    """Stream the content at `url` into the local file `fname`.

    Returns `fname`.

    Raises `requests.HTTPError` when the server answers with a 4xx/5xx
    status, so that an error page is never written to disk in place of the
    requested file.
    """
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(fname, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    return fname


def download_progress(url, fname):
    """Download a file and show a progress bar.

    The bar is labelled with the last path component of `url`.
    Returns `fname`.
    """
    with TqdmUpTo(unit='B', unit_scale=True, miniters=1,
                  desc=url.split('/')[-1]) as t:  # all optional kwargs
        urlretrieve(url, filename=fname, reporthook=t.update_to, data=None)
        # Pin the total to the amount actually received.
        t.total = t.n
    return fname


def get_json(url, desc):
    """Fetch `url` and return its body parsed as JSON.

    `desc` is a human-readable name for the resource, used only in the
    error message. On any status other than 200 the failure is reported via
    `msg.fail(..., exits=1)`.
    """
    r = requests.get(url)
    if r.status_code != 200:
        msg.fail(
            "Server error ({})".format(r.status_code),
            "Couldn't fetch {}. If this error persists please open an issue."
            " http://github.com/polm/unidic-py/issues/".format(desc),
            exits=1,
        )
    return r.json()


def download_and_clean(version, url, dirname='unidic', delfiles=None):
    """Download unidic and prep the dicdir.

    This downloads the zip file from the source, extracts it, renames the
    resulting directory, and removes large files not used at runtime.

    version  : version string, used in messages and in the `version` file.
    url      : location of the zip archive.
    dirname  : name of the directory the archive extracts to; it is moved
               to `dicdir` next to this module.
    delfiles : optional iterable of file names (relative to the dicdir) to
               delete after extraction. `None` means delete nothing.
    """
    # None instead of a mutable [] default; an absent value means "no files".
    if delfiles is None:
        delfiles = ()

    cdir = os.path.dirname(os.path.abspath(__file__))
    fname = os.path.join(cdir, 'unidic.zip')
    print("Downloading UniDic v{}...".format(version), file=sys.stderr)
    download_progress(url, fname)
    print("Finished download.")

    try:
        with zipfile.ZipFile(fname, 'r') as zf:
            zf.extractall(cdir)
    finally:
        # Remove the archive even if extraction fails, so a broken download
        # is not left behind in the package directory.
        os.remove(fname)

    # Replace any previously installed dictionary.
    dicdir = os.path.join(cdir, 'dicdir')
    if os.path.isdir(dicdir):
        shutil.rmtree(dicdir)

    outdir = os.path.join(cdir, dirname)
    shutil.move(outdir, dicdir)

    for dfile in delfiles:
        os.remove(os.path.join(dicdir, dfile))

    # save a version file so we can tell what it is
    vpath = os.path.join(dicdir, 'version')
    with open(vpath, 'w', encoding='utf-8') as vfile:
        vfile.write('unidic-{}'.format(version))

    # Write a dummy mecabrc
    with open(os.path.join(dicdir, 'mecabrc'), 'w', encoding='utf-8') as mecabrc:
        mecabrc.write('# This is a dummy file.')

    print("Downloaded UniDic v{} to {}".format(version, dicdir), file=sys.stderr)


DICT_INFO = "https://raw.githubusercontent.com/polm/unidic-py/master/dicts.json"


def download_version(ver="latest"):
    """Download and install the dictionary registered under `ver`.

    The index at `DICT_INFO` is fetched and `ver` is looked up in it; the
    matching entry supplies the `url` and `version` passed on to
    `download_and_clean`.

    If `ver` is not in the index, the known versions are listed and the
    process exits with status 1 (raises `SystemExit`).
    """
    res = get_json(DICT_INFO, "dictionary info")
    try:
        dictinfo = res[ver]
    except KeyError:
        print('Unknown version "{}".'.format(ver))
        print("Known versions:")
        for key, val in res.items():
            print("\t", key, "({})".format(val['version']))
        # Stop here: there is no entry to download. Previously execution
        # fell through and crashed on the unbound `dictinfo`.
        sys.exit(1)

    print("download url:", dictinfo['url'])
    print("Dictionary version:", dictinfo['version'])
    download_and_clean(dictinfo['version'], dictinfo['url'])