ayousanz's picture
Add files using upload-large-folder tool
f96afdc verified
raw
history blame
1.55 kB
from fugashi import GenericTagger, Tagger, build_dictionary
import sys
import fileinput
def main():
    """Command-line front end for trying out fugashi.

    Behaves like the mecab binary: every line read from stdin is handled as
    a single sentence, and its parsed form is printed. Any command-line
    arguments are joined with spaces and handed to the tagger unchanged.
    """
    tagger_options = ' '.join(sys.argv[1:])

    # A user-specified dictionary should be handled by GenericTagger; if that
    # raises RuntimeError, fall back to Tagger so the pip-installed unidic
    # still works.
    try:
        tagger = GenericTagger(tagger_options, quiet=True)
    except RuntimeError:
        tagger = Tagger(tagger_options)

    # An explicit empty file list keeps fileinput from treating the tagger
    # options in sys.argv as file names to open.
    sentences = (raw.strip() for raw in fileinput.input([]))
    for sentence in sentences:
        print(tagger.parse(sentence))
def info():
    """Print details of each dictionary the tagger has loaded.

    The tagger is built from the command-line arguments. For every entry in
    its ``dictionary_info`` the version, size, charset and filename fields
    are printed, followed by a separator line.
    """
    tagger_options = ' '.join(sys.argv[1:])

    # Prefer GenericTagger; fall back to Tagger if it raises RuntimeError.
    try:
        tagger = GenericTagger(tagger_options, quiet=True)
    except RuntimeError:
        tagger = Tagger(tagger_options)

    # TODO get the fugashi version here too
    print("Fugashi dictionary info:")
    print("-----")

    field_names = 'version size charset filename'.split()
    for entry in tagger.dictionary_info:
        for name in field_names:
            # Pad the "name:" label to ten columns so the values line up.
            label = (name + ':').ljust(10)
            print(label, entry[name])
        print('-----')
def build_dict():
    """EXPERIMENTAL wrapper around MeCab's user dictionary build command.

    Builds one argument string from the program name, the fixed options
    ``-f utf8 -t utf8`` (so utf8 is the default), and the command-line
    arguments, in that order. The string is printed and then passed to
    ``build_dictionary``.
    """
    # TODO simplify using pip-installed dictionaries as base
    program_name = sys.argv[0]
    user_options = ' '.join(sys.argv[1:])
    command = program_name + " -f utf8 -t utf8 " + user_options

    # Show the exact argument string before handing it over.
    print(command)
    build_dictionary(command)