instant-segment/instant-segment-py/test/test.py

24 lines
746 B
Python
Raw Normal View History

2021-03-24 09:59:57 +00:00
import instant_segment, os, sys
# Directory containing the n-gram data files, located relative to this file
# ('../../data/' from the directory that holds this script).
DATA_DIR = os.path.join(os.path.dirname(__file__), '../../data/')
def unigrams(path=None):
    """Yield ``(word, score)`` pairs parsed from a unigram data file.

    Each line is split once on a tab: the text before the tab is the word,
    and the text after it is stripped of surrounding whitespace and
    converted to ``float``.

    Args:
        path: Optional file to read. When omitted, ``en-unigrams.txt``
            inside ``DATA_DIR`` is used.

    Yields:
        ``(word, score)`` tuples, one per line, in file order.

    Raises:
        IndexError: If a line contains no tab.
        ValueError: If the score field cannot be parsed as a float.
    """
    if path is None:
        path = os.path.join(DATA_DIR, 'en-unigrams.txt')
    # The context manager closes the file when the generator is exhausted
    # or closed, instead of leaving the handle open until garbage collection.
    with open(path) as lines:
        for ln in lines:
            parts = ln.split('\t', 1)
            yield (parts[0], float(parts[1].strip()))
def bigrams(path=None):
    """Yield ``((first, second), score)`` entries parsed from a bigram data file.

    Each line is split once on a space to obtain the first word; the
    remainder is split once on a tab into the second word and the score.
    The score is stripped of surrounding whitespace and converted to
    ``float``.

    Args:
        path: Optional file to read. When omitted, ``en-bigrams.txt``
            inside ``DATA_DIR`` is used.

    Yields:
        ``((first, second), score)`` tuples, one per line, in file order.

    Raises:
        IndexError: If a line lacks the space or the tab separator.
        ValueError: If the score field cannot be parsed as a float.
    """
    if path is None:
        path = os.path.join(DATA_DIR, 'en-bigrams.txt')
    # The context manager closes the file when the generator is exhausted
    # or closed, instead of leaving the handle open until garbage collection.
    with open(path) as lines:
        for ln in lines:
            word_split = ln.split(' ', 1)
            score_split = word_split[1].split('\t', 1)
            yield ((word_split[0], score_split[0]), float(score_split[1].strip()))
def main():
    """Segment the sample input ``'thisisatest'`` and print the result.

    Builds a ``Segmenter`` from the unigram and bigram generators, runs it
    over the sample string with a fresh ``Search`` object, then prints a
    list of the items obtained by iterating that ``Search`` object.
    """
    model = instant_segment.Segmenter(unigrams(), bigrams())
    state = instant_segment.Search()
    # segment() is handed the search object; its items are read back below.
    model.segment('thisisatest', state)
    pieces = [piece for piece in state]
    print(pieces)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()