Add new data files

This commit is contained in:
Dirkjan Ochtman 2021-06-02 01:18:36 -07:00
parent fcf24c7543
commit fee2adb995
3 changed files with 354784 additions and 2 deletions

262144
data/en-bigrams.txt Normal file

File diff suppressed because it is too large Load Diff

92638
data/en-unigrams.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ pub fn segmenter() -> Segmenter {
let dir = PathBuf::from(format!("{}/../data", env!("CARGO_MANIFEST_DIR"))); let dir = PathBuf::from(format!("{}/../data", env!("CARGO_MANIFEST_DIR")));
let mut ln = String::new(); let mut ln = String::new();
let uni_file = dir.join("unigrams.txt"); let uni_file = dir.join("en-unigrams.txt");
let mut reader = BufReader::new(File::open(&uni_file).unwrap()); let mut reader = BufReader::new(File::open(&uni_file).unwrap());
let mut i = 0; let mut i = 0;
let mut unigrams = HashMap::default(); let mut unigrams = HashMap::default();
@@ -33,7 +33,7 @@ pub fn segmenter() -> Segmenter {
ln.clear(); ln.clear();
} }
let bi_file = dir.join("bigrams.txt"); let bi_file = dir.join("en-bigrams.txt");
let mut reader = BufReader::new(File::open(&bi_file).unwrap()); let mut reader = BufReader::new(File::open(&bi_file).unwrap());
let mut i = 0; let mut i = 0;
let mut bigrams = HashMap::default(); let mut bigrams = HashMap::default();