Separate incorrect segmentation out of TEST_CASES

This commit is contained in:
Dirkjan Ochtman 2021-02-04 10:40:45 +01:00
parent 96187965b6
commit bacf82c8cc
1 changed file with 30 additions and 27 deletions

View File

@@ -5,6 +5,7 @@ pub fn run(segmenter: &Segmenter) {
for test in TEST_CASES.iter().copied() { for test in TEST_CASES.iter().copied() {
assert_segments(segmenter, test); assert_segments(segmenter, test);
} }
assert_segments(segmenter, FAIL);
} }
pub fn assert_segments(segmenter: &Segmenter, s: &[&str]) { pub fn assert_segments(segmenter: &Segmenter, s: &[&str]) {
@@ -96,31 +97,33 @@ pub const TEST_CASES: &[&[&str]] = &[
"nothing", "in", "it", "to", "sit", "down", "on", "or", "to", "eat", "it", "was", "a", "nothing", "in", "it", "to", "sit", "down", "on", "or", "to", "eat", "it", "was", "a",
"hobbit", "hole", "and", "that", "means", "comfort", "hobbit", "hole", "and", "that", "means", "comfort",
], ],
&[ ];
"far",
"out", /// Incorrectly segmented, since the test data doesn't contain "unregarded"
"in", const FAIL: &[&str] = &[
"the", "far",
"uncharted", "out",
"backwaters", "in",
"of", "the",
"the", "uncharted",
"unfashionable", "backwaters",
"end", "of",
"of", "the",
"the", "unfashionable",
"western", "end",
"spiral", "of",
"arm", "the",
"of", "western",
"the", "spiral",
"galaxy", "arm",
"lies", "of",
"a", "the",
"small", "galaxy",
"un", "lies",
"regarded", "a",
"yellow", "small",
"sun", "un",
], "regarded",
"yellow",
"sun",
]; ];