Reorganize tests and test data to expose test cases

This commit is contained in:
Dirkjan Ochtman 2021-02-01 17:25:32 +01:00
parent d4df4ce29a
commit 29d2d94a8d
4 changed files with 44 additions and 85 deletions

View File

@ -10,7 +10,8 @@ repository = "https://github.com/InstantDomainSearch/instant-segment"
documentation = "https://docs.rs/instant-segment" documentation = "https://docs.rs/instant-segment"
[features] [features]
__test_data = [] __test_data = ["testcases"]
testcases = []
[dependencies] [dependencies]
ahash = "0.7.0" ahash = "0.7.0"

View File

@ -6,6 +6,8 @@ use smartstring::alias::String;
#[cfg(feature = "__test_data")] #[cfg(feature = "__test_data")]
pub mod test_data; pub mod test_data;
#[cfg(feature = "testcases")]
pub mod testcases;
/// Central data structure used to calculate word probabilities /// Central data structure used to calculate word probabilities
pub struct Segmenter { pub struct Segmenter {

View File

@ -7,6 +7,11 @@ use std::str::FromStr;
use super::Segmenter; use super::Segmenter;
#[test]
fn test_data() {
crate::testcases::run(segmenter());
}
pub fn segmenter() -> Segmenter { pub fn segmenter() -> Segmenter {
let dir = PathBuf::from(format!("{}/data", env!("CARGO_MANIFEST_DIR"))); let dir = PathBuf::from(format!("{}/data", env!("CARGO_MANIFEST_DIR")));

View File

@ -1,22 +1,31 @@
#![cfg(feature = "__test_data")] use crate::Segmenter;
use once_cell::sync::Lazy; /// Run a segmenter against the built-in test cases
pub fn run(segmenter: Segmenter) {
use instant_segment::Segmenter; for test in TEST_CASES.iter().copied() {
let mut out = Vec::new();
#[test] segmenter.segment(&test.join(""), &mut out);
fn test_segment_0() { let cmp = out.iter().map(|s| &*s).collect::<Vec<_>>();
assert_segments(&["choose", "spain"]); assert_eq!(cmp, test);
}
} }
#[test] /// Built-in test cases
fn test_segment_1() { ///
assert_segments(&["this", "is", "a", "test"]); /// These are exposed so that you can test with different data sources.
} pub const TEST_CASES: &[&[&str]] = &[
&["choose", "spain"],
#[test] &["this", "is", "a", "test"],
fn test_segment_2() { &["who", "represents"],
assert_segments(&[ &["experts", "exchange"],
&["speed", "of", "art"],
&["now", "is", "the", "time", "for", "all", "good"],
&["it", "is", "a", "truth", "universally", "acknowledged"],
&[
"it", "was", "a", "bright", "cold", "day", "in", "april", "and", "the", "clocks", "were",
"striking", "thirteen",
],
&[
"when", "when",
"in", "in",
"the", "the",
@ -27,45 +36,8 @@ fn test_segment_2() {
"it", "it",
"becomes", "becomes",
"necessary", "necessary",
]); ],
} &[
#[test]
fn test_segment_3() {
assert_segments(&["who", "represents"]);
}
#[test]
fn test_segment_4() {
assert_segments(&["experts", "exchange"]);
}
#[test]
fn test_segment_5() {
assert_segments(&["speed", "of", "art"]);
}
#[test]
fn test_segment_6() {
assert_segments(&["now", "is", "the", "time", "for", "all", "good"]);
}
#[test]
fn test_segment_7() {
assert_segments(&["it", "is", "a", "truth", "universally", "acknowledged"]);
}
#[test]
fn test_segment_8() {
assert_segments(&[
"it", "was", "a", "bright", "cold", "day", "in", "april", "and", "the", "clocks", "were",
"striking", "thirteen",
]);
}
#[test]
fn test_segment_9() {
assert_segments(&[
"it", "it",
"was", "was",
"the", "the",
@ -90,12 +62,8 @@ fn test_segment_9() {
"age", "age",
"of", "of",
"foolishness", "foolishness",
]); ],
} &[
#[test]
fn test_segment_10() {
assert_segments(&[
"as", "as",
"gregor", "gregor",
"samsa", "samsa",
@ -116,23 +84,15 @@ fn test_segment_10() {
"a", "a",
"gigantic", "gigantic",
"insect", "insect",
]); ],
} &[
#[test]
fn test_segment_11() {
assert_segments(&[
"in", "a", "hole", "in", "the", "ground", "there", "lived", "a", "hobbit", "not", "a", "in", "a", "hole", "in", "the", "ground", "there", "lived", "a", "hobbit", "not", "a",
"nasty", "dirty", "wet", "hole", "filled", "with", "the", "ends", "of", "worms", "and", "nasty", "dirty", "wet", "hole", "filled", "with", "the", "ends", "of", "worms", "and",
"an", "oozy", "smell", "nor", "yet", "a", "dry", "bare", "sandy", "hole", "with", "an", "oozy", "smell", "nor", "yet", "a", "dry", "bare", "sandy", "hole", "with",
"nothing", "in", "it", "to", "sit", "down", "on", "or", "to", "eat", "it", "was", "a", "nothing", "in", "it", "to", "sit", "down", "on", "or", "to", "eat", "it", "was", "a",
"hobbit", "hole", "and", "that", "means", "comfort", "hobbit", "hole", "and", "that", "means", "comfort",
]); ],
} &[
#[test]
fn test_segment_12() {
assert_segments(&[
"far", "far",
"out", "out",
"in", "in",
@ -158,14 +118,5 @@ fn test_segment_12() {
"regarded", "regarded",
"yellow", "yellow",
"sun", "sun",
]); ],
} ];
fn assert_segments(s: &[&str]) {
let mut out = Vec::new();
SEGMENTER.segment(&s.join(""), &mut out);
let cmp = out.iter().map(|s| &*s).collect::<Vec<_>>();
assert_eq!(cmp, s);
}
static SEGMENTER: Lazy<Segmenter> = Lazy::new(instant_segment::test_data::segmenter);