diff --git a/Cargo.toml b/Cargo.toml
index 6ca65d2..5810c31 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,8 @@ repository = "https://github.com/InstantDomainSearch/instant-segment"
 documentation = "https://docs.rs/instant-segment"
 
 [features]
-__test_data = []
+__test_data = ["testcases"]
+testcases = []
 
 [dependencies]
 ahash = "0.7.0"
diff --git a/src/lib.rs b/src/lib.rs
index 5b9ea99..1f320db 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,6 +6,8 @@ use smartstring::alias::String;
 
 #[cfg(feature = "__test_data")]
 pub mod test_data;
+#[cfg(feature = "testcases")]
+pub mod testcases;
 
 /// Central data structure used to calculate word probabilities
 pub struct Segmenter {
diff --git a/src/test_data.rs b/src/test_data.rs
index 15cee34..5a4f8a7 100644
--- a/src/test_data.rs
+++ b/src/test_data.rs
@@ -7,6 +7,11 @@ use std::str::FromStr;
 
 use super::Segmenter;
 
+#[test]
+fn test_data() {
+    crate::testcases::run(segmenter());
+}
+
 pub fn segmenter() -> Segmenter {
     let dir = PathBuf::from(format!("{}/data", env!("CARGO_MANIFEST_DIR")));
 
diff --git a/tests/basic.rs b/src/testcases.rs
similarity index 57%
rename from tests/basic.rs
rename to src/testcases.rs
index b15cfc5..3cd008e 100644
--- a/tests/basic.rs
+++ b/src/testcases.rs
@@ -1,22 +1,31 @@
-#![cfg(feature = "__test_data")]
+use crate::Segmenter;
 
-use once_cell::sync::Lazy;
-
-use instant_segment::Segmenter;
-
-#[test]
-fn test_segment_0() {
-    assert_segments(&["choose", "spain"]);
+/// Run a segmenter against the built-in test cases
+pub fn run(segmenter: Segmenter) {
+    for test in TEST_CASES.iter().copied() {
+        let mut out = Vec::new();
+        segmenter.segment(&test.join(""), &mut out);
+        let cmp = out.iter().map(|s| &*s).collect::<Vec<_>>();
+        assert_eq!(cmp, test);
+    }
 }
 
-#[test]
-fn test_segment_1() {
-    assert_segments(&["this", "is", "a", "test"]);
-}
-
-#[test]
-fn test_segment_2() {
-    assert_segments(&[
+/// Built-in test cases
+///
+/// These are exposed so that you can test with different data sources.
+pub const TEST_CASES: &[&[&str]] = &[
+    &["choose", "spain"],
+    &["this", "is", "a", "test"],
+    &["who", "represents"],
+    &["experts", "exchange"],
+    &["speed", "of", "art"],
+    &["now", "is", "the", "time", "for", "all", "good"],
+    &["it", "is", "a", "truth", "universally", "acknowledged"],
+    &[
+        "it", "was", "a", "bright", "cold", "day", "in", "april", "and", "the", "clocks", "were",
+        "striking", "thirteen",
+    ],
+    &[
         "when",
         "in",
         "the",
@@ -27,45 +36,8 @@ fn test_segment_2() {
         "it",
         "becomes",
         "necessary",
-    ]);
-}
-
-#[test]
-fn test_segment_3() {
-    assert_segments(&["who", "represents"]);
-}
-
-#[test]
-fn test_segment_4() {
-    assert_segments(&["experts", "exchange"]);
-}
-
-#[test]
-fn test_segment_5() {
-    assert_segments(&["speed", "of", "art"]);
-}
-
-#[test]
-fn test_segment_6() {
-    assert_segments(&["now", "is", "the", "time", "for", "all", "good"]);
-}
-
-#[test]
-fn test_segment_7() {
-    assert_segments(&["it", "is", "a", "truth", "universally", "acknowledged"]);
-}
-
-#[test]
-fn test_segment_8() {
-    assert_segments(&[
-        "it", "was", "a", "bright", "cold", "day", "in", "april", "and", "the", "clocks", "were",
-        "striking", "thirteen",
-    ]);
-}
-
-#[test]
-fn test_segment_9() {
-    assert_segments(&[
+    ],
+    &[
         "it",
         "was",
         "the",
@@ -90,12 +62,8 @@ fn test_segment_9() {
         "age",
         "of",
         "foolishness",
-    ]);
-}
-
-#[test]
-fn test_segment_10() {
-    assert_segments(&[
+    ],
+    &[
         "as",
         "gregor",
         "samsa",
@@ -116,23 +84,15 @@ fn test_segment_10() {
         "a",
         "gigantic",
         "insect",
-    ]);
-}
-
-#[test]
-fn test_segment_11() {
-    assert_segments(&[
+    ],
+    &[
         "in", "a", "hole", "in", "the", "ground", "there", "lived", "a", "hobbit", "not", "a",
         "nasty", "dirty", "wet", "hole", "filled", "with", "the", "ends", "of", "worms", "and",
         "an", "oozy", "smell", "nor", "yet", "a", "dry", "bare", "sandy", "hole", "with", "nothing",
         "in", "it", "to", "sit", "down", "on", "or", "to", "eat", "it", "was", "a", "hobbit",
         "hole", "and", "that", "means", "comfort",
-    ]);
-}
-
-#[test]
-fn test_segment_12() {
-    assert_segments(&[
+    ],
+    &[
         "far",
         "out",
         "in",
@@ -158,14 +118,5 @@ fn test_segment_12() {
         "regarded",
         "yellow",
         "sun",
-    ]);
-}
-
-fn assert_segments(s: &[&str]) {
-    let mut out = Vec::new();
-    SEGMENTER.segment(&s.join(""), &mut out);
-    let cmp = out.iter().map(|s| &*s).collect::<Vec<_>>();
-    assert_eq!(cmp, s);
-}
-
-static SEGMENTER: Lazy<Segmenter> = Lazy::new(instant_segment::test_data::segmenter);
+    ],
+];