// Build script: collects the word lists under the premium and trademark
// dictionary directories and writes them as sorted static string arrays to
// `$OUT_DIR/generated_dictionary.rs`.

use std::collections::BTreeSet;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};

/// Directory scanned for premium word lists.
const PREMIUM_DIR: &str = "src/dictionaries/premium";
/// Directory scanned for trademark word lists.
const TRADEMARKS_DIR: &str = "src/dictionaries/trademarks";

/// Longest accepted word, in bytes (accepted words are ASCII-only).
const MAX_WORD_LEN: usize = 20;
/// Maximum number of duplicate words listed in the duplicate-sample warning.
const MAX_DUPLICATE_SAMPLES: usize = 40;

/// Normalizes one dictionary entry.
///
/// Trims surrounding whitespace and lowercases ASCII letters. Returns `None`
/// when the result is empty, longer than [`MAX_WORD_LEN`] bytes, or contains
/// anything other than ASCII letters and digits.
fn normalize_word(word: &str) -> Option<String> {
    let w = word.trim().to_ascii_lowercase();
    let valid = !w.is_empty()
        && w.len() <= MAX_WORD_LEN
        && w.bytes().all(|b| b.is_ascii_alphanumeric());
    if valid {
        Some(w)
    } else {
        None
    }
}

/// Recursively collects every `.txt` file below `dir`, sorted by path.
///
/// A directory that cannot be read (including one that does not exist)
/// contributes no files, and directory entries that fail to load are skipped.
fn gather_files(dir: &Path) -> Vec<PathBuf> {
    let mut files = Vec::new();
    if let Ok(entries) = fs::read_dir(dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                files.extend(gather_files(&path));
            } else if path.extension().and_then(|ext| ext.to_str()) == Some("txt") {
                files.push(path);
            }
        }
    }
    // Sorted so the order of emitted cargo directives is deterministic.
    files.sort();
    files
}

/// Prints `cargo:warning` lines summarizing the words that occur more than once.
///
/// `counts` maps each normalized word to its number of occurrences. Nothing is
/// printed when no word is duplicated. At most [`MAX_DUPLICATE_SAMPLES`]
/// duplicates (in alphabetical order) are listed in the sample line.
fn report_duplicates(label: &str, counts: &HashMap<String, usize>) {
    let mut duplicates: Vec<(&str, usize)> = counts
        .iter()
        .filter_map(|(word, &n)| if n > 1 { Some((word.as_str(), n)) } else { None })
        .collect();
    if duplicates.is_empty() {
        return;
    }
    // Words are unique map keys, so this orders the report alphabetically.
    duplicates.sort_unstable();

    let duplicate_words = duplicates.len();
    let duplicate_entries: usize = duplicates.iter().map(|&(_, n)| n - 1).sum();
    let sample: Vec<String> = duplicates
        .iter()
        .take(MAX_DUPLICATE_SAMPLES)
        .map(|(word, n)| format!("{word} x{n}"))
        .collect();

    println!(
        "cargo:warning=[{label}] duplicates found: words={}, extra_entries={}",
        duplicate_words, duplicate_entries
    );
    println!(
        "cargo:warning=[{label}] duplicate samples: {}",
        sample.join(", ")
    );
}

/// Loads every valid word from the `.txt` files below `dir`.
///
/// Each file is registered with `cargo:rerun-if-changed`. Blank lines and
/// lines starting with `#` are ignored; remaining lines are passed through
/// [`normalize_word`] and dropped when it rejects them. A file that cannot be
/// read as UTF-8 text is skipped with a `cargo:warning` instead of being
/// silently treated as empty. Duplicate words are reported via `cargo:warning`
/// lines prefixed with `label`.
///
/// Returns the distinct normalized words in sorted order.
fn load_word_set(dir: &Path, label: &str) -> BTreeSet<String> {
    let mut counts: HashMap<String, usize> = HashMap::new();

    for file in gather_files(dir) {
        println!("cargo:rerun-if-changed={}", file.display());
        let raw = match fs::read_to_string(&file) {
            Ok(text) => text,
            Err(err) => {
                println!(
                    "cargo:warning=[{label}] skipping unreadable file {}: {err}",
                    file.display()
                );
                continue;
            }
        };
        for line in raw.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            if let Some(word) = normalize_word(line) {
                *counts.entry(word).or_insert(0) += 1;
            }
        }
    }

    report_duplicates(label, &counts);
    // The map keys are exactly the distinct words; the set sorts them.
    counts.into_keys().collect()
}

/// Appends `pub static <name>: &[&str] = &[ ... ];` to `out`, one word per line.
///
/// Words are written without escaping; callers pass words accepted by
/// [`normalize_word`], which are ASCII alphanumeric only.
fn push_static_array(out: &mut String, name: &str, words: &BTreeSet<String>) {
    out.push_str("pub static ");
    out.push_str(name);
    out.push_str(": &[&str] = &[\n");
    for word in words {
        out.push_str(" \"");
        out.push_str(word);
        out.push_str("\",\n");
    }
    out.push_str("];\n");
}

/// Renders the full generated source: a header comment followed by the
/// `PREMIUM_WORDS` and `TRADEMARK_WORDS` arrays.
fn render_dictionary(premium: &BTreeSet<String>, trademarks: &BTreeSet<String>) -> String {
    let mut out = String::new();
    out.push_str("// @generated by build.rs\n");
    push_static_array(&mut out, "PREMIUM_WORDS", premium);
    push_static_array(&mut out, "TRADEMARK_WORDS", trademarks);
    out
}

/// Build-script entry point.
///
/// # Panics
///
/// Panics when `OUT_DIR` is not set or the generated file cannot be written.
fn main() {
    let premium_dir = Path::new(PREMIUM_DIR);
    let trademarks_dir = Path::new(TRADEMARKS_DIR);
    // Watch the directories themselves so added or removed files trigger a rebuild.
    println!("cargo:rerun-if-changed={}", premium_dir.display());
    println!("cargo:rerun-if-changed={}", trademarks_dir.display());

    let premium = load_word_set(premium_dir, "premium");
    let trademarks = load_word_set(trademarks_dir, "trademarks");
    let out = render_dictionary(&premium, &trademarks);

    // `var_os` keeps working when OUT_DIR is not valid UTF-8; only a path is needed.
    let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR is not set");
    let dst = Path::new(&out_dir).join("generated_dictionary.rs");
    fs::write(dst, out).expect("failed to write generated dictionary");
}