// SHiNE-server/shine-solana/shine/programs/shine_login_guard/build.rs
//
// 115 lines
// 3.6 KiB
// Rust

use std::collections::BTreeSet;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
/// Relative path of the directory whose `.txt` files supply the words emitted
/// into the generated `PREMIUM_WORDS` table.
const PREMIUM_DIR: &str = "src/dictionaries/premium";
/// Relative path of the directory whose `.txt` files supply the words emitted
/// into the generated `TRADEMARK_WORDS` table.
const TRADEMARKS_DIR: &str = "src/dictionaries/trademarks";
/// Canonicalises one dictionary entry.
///
/// Surrounding whitespace is stripped and ASCII letters are lower-cased.
/// Returns `None` when the trimmed entry is empty, is longer than 20 bytes,
/// or contains anything other than ASCII letters and digits.
fn normalize_word(word: &str) -> Option<String> {
    let candidate = word.trim();

    // Lower-casing changes neither the length nor whether a byte is ASCII
    // alphanumeric, so validation can run on the trimmed slice and the
    // allocation is only paid for accepted words.
    let acceptable = !candidate.is_empty()
        && candidate.len() <= 20
        && candidate.bytes().all(|b| b.is_ascii_alphanumeric());

    acceptable.then(|| candidate.to_ascii_lowercase())
}
/// Collects every file with a `txt` extension found anywhere beneath `dir`.
///
/// Directories that cannot be listed and entries that cannot be read are
/// skipped silently, so a missing `dir` simply yields an empty list. The
/// extension comparison is case-sensitive. The returned paths are sorted.
fn gather_files(dir: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    // Explicit work list of directories still to be visited.
    let mut pending: Vec<PathBuf> = vec![dir.to_path_buf()];

    while let Some(current) = pending.pop() {
        let listing = match fs::read_dir(&current) {
            Ok(listing) => listing,
            Err(_) => continue,
        };
        for path in listing.flatten().map(|entry| entry.path()) {
            if path.is_dir() {
                pending.push(path);
            } else if path.extension().and_then(|ext| ext.to_str()) == Some("txt") {
                found.push(path);
            }
        }
    }

    // A single sort at the end makes the result independent of traversal order.
    found.sort();
    found
}
fn load_word_set(dir: &Path, label: &str) -> BTreeSet<String> {
let mut out = BTreeSet::new();
let mut seen: HashMap<String, usize> = HashMap::new();
for file in gather_files(dir) {
println!("cargo:rerun-if-changed={}", file.display());
let raw = fs::read_to_string(&file).unwrap_or_default();
for line in raw.lines() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
if let Some(w) = normalize_word(line) {
*seen.entry(w.clone()).or_insert(0) += 1;
out.insert(w);
}
}
}
let mut duplicate_words = 0usize;
let mut duplicate_entries = 0usize;
let mut sample: Vec<String> = Vec::new();
let mut keys: Vec<_> = seen.keys().cloned().collect();
keys.sort();
for k in keys {
if let Some(cnt) = seen.get(&k) {
if *cnt > 1 {
duplicate_words += 1;
duplicate_entries += cnt - 1;
if sample.len() < 40 {
sample.push(format!("{k} x{cnt}"));
}
}
}
}
if duplicate_words > 0 {
println!(
"cargo:warning=[{label}] duplicates found: words={}, extra_entries={}",
duplicate_words, duplicate_entries
);
println!(
"cargo:warning=[{label}] duplicate samples: {}",
sample.join(", ")
);
}
out
}
fn main() {
let premium_dir = Path::new(PREMIUM_DIR);
let trademarks_dir = Path::new(TRADEMARKS_DIR);
println!("cargo:rerun-if-changed={}", premium_dir.display());
println!("cargo:rerun-if-changed={}", trademarks_dir.display());
let premium = load_word_set(premium_dir, "premium");
let trademarks = load_word_set(trademarks_dir, "trademarks");
let premium_words: Vec<String> = premium.into_iter().collect();
let trademark_words: Vec<String> = trademarks.into_iter().collect();
let mut out = String::new();
out.push_str("// @generated by build.rs\n");
out.push_str("pub static PREMIUM_WORDS: &[&str] = &[\n");
for w in &premium_words {
out.push_str(" \"");
out.push_str(w);
out.push_str("\",\n");
}
out.push_str("];\n");
out.push_str("pub static TRADEMARK_WORDS: &[&str] = &[\n");
for w in &trademark_words {
out.push_str(" \"");
out.push_str(w);
out.push_str("\",\n");
}
out.push_str("];\n");
let out_dir = env::var("OUT_DIR").expect("OUT_DIR is not set");
let dst = Path::new(&out_dir).join("generated_dictionary.rs");
fs::write(dst, out).expect("failed to write generated dictionary");
}