optimize domain name matching

This commit is contained in:
wyhaya
2019-12-31 20:07:51 +08:00
parent e81d6de355
commit d4d796d3b1
6 changed files with 217 additions and 125 deletions

View File

@@ -1,3 +1,4 @@
use crate::matcher::Matcher;
use futures::future::{BoxFuture, FutureExt};
use regex::Regex;
use std::{
@@ -34,7 +35,7 @@ pub enum InvalidType {
}
impl InvalidType {
pub fn as_str(&self) -> &str {
pub fn description(&self) -> &str {
match self {
InvalidType::SocketAddr => "Cannot parse socket address",
InvalidType::IpAddr => "Cannot parse ip address",
@@ -47,7 +48,7 @@ impl InvalidType {
#[derive(Debug)]
pub struct Hosts {
record: Vec<(Host, IpAddr)>,
record: Vec<(Matcher, IpAddr)>,
}
impl Hosts {
@@ -55,7 +56,7 @@ impl Hosts {
Hosts { record: Vec::new() }
}
fn push(&mut self, record: (Host, IpAddr)) {
fn push(&mut self, record: (Matcher, IpAddr)) {
self.record.push(record);
}
@@ -65,7 +66,7 @@ impl Hosts {
}
}
pub fn iter(&mut self) -> Iter<(Host, IpAddr)> {
pub fn iter(&mut self) -> Iter<(Matcher, IpAddr)> {
self.record.iter()
}
@@ -79,61 +80,6 @@ impl Hosts {
}
}
// domain match
/// Characters a pattern may contain to be treated as a plain-text domain:
/// lowercase ASCII letters, digits, `-` and `.` (checked by `Host::is_text`).
const TEXT: &str = "abcdefghijklmnopqrstuvwxyz0123456789-.";
/// The `TEXT` set plus `*`; a pattern made only of these characters is
/// treated as a wildcard pattern (checked by `Host::is_wildcard`).
const WILDCARD: &str = "abcdefghijklmnopqrstuvwxyz0123456789-.*";
/// A host pattern; wraps the matching strategy selected by `Host::new`.
#[derive(Debug)]
pub struct Host(MatchMode);
/// Strategy a `Host` uses to compare itself against a domain.
#[derive(Debug)]
enum MatchMode {
/// Exact string comparison against the stored text.
Text(String),
/// Regular-expression match; also used for wildcard patterns, which
/// `Host::new` rewrites into an anchored regex.
Regex(Regex),
}
impl Host {
fn new(domain: &str) -> result::Result<Host, regex::Error> {
// example.com
if Self::is_text(domain) {
return Ok(Host(MatchMode::Text(domain.to_string())));
}
// *.example.com
if Self::is_wildcard(domain) {
let s = format!("^{}$", domain.replace(".", r"\.").replace("*", r"[^.]+"));
return Ok(Host(MatchMode::Regex(Regex::new(&s)?)));
}
// use regex
Ok(Host(MatchMode::Regex(Regex::new(domain)?)))
}
fn is_text(domain: &str) -> bool {
domain.chars().all(|item| TEXT.chars().any(|c| item == c))
}
fn is_wildcard(domain: &str) -> bool {
domain
.chars()
.all(|item| WILDCARD.chars().any(|c| item == c))
}
pub fn is_match(&self, domain: &str) -> bool {
match &self.0 {
MatchMode::Text(text) => text == domain,
MatchMode::Regex(reg) => reg.is_match(domain),
}
}
pub fn as_str(&self) -> &str {
match &self.0 {
MatchMode::Text(text) => text,
MatchMode::Regex(reg) => reg.as_str(),
}
}
}
#[derive(Debug)]
pub struct Config {
pub bind: Vec<SocketAddr>,
@@ -223,17 +169,17 @@ impl Parser {
// match host
// example.com 0.0.0.0
// 0.0.0.0 example.com
fn record(left: &str, right: &str) -> result::Result<(Host, IpAddr), InvalidType> {
fn record(left: &str, right: &str) -> result::Result<(Matcher, IpAddr), InvalidType> {
// ip domain
if let Ok(ip) = right.parse() {
return Host::new(left)
return Matcher::new(left)
.map(|host| (host, ip))
.map_err(|_| InvalidType::Regex);
}
// domain ip
if let Ok(ip) = left.parse() {
return Host::new(right)
return Matcher::new(right)
.map(|host| (host, ip))
.map_err(|_| InvalidType::Regex);
}
@@ -307,39 +253,3 @@ impl Parser {
.boxed()
}
}
// Unit tests for `Host`: one test per matching mode.
#[cfg(test)]
mod test_host {
use super::*;
// Placeholder: no assertions yet.
#[test]
fn test_create() {}
// Plain-text patterns must match the exact domain only.
#[test]
fn test_text() {
let host = Host::new("example.com").unwrap();
assert!(host.is_match("example.com"));
assert!(!host.is_match("-example.com"));
assert!(!host.is_match("example.com.cn"));
}
// `*` stands for a single non-empty label and never spans a dot.
#[test]
fn test_wildcard() {
let host = Host::new("*.example.com").unwrap();
assert!(host.is_match("test.example.com"));
assert!(!host.is_match("test.example.test"));
assert!(!host.is_match("test.test.com"));
let host = Host::new("*.example.*").unwrap();
assert!(host.is_match("test.example.test"));
assert!(!host.is_match("example.com"));
assert!(!host.is_match("test.test.test"));
}
// Anything outside the text/wildcard alphabets is compiled as a regex as-is.
#[test]
fn test_regex() {
let host = Host::new("^example.com$").unwrap();
assert!(host.is_match("example.com"));
assert!(!host.is_match("test.example.com"));
}
}

View File

@@ -3,6 +3,7 @@ extern crate lazy_static;
mod config;
mod lib;
mod matcher;
mod watch;
use ace::App;
@@ -136,11 +137,11 @@ async fn main() {
let n = config
.hosts
.iter()
.map(|(r, _)| r.as_str().len())
.map(|(m, _)| m.to_string().len())
.fold(0, |a, b| a.max(b));
for (host, ip) in config.hosts.iter() {
println!("{:domain$} {}", host.as_str(), ip, domain = n);
println!("{:domain$} {}", host.to_string(), ip, domain = n);
}
}
"config" => {
@@ -227,7 +228,7 @@ fn output_invalid(errors: &[Invalid]) {
error!(
"[line:{}] {} `{}`",
invalid.line,
invalid.kind.as_str(),
invalid.kind.description(),
invalid.source
);
}

184
src/matcher.rs Normal file
View File

@@ -0,0 +1,184 @@
use regex::{Error, Regex};
use std::fmt;
/// A domain pattern; wraps the matching strategy selected by `Matcher::new`.
#[derive(Debug)]
pub struct Matcher(MatchMode);
/// Strategy a `Matcher` uses to compare itself against a domain.
#[derive(Debug)]
enum MatchMode {
/// Exact string comparison against the stored text.
Static(String),
/// Pattern containing `*`, handled by `WildcardMatch`.
Wildcard(WildcardMatch),
/// Compiled regular expression (pattern was given with a leading `~`).
Regex(Regex),
}
/// Leading marker that makes `Matcher::new` treat the rest of the pattern as a regex.
const REGEX_WORD: char = '~';
/// Character whose presence makes `Matcher::new` build a wildcard matcher.
const WILDCARD: char = '*';
impl Matcher {
pub fn new(raw: &str) -> Result<Self, Error> {
// Use regex: ~^example\.com$
if raw.starts_with(REGEX_WORD) {
let reg = raw.replacen(REGEX_WORD, "", 1);
let mode = MatchMode::Regex(Regex::new(&reg)?);
return Ok(Matcher(mode));
}
// Use wildcard match: *.example.com
let find = raw.chars().any(|c| c == WILDCARD);
if find {
let mode = MatchMode::Wildcard(WildcardMatch::new(raw));
return Ok(Matcher(mode));
}
// Plain Text: example.com
Ok(Matcher(MatchMode::Static(raw.to_string())))
}
pub fn is_match(&self, domain: &str) -> bool {
match &self.0 {
MatchMode::Static(raw) => raw == domain,
MatchMode::Wildcard(raw) => raw.is_match(domain),
MatchMode::Regex(raw) => raw.is_match(domain),
}
}
}
/// A domain pattern in which every `*` stands for one or more characters
/// other than `.`; all other characters must match literally.
#[derive(Debug)]
struct WildcardMatch {
    /// The pattern, one entry per character, exactly as given to `new`.
    chars: Vec<char>,
}

impl WildcardMatch {
    /// Stores `raw` as the pattern; no validation is performed.
    fn new(raw: &str) -> Self {
        Self {
            chars: raw.chars().collect(),
        }
    }

    /// Reports whether the whole of `text` matches the pattern.
    ///
    /// Because `*` never matches `.`, every dot in `text` has to line up with
    /// a literal dot in the pattern. The comparison is therefore done label by
    /// label: both sides must have the same number of dot-separated labels and
    /// each pattern label must match the text label in the same position.
    fn is_match(&self, text: &str) -> bool {
        let mut pattern_labels = self.chars.split(|c| *c == '.');
        let mut text_labels = text.split('.');
        loop {
            match (pattern_labels.next(), text_labels.next()) {
                (Some(pattern), Some(label)) => {
                    if !Self::label_match(pattern, label) {
                        return false;
                    }
                }
                // Both sides ran out together: every label matched.
                (None, None) => return true,
                // Different number of labels.
                _ => return false,
            }
        }
    }

    /// Matches a single dot-free `label` against a dot-free `pattern` slice.
    ///
    /// Each `*` first absorbs exactly one character; whenever the rest of the
    /// pattern then fails to match, the most recent `*` absorbs one more
    /// character and matching resumes right after it. This lets a `*` be
    /// followed by literal characters inside the same label (`*-cdn`, `a*b`).
    fn label_match(pattern: &[char], label: &str) -> bool {
        let mut text = label.chars();
        let mut p = 0;
        // Pattern index just past the most recent `*`, and the text position
        // just past what that `*` has absorbed so far.
        let mut resume: Option<(usize, std::str::Chars<'_>)> = None;
        loop {
            match pattern.get(p) {
                Some('*') => {
                    // A `*` must absorb at least one character.
                    if text.next().is_none() {
                        return false;
                    }
                    p += 1;
                    resume = Some((p, text.clone()));
                    continue;
                }
                Some(&literal) => {
                    let mut ahead = text.clone();
                    if ahead.next() == Some(literal) {
                        text = ahead;
                        p += 1;
                        continue;
                    }
                }
                None => {
                    if text.as_str().is_empty() {
                        return true;
                    }
                }
            }
            // Mismatch: widen the most recent `*` by one character and retry.
            match resume.as_mut() {
                Some((after_star, absorbed)) => {
                    if absorbed.next().is_none() {
                        return false;
                    }
                    p = *after_star;
                    text = absorbed.clone();
                }
                None => return false,
            }
        }
    }
}
impl fmt::Display for Matcher {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match &self.0 {
MatchMode::Static(raw) => write!(f, "{}", raw),
MatchMode::Wildcard(raw) => {
let mut s = String::new();
for ch in raw.chars.clone() {
s.push(ch);
}
write!(f, "{}", s)
}
MatchMode::Regex(raw) => write!(f, "~{}", raw.as_str()),
}
}
}
/// Unit tests for `Matcher`: construction, each matching mode, and `Display`.
#[cfg(test)]
mod test_matcher {
    use super::*;

    /// Every pattern kind can be built, and a broken regex is reported.
    #[test]
    fn test_create() {
        assert!(Matcher::new("example.com").is_ok());
        assert!(Matcher::new("*.example.com").is_ok());
        assert!(Matcher::new(r"~^example\.com$").is_ok());
        // Unclosed group: the regex after `~` does not compile.
        assert!(Matcher::new("~(").is_err());
    }

    /// Plain-text patterns match the exact domain only.
    #[test]
    fn test_text() {
        let matcher = Matcher::new("example.com").unwrap();
        assert!(matcher.is_match("example.com"));
        assert!(!matcher.is_match("-example.com"));
        assert!(!matcher.is_match("example.com.cn"));
    }

    /// `*` stands for a non-empty label and never spans a dot.
    #[test]
    fn test_wildcard() {
        let matcher = Matcher::new("*").unwrap();
        assert!(matcher.is_match("localhost"));
        assert!(!matcher.is_match(".localhost"));
        assert!(!matcher.is_match("localhost."));
        assert!(!matcher.is_match("local.host"));

        let matcher = Matcher::new("*.com").unwrap();
        assert!(matcher.is_match("test.com"));
        assert!(matcher.is_match("example.com"));
        assert!(!matcher.is_match("test.test"));
        assert!(!matcher.is_match(".test.com"));
        assert!(!matcher.is_match("test.com."));
        assert!(!matcher.is_match("test.test.com"));

        let matcher = Matcher::new("*.*").unwrap();
        assert!(matcher.is_match("test.test"));
        assert!(!matcher.is_match(".test.test"));
        assert!(!matcher.is_match("test.test."));
        assert!(!matcher.is_match("test.test.test"));

        let matcher = Matcher::new("*.example.com").unwrap();
        assert!(matcher.is_match("test.example.com"));
        assert!(matcher.is_match("example.example.com"));
        assert!(!matcher.is_match("test.example.com.com"));
        assert!(!matcher.is_match("test.test.example.com"));

        let matcher = Matcher::new("*.example.*").unwrap();
        assert!(matcher.is_match("test.example.com"));
        assert!(matcher.is_match("example.example.com"));
        assert!(!matcher.is_match("test.test.example.test"));
        assert!(!matcher.is_match("test.example.test.test"));
    }

    /// A leading `~` switches to regex matching on the remainder.
    #[test]
    fn test_regex() {
        let matcher = Matcher::new("~^example.com$").unwrap();
        assert!(matcher.is_match("example.com"));
        assert!(!matcher.is_match("test.example.com"));
    }

    /// `Display` gives back the pattern in the form `Matcher::new` accepts.
    #[test]
    fn test_to_string() {
        let matcher = Matcher::new("example.com").unwrap();
        assert_eq!(matcher.to_string(), "example.com");

        let matcher = Matcher::new("*.example.com").unwrap();
        assert_eq!(matcher.to_string(), "*.example.com");

        let matcher = Matcher::new(r"~^example\.com$").unwrap();
        assert_eq!(matcher.to_string(), r"~^example\.com$");
    }
}