feat: add fancy-regex

This commit is contained in:
2020-11-24 23:52:30 +08:00
parent 9eecc77ac0
commit 113911b6ac
4 changed files with 202 additions and 0 deletions

81
__misc/fancy-regex/Cargo.lock generated Normal file
View File

@@ -0,0 +1,81 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "bit-set"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3"
[[package]]
name = "fancy-regex"
version = "0.1.0"
dependencies = [
"fancy-regex 0.4.1",
]
[[package]]
name = "fancy-regex"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36996e5f56f32ca51a937f325094fa450b32df871af1a89be331b7145b931bfc"
dependencies = [
"bit-set",
"regex",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "memchr"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "regex"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static",
]

View File

@@ -0,0 +1,11 @@
[package]
name = "fancy-regex"
version = "0.1.0"
authors = ["Hatter Jiang <jht5945@gmail.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
fancy-regex = "0.4.1"

View File

@@ -0,0 +1,6 @@
https://github.com/fancy-regex/fancy-regex
A Rust library for compiling and matching regular expressions. It uses a hybrid regex implementation designed to support a relatively rich set of features. In particular, it uses backtracking to implement "fancy" features such as look-around and backreferences, which are not supported in purely NFA-based implementations (exemplified by RE2, and implemented in Rust in the regex crate).

View File

@@ -0,0 +1,104 @@
use fancy_regex::internal::{analyze, compile, run_trace, Insn, Prog};
use fancy_regex::*;
use std::env;
use std::str::FromStr;
/// Prints the list of supported sub-commands to stdout.
fn print_usage() {
    println!("commands: parse|analyze|compile|graph <expr>, run|trace|trace-inner <expr> <input>");
}

/// Command-line entry point for experimenting with `fancy_regex`.
///
/// The first argument selects a sub-command; the remaining arguments are the
/// regular expression and, for some commands, an input text:
///
/// * `parse <expr>` — print the result of `Expr::parse_tree`.
/// * `analyze <expr>` — print the result of `analyze` on the parsed tree.
/// * `compile <expr>` — build a `Regex` and call its `debug_print`.
/// * `run <expr> <input> [pos]` — print the capture groups found when
///   matching from byte position `pos` (default 0), or `no match`.
/// * `trace <expr> <input>` / `trace-inner <expr> <input>` — compile the
///   expression and call `run_trace` on the input.
/// * `graph <expr>` — print the compiled program as a `digraph`.
///
/// With no sub-command, or an unknown one, the usage line is printed.
///
/// # Panics
///
/// Panics if the expression fails to parse/analyze/compile, if a required
/// argument of `run` or `graph` is missing, or if `pos` is not a valid
/// `usize`.
fn main() {
    let mut args = env::args().skip(1);

    let cmd = match args.next() {
        Some(cmd) => cmd,
        None => {
            // Show the usage line instead of exiting without any output.
            print_usage();
            return;
        }
    };

    match cmd.as_str() {
        "parse" => {
            if let Some(re) = args.next() {
                let e = Expr::parse_tree(&re);
                println!("{:#?}", e);
            }
        }
        "analyze" => {
            if let Some(re) = args.next() {
                let tree = Expr::parse_tree(&re).unwrap();
                let a = analyze(&tree);
                println!("{:#?}", a);
            }
        }
        "compile" => {
            if let Some(re) = args.next() {
                let r = Regex::new(&re).unwrap();
                r.debug_print();
            }
        }
        "run" => {
            let re = args.next().expect("expected regexp argument");
            let r = Regex::new(&re).unwrap();
            let text = args.next().expect("expected text argument");
            // Optional third argument: position to start matching from.
            let pos = match args.next() {
                Some(pos_str) => usize::from_str(&pos_str)
                    .expect("expected position argument to be a non-negative integer"),
                None => 0,
            };

            if let Some(caps) = r.captures_from_pos(&text, pos).unwrap() {
                print!("captures:");
                for i in 0..caps.len() {
                    print!(" {}:", i);
                    if let Some(m) = caps.get(i) {
                        print!("[{}..{}] \"{}\"", m.start(), m.end(), m.as_str());
                    } else {
                        // Group did not participate in the match.
                        print!("_");
                    }
                }
                println!();
                for cap in caps.iter() {
                    println!("iterate {:?}", cap);
                }
            } else {
                println!("no match");
            }
        }
        "trace" => {
            if let Some(re) = args.next() {
                let prog = prog(&re);
                if let Some(s) = args.next() {
                    run_trace(&prog, &s, 0).unwrap();
                }
            }
        }
        "trace-inner" => {
            if let Some(re) = args.next() {
                let tree = Expr::parse_tree(&re).unwrap();
                let a = analyze(&tree).unwrap();
                let p = compile(&a).unwrap();
                if let Some(s) = args.next() {
                    run_trace(&p, &s, 0).unwrap();
                }
            }
        }
        "graph" => {
            let re = args.next().expect("expected regexp argument");
            graph(&re);
        }
        _ => print_usage(),
    }
}
/// Compiles `re` and prints its instruction list to stdout as a `digraph G`.
///
/// Every instruction becomes a node labelled `<index>: <Debug of insn>`, with
/// backslashes and double quotes escaped so the label stays a valid quoted
/// string. Edges are emitted as follows:
///
/// * `Insn::Split(a, b)` — one edge to `a` and one to `b`;
/// * `Insn::Jmp(target)` — one edge to `target`;
/// * `Insn::End` — no outgoing edge;
/// * anything else — one edge to the next instruction index.
///
/// Panics (via `prog`) if the expression cannot be compiled.
fn graph(re: &str) {
    let program = prog(re);
    // Shared formatter for a single `from -> to` edge line.
    let edge = |from: usize, to: usize| println!("{:3} -> {};", from, to);

    println!("digraph G {{");
    for (idx, insn) in program.body.iter().enumerate() {
        let debug = format!("{:?}", insn);
        // Escape backslashes first so the quote escapes added next are kept intact.
        let escaped = debug.replace(r#"\"#, r#"\\"#).replace(r#"""#, r#"\""#);
        println!(r#"{:3} [label="{}: {}"];"#, idx, idx, escaped);

        match insn {
            Insn::Split(first, second) => {
                edge(idx, *first);
                edge(idx, *second);
            }
            Insn::Jmp(target) => edge(idx, *target),
            Insn::End => {}
            _ => edge(idx, idx + 1),
        }
    }
    println!("}}");
}
/// Turns the regular expression `re` into a `Prog` by running it through
/// `Expr::parse_tree`, `analyze` and `compile` in turn.
///
/// # Panics
///
/// Panics with a stage-specific message if any of the three steps returns an
/// error.
fn prog(re: &str) -> Prog {
    let parsed = Expr::parse_tree(re).expect("Expected parsing regex to work");
    let info = analyze(&parsed).expect("Expected analyze to succeed");
    let compiled = compile(&info);
    compiled.expect("Expected compile to succeed")
}