feat: add fancy-regex
This commit is contained in:
81
__misc/fancy-regex/Cargo.lock
generated
Normal file
81
__misc/fancy-regex/Cargo.lock
generated
Normal file
@@ -0,0 +1,81 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3"
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"fancy-regex 0.4.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36996e5f56f32ca51a937f325094fa450b32df871af1a89be331b7145b931bfc"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
11
__misc/fancy-regex/Cargo.toml
Normal file
11
__misc/fancy-regex/Cargo.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "fancy-regex"
|
||||
version = "0.1.0"
|
||||
authors = ["Hatter Jiang <jht5945@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
fancy-regex = "0.4.1"
|
||||
|
||||
6
__misc/fancy-regex/README.md
Normal file
6
__misc/fancy-regex/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
https://github.com/fancy-regex/fancy-regex
|
||||
|
||||
A Rust library for compiling and matching regular expressions. It uses a hybrid regex implementation designed to support a relatively rich set of features. In particular, it uses backtracking to implement "fancy" features such as look-around and backreferences, which are not supported in purely NFA-based implementations (exemplified by RE2, and implemented in Rust in the regex crate).
|
||||
|
||||
|
||||
|
||||
104
__misc/fancy-regex/src/main.rs
Normal file
104
__misc/fancy-regex/src/main.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
use fancy_regex::internal::{analyze, compile, run_trace, Insn, Prog};
|
||||
use fancy_regex::*;
|
||||
use std::env;
|
||||
use std::str::FromStr;
|
||||
|
||||
fn main() {
|
||||
let mut args = env::args().skip(1);
|
||||
if let Some(cmd) = args.next() {
|
||||
if cmd == "parse" {
|
||||
if let Some(re) = args.next() {
|
||||
let e = Expr::parse_tree(&re);
|
||||
println!("{:#?}", e);
|
||||
}
|
||||
} else if cmd == "analyze" {
|
||||
if let Some(re) = args.next() {
|
||||
let tree = Expr::parse_tree(&re).unwrap();
|
||||
let a = analyze(&tree);
|
||||
println!("{:#?}", a);
|
||||
}
|
||||
} else if cmd == "compile" {
|
||||
if let Some(re) = args.next() {
|
||||
let r = Regex::new(&re).unwrap();
|
||||
r.debug_print();
|
||||
}
|
||||
} else if cmd == "run" {
|
||||
let re = args.next().expect("expected regexp argument");
|
||||
let r = Regex::new(&re).unwrap();
|
||||
let text = args.next().expect("expected text argument");
|
||||
let mut pos = 0;
|
||||
if let Some(pos_str) = args.next() {
|
||||
pos = usize::from_str(&pos_str).unwrap();
|
||||
}
|
||||
if let Some(caps) = r.captures_from_pos(&text, pos).unwrap() {
|
||||
print!("captures:");
|
||||
for i in 0..caps.len() {
|
||||
print!(" {}:", i);
|
||||
if let Some(m) = caps.get(i) {
|
||||
print!("[{}..{}] \"{}\"", m.start(), m.end(), m.as_str());
|
||||
} else {
|
||||
print!("_");
|
||||
}
|
||||
}
|
||||
println!("");
|
||||
for cap in caps.iter() {
|
||||
println!("iterate {:?}", cap);
|
||||
}
|
||||
} else {
|
||||
println!("no match");
|
||||
}
|
||||
} else if cmd == "trace" {
|
||||
if let Some(re) = args.next() {
|
||||
let prog = prog(&re);
|
||||
if let Some(s) = args.next() {
|
||||
run_trace(&prog, &s, 0).unwrap();
|
||||
}
|
||||
}
|
||||
} else if cmd == "trace-inner" {
|
||||
if let Some(re) = args.next() {
|
||||
let tree = Expr::parse_tree(&re).unwrap();
|
||||
let a = analyze(&tree).unwrap();
|
||||
let p = compile(&a).unwrap();
|
||||
if let Some(s) = args.next() {
|
||||
run_trace(&p, &s, 0).unwrap();
|
||||
}
|
||||
}
|
||||
} else if cmd == "graph" {
|
||||
let re = args.next().expect("expected regexp argument");
|
||||
graph(&re);
|
||||
} else {
|
||||
println!("commands: parse|analyze|compile|graph <expr>, run|trace|trace-inner <expr> <input>");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn graph(re: &str) {
|
||||
let prog = prog(re);
|
||||
println!("digraph G {{");
|
||||
for (i, insn) in prog.body.iter().enumerate() {
|
||||
let label = format!("{:?}", insn)
|
||||
.replace(r#"\"#, r#"\\"#)
|
||||
.replace(r#"""#, r#"\""#);
|
||||
println!(r#"{:3} [label="{}: {}"];"#, i, i, label);
|
||||
match *insn {
|
||||
Insn::Split(a, b) => {
|
||||
println!("{:3} -> {};", i, a);
|
||||
println!("{:3} -> {};", i, b);
|
||||
}
|
||||
Insn::Jmp(target) => {
|
||||
println!("{:3} -> {};", i, target);
|
||||
}
|
||||
Insn::End => {}
|
||||
_ => {
|
||||
println!("{:3} -> {};", i, i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
println!("}}");
|
||||
}
|
||||
|
||||
fn prog(re: &str) -> Prog {
|
||||
let tree = Expr::parse_tree(re).expect("Expected parsing regex to work");
|
||||
let result = analyze(&tree).expect("Expected analyze to succeed");
|
||||
compile(&result).expect("Expected compile to succeed")
|
||||
}
|
||||
Reference in New Issue
Block a user