From 113911b6ac34c157694ecea0b2e060bf3f973fa1 Mon Sep 17 00:00:00 2001 From: Hatter Jiang Date: Tue, 24 Nov 2020 23:52:30 +0800 Subject: [PATCH] feat: add fancy-regex --- __misc/fancy-regex/Cargo.lock | 81 +++++++++++++++++++++++++ __misc/fancy-regex/Cargo.toml | 11 ++++ __misc/fancy-regex/README.md | 6 ++ __misc/fancy-regex/src/main.rs | 104 +++++++++++++++++++++++++++++++++ 4 files changed, 202 insertions(+) create mode 100644 __misc/fancy-regex/Cargo.lock create mode 100644 __misc/fancy-regex/Cargo.toml create mode 100644 __misc/fancy-regex/README.md create mode 100644 __misc/fancy-regex/src/main.rs diff --git a/__misc/fancy-regex/Cargo.lock b/__misc/fancy-regex/Cargo.lock new file mode 100644 index 0000000..63f2e49 --- /dev/null +++ b/__misc/fancy-regex/Cargo.lock @@ -0,0 +1,81 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3" + +[[package]] +name = "fancy-regex" +version = "0.1.0" +dependencies = [ + "fancy-regex 0.4.1", +] + +[[package]] +name = "fancy-regex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36996e5f56f32ca51a937f325094fa450b32df871af1a89be331b7145b931bfc" +dependencies = [ + "bit-set", + "regex", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "regex" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" + +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static", +] diff --git a/__misc/fancy-regex/Cargo.toml b/__misc/fancy-regex/Cargo.toml new file mode 100644 index 0000000..0c8df97 --- /dev/null +++ b/__misc/fancy-regex/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "fancy-regex" +version = "0.1.0" +authors = ["Hatter Jiang "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +fancy-regex = "0.4.1" + diff --git a/__misc/fancy-regex/README.md b/__misc/fancy-regex/README.md new file mode 100644 index 0000000..803f6cd --- /dev/null +++ b/__misc/fancy-regex/README.md @@ -0,0 +1,6 @@ +https://github.com/fancy-regex/fancy-regex + +A Rust library for compiling and matching regular expressions. It uses a hybrid regex implementation designed to support a relatively rich set of features. 
In particular, it uses backtracking to implement "fancy" features such as look-around and backreferences, which are not supported in purely NFA-based implementations (exemplified by RE2, and implemented in Rust in the regex crate). + + + diff --git a/__misc/fancy-regex/src/main.rs b/__misc/fancy-regex/src/main.rs new file mode 100644 index 0000000..9a272bf --- /dev/null +++ b/__misc/fancy-regex/src/main.rs @@ -0,0 +1,104 @@ +use fancy_regex::internal::{analyze, compile, run_trace, Insn, Prog}; +use fancy_regex::*; +use std::env; +use std::str::FromStr; + +fn main() { + let mut args = env::args().skip(1); + if let Some(cmd) = args.next() { + if cmd == "parse" { + if let Some(re) = args.next() { + let e = Expr::parse_tree(&re); + println!("{:#?}", e); + } + } else if cmd == "analyze" { + if let Some(re) = args.next() { + let tree = Expr::parse_tree(&re).unwrap(); + let a = analyze(&tree); + println!("{:#?}", a); + } + } else if cmd == "compile" { + if let Some(re) = args.next() { + let r = Regex::new(&re).unwrap(); + r.debug_print(); + } + } else if cmd == "run" { + let re = args.next().expect("expected regexp argument"); + let r = Regex::new(&re).unwrap(); + let text = args.next().expect("expected text argument"); + let mut pos = 0; + if let Some(pos_str) = args.next() { + pos = usize::from_str(&pos_str).unwrap(); + } + if let Some(caps) = r.captures_from_pos(&text, pos).unwrap() { + print!("captures:"); + for i in 0..caps.len() { + print!(" {}:", i); + if let Some(m) = caps.get(i) { + print!("[{}..{}] \"{}\"", m.start(), m.end(), m.as_str()); + } else { + print!("_"); + } + } + println!(""); + for cap in caps.iter() { + println!("iterate {:?}", cap); + } + } else { + println!("no match"); + } + } else if cmd == "trace" { + if let Some(re) = args.next() { + let prog = prog(&re); + if let Some(s) = args.next() { + run_trace(&prog, &s, 0).unwrap(); + } + } + } else if cmd == "trace-inner" { + if let Some(re) = args.next() { + let tree = Expr::parse_tree(&re).unwrap(); +
let a = analyze(&tree).unwrap(); + let p = compile(&a).unwrap(); + if let Some(s) = args.next() { + run_trace(&p, &s, 0).unwrap(); + } + } + } else if cmd == "graph" { + let re = args.next().expect("expected regexp argument"); + graph(&re); + } else { + /* NOTE(review): this usage text names no arguments and has a stray space before the comma after graph - argument placeholders may have been lost; confirm against upstream before touching the string */ println!("commands: parse|analyze|compile|graph , run|trace|trace-inner "); + } + } +} + +/* Prints the compiled program for re to stdout as DOT-style text: a digraph G block with one labelled node per entry of prog.body, followed by that entry's outgoing edges. Panics if prog(re) panics. */ fn graph(re: &str) { + let prog = prog(re); + println!("digraph G {{"); + for (i, insn) in prog.body.iter().enumerate() { + /* Escape backslashes first, then double quotes, so the Debug text can sit inside the quoted label below. */ let label = format!("{:?}", insn) + .replace(r#"\"#, r#"\\"#) + .replace(r#"""#, r#"\""#); + println!(r#"{:3} [label="{}: {}"];"#, i, i, label); + match *insn { + /* Split gets one edge per target. */ Insn::Split(a, b) => { + println!("{:3} -> {};", i, a); + println!("{:3} -> {};", i, b); + } + Insn::Jmp(target) => { + println!("{:3} -> {};", i, target); + } + /* End gets no outgoing edge. */ Insn::End => {} + /* Every other instruction is drawn with a single edge to index i + 1. */ _ => { + println!("{:3} -> {};", i, i + 1); + } + } + } + println!("}}"); +} + +/* Builds a Prog from re by chaining Expr::parse_tree, analyze and compile; each stage goes through expect, so a failure panics with that stage's message. */ fn prog(re: &str) -> Prog { + let tree = Expr::parse_tree(re).expect("Expected parsing regex to work"); + let result = analyze(&tree).expect("Expected analyze to succeed"); + compile(&result).expect("Expected compile to succeed") +} \ No newline at end of file