From 2294a2f5d3e1746797e03d17b250e8c952d7b045 Mon Sep 17 00:00:00 2001 From: Hatter Jiang Date: Fri, 19 Jun 2020 01:13:18 +0800 Subject: [PATCH] fill download --- Cargo.lock | 103 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 4 +- src/cmd_download.rs | 38 ++++++++++++++-- src/har.rs | 29 ++++++------- src/main.rs | 6 +++ 5 files changed, 160 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c8b7ee..08b175f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,14 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" +dependencies = [ + "memchr", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -114,6 +123,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + [[package]] name = "fnv" version = "1.0.7" @@ -166,6 +188,12 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59f5fff90fd5d971f936ad674802482ba441b6f09ba5e15fd8b39145582ca399" +[[package]] +name = "futures-io" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de27142b013a8e869c14957e6d2edeef89e97c289e69d042ee3a49acd8b51789" + [[package]] name = "futures-sink" version = "0.3.5" @@ -177,6 +205,9 @@ name = "futures-task" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bdb66b5f09e22019b1ab0830f7785bcea8e7a42148683f99214f73f8ec21a626" +dependencies = [ + "once_cell", +] [[package]] name = "futures-util" @@ -185,9 +216,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8764574ff08b701a084482c3c7031349104b07ac897393010494beaa18ce32c6" dependencies = [ "futures-core", + "futures-io", "futures-task", + "memchr", "pin-project", "pin-utils", + "slab", ] [[package]] @@ -225,6 +259,8 @@ name = "hardownload" version = "0.1.0" dependencies = [ "clap", + "env_logger", + "log", "reqwest", "serde", "serde_json", @@ -267,6 +303,15 @@ version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd179ae861f0c2e53da70d892f5f3029f9594be0c41dc5269cd371691b1dc2f9" +[[package]] +name = "humantime" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + [[package]] name = "hyper" version = "0.13.6" @@ -510,6 +555,12 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d" + [[package]] name = "openssl" version = "0.10.29" @@ -602,6 +653,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.7" @@ -658,6 +715,24 @@ version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +[[package]] +name = "regex" +version = "1.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -691,6 +766,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde", + "serde_json", "serde_urlencoded", "tokio", "tokio-tls", @@ -842,6 +918,15 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "termcolor" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" +dependencies = [ + "winapi-util", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -851,6 +936,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static", +] + [[package]] name = "time" version = "0.1.43" @@ -1128,6 +1222,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi 0.3.8", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 4fc6c01..e36208a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,8 @@ edition = "2018" [dependencies] tokio = { version = "0.2.6", features = ["full"] } clap = "2.33.1" -reqwest = "0.10.6" +reqwest = { version = "0.10", features = ["blocking", "json"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +log = "0.4.8" +env_logger = "0.7.1" diff --git a/src/cmd_download.rs b/src/cmd_download.rs index b3e01a7..ad3e8f6 100644 --- a/src/cmd_download.rs +++ b/src/cmd_download.rs @@ -21,10 +21,40 @@ impl Command for CommandDownload { let har_str = fs::read_to_string(file_name)?; let har: Har = serde_json::from_str(&har_str)?; - // println!("{:?}", har); - - har.log.entries.iter().for_each(|e| { - println!("{:<100} {:?}", e.request.url, /*e.server_ip_address,*/ e.pageref); + har.log.pages.iter().for_each(|page| { + info!("Start download page_id: {}, title: {}", page.id, page.title); + har.downlaod_page(page, Box::new(|_har: &Har, _page_id: &str, _har_page: &HarPage, har_entry: &HarEntry| { + info!("Processing url: {}", har_entry.request.url); + let client = match reqwest::blocking::ClientBuilder::new().build() { + Ok(c) => c, Err(e) => { + error!("Create client from ClientBuilder failed: {}", e); + return; + }, + }; + let reqeust = &har_entry.request; + let mut client = match reqeust.method.as_str() { + "GET" => client.get(&reqeust.url), + // "POST" => client.post(&reqeust.url), // TODO ??? + _ => { + error!("Method not supported: {}, of url: {}", reqeust.method, reqeust.url); + return; + }, + }; + for header in &reqeust.headers { + client = client.header(&header.name, &header.value); + } + let response = match client.send() { + Ok(r) => r, Err(e) => { + error!("Request url: {}, error: {}", reqeust.url, e); + return; + }, + }; + let code = response.status().as_u16(); + if code != 200 { + // TODO ... + return; + } + })) }); Ok(()) diff --git a/src/har.rs b/src/har.rs index c3487cc..7d76447 100644 --- a/src/har.rs +++ b/src/har.rs @@ -95,19 +95,18 @@ pub struct Har { pub log: HarLog, } -// TODO -pub fn downlaod_page(har: &Har, har_page: &HarPage) { - let page_id = &har_page.id; - har.log.entries.iter().filter(|e| if let Some(pageref) = &e.pageref { - pageref == page_id - } else { - false - }).for_each(|e| { - make_request(&e.request); - }); -} +impl Har { -// GET, POST -pub fn make_request(har_request: &HarRequest) -> Option<()> { // ??? - None -} \ No newline at end of file + pub fn downlaod_page(&self, har_page: &HarPage, callback: Box ()>) { + let page_id = &har_page.id; + self.log.entries.iter().filter(|e| if let Some(pageref) = &e.pageref { + pageref == page_id + } else { + warn!("Not matched url: {}", e.request.url); + false + }).for_each(|e| { + info!("Processing url: {}", e.request.url); + callback(&self, page_id, har_page, e); + }); + } +} diff --git a/src/main.rs b/src/main.rs index 195936a..2ca8829 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +#[macro_use] extern crate log; use clap::App; mod cmd; @@ -10,6 +11,9 @@ use cmd_download::CommandDownload; use cmd_default::CommandDefault; fn main() -> CommandError { + env_logger::init(); + info!("hardownload started"); + let commands: Vec> = vec![ Box::new(CommandDownload{}), ]; @@ -24,9 +28,11 @@ fn main() -> CommandError { let matches = app.get_matches(); for command in &commands { if let Some(sub_cmd_matches) = matches.subcommand_matches(command.name()) { + info!("matched subcommand: {}", command.name()); return command.run(&matches, sub_cmd_matches); } } + info!("matched default subcommand"); CommandDefault::run(&matches) }