diff --git a/src/cmd_download.rs b/src/cmd_download.rs
index c39bdb2..50004af 100644
--- a/src/cmd_download.rs
+++ b/src/cmd_download.rs
@@ -11,6 +11,7 @@ lazy_static! {
         let mut s = HashSet::new();
         s.insert("if-none-match");
         s.insert("if-modified-since");
+        s.insert("accept-encoding");
         s
     };
 }
@@ -27,12 +28,16 @@ impl Command for CommandDownload {
 
     fn run(&self, _arg_matches: &ArgMatches, sub_arg_matches: &ArgMatches) -> CommandError {
         let file_name = sub_arg_matches.value_of("FILE_NAME").unwrap();
+        info!("Reading har: {}", file_name);
         let har_str = fs::read_to_string(file_name)?;
+        info!("Parsing har: {}", file_name);
         let har: Har = serde_json::from_str(&har_str)?;
+        info!("Getting har: {}", file_name);
 
         har.log.pages.iter().for_each(|page| {
             info!("Start download page_id: {}, title: {}", page.id, page.title);
-            har.downlaod_page(page, Box::new(|_har: &Har, _page_id: &str, _har_page: &HarPage, har_entry: &HarEntry| {
+
+            har.downlaod_page(page, Box::new(|_har: &Har, _page_id: &str, har_page: &HarPage, har_entry: &HarEntry| {
                 info!("Processing url: {}", har_entry.request.url);
                 let client = match reqwest::blocking::ClientBuilder::new().build() {
                     Ok(c) => c, Err(e) => {
@@ -71,8 +76,13 @@ impl Command for CommandDownload {
                         return;
                     },
                 };
-                println!("GET {}, {} bytes", har_reqeust.url, bs.len());
-                // todo!(); // write file
+                let base_parts = normalize_url_to_path(&har_page.title, "_");
+                let base_path = to_base_path(&base_parts);
+                let base_path_part_count = base_parts.len() - 1;
+                let resolved_name = resolve_name(&base_path, base_path_part_count, &har_reqeust.url, "index.html");
+
+                println!("GET {}, --> {}, {} bytes", har_reqeust.url, resolved_name, bs.len());
+                write_file(&resolved_name, &bs);
             }))
         });
 
@@ -80,12 +90,40 @@ impl Command for CommandDownload {
     }
 }
 
-fn _resolve_name(_base: &str, url: &str) -> String {
-    let _url_to_path = _normalize_url_to_path(url);
-    "".into()
+pub fn write_file(resolved_name: &str, bs: &[u8]) {
+    let splites = resolved_name.split("/").collect::<Vec<&str>>();
+    if splites.len() > 1 {
+        let path_with_out_file_name = splites.iter()
+            .take(splites.len() - 1)
+            .map(|p| p.to_owned())
+            .collect::<Vec<_>>()
+            .join("/");
+        if let Err(e) = fs::create_dir_all(&path_with_out_file_name) {
+            error!("Create dir: {}, failed: {}", path_with_out_file_name, e);
+        }
+    }
+    if let Err(e) = fs::write(resolved_name, bs) {
+        error!("Write file: {}, failed: {}", resolved_name, e);
+    }
 }
 
-fn _normalize_url_to_path(url: &str) -> Vec<String> {
+pub fn to_base_path(parts: &[String]) -> String {
+    let parts_len = parts.len();
+    parts.iter().take(parts_len - 1).map(|p| p.to_owned()).collect::<Vec<_>>().join("/") + "/"
+}
+
+pub fn resolve_name(base_path: &str, base_path_part_count: usize, url: &str, default_index: &str) -> String {
+    let url_to_path = normalize_url_to_path(url, default_index);
+    let url_to_path_str = url_to_path.join("/");
+
+    if url_to_path_str.starts_with(base_path) {
+        url_to_path.iter().skip(base_path_part_count).map(|p| p.to_owned()).collect::<Vec<_>>().join("/")
+    } else {
+        url_to_path_str
+    }
+}
+
+pub fn normalize_url_to_path(url: &str, default_index: &str) -> Vec<String> {
     let mut ret = vec![];
     let parsed_url = match Url::parse(url) {
         Ok(u) => u, Err(e) => {
@@ -114,5 +152,12 @@ fn _normalize_url_to_path(url: &str) -> Vec<String> {
         }
     }
 
-    ret
+    if let Some(last) = ret.last() {
+        if last.is_empty() && !default_index.is_empty() {
+            ret.pop();
+            ret.push(default_index.into());
+        }
+    }
+
+    ret.iter().filter(|p| !p.is_empty()).map(|p| p.to_owned()).collect::<Vec<_>>()
 }
diff --git a/src/har.rs b/src/har.rs
index 7d76447..9020a25 100644
--- a/src/har.rs
+++ b/src/har.rs
@@ -105,7 +105,6 @@ impl Har {
             warn!("Not matched url: {}", e.request.url);
             false
         }).for_each(|e| {
-            info!("Processing url: {}", e.request.url);
             callback(&self, page_id, har_page, e);
         });
     }