add downloading

This commit is contained in:
2020-06-21 23:23:52 +08:00
parent 6b6c957802
commit 2cdf02b8e2
2 changed files with 53 additions and 9 deletions

View File

@@ -11,6 +11,7 @@ lazy_static! {
let mut s = HashSet::new();
s.insert("if-none-match");
s.insert("if-modified-since");
s.insert("accept-encoding");
s
};
}
@@ -27,12 +28,16 @@ impl Command for CommandDownload {
fn run(&self, _arg_matches: &ArgMatches, sub_arg_matches: &ArgMatches) -> CommandError {
let file_name = sub_arg_matches.value_of("FILE_NAME").unwrap();
info!("Reading har: {}", file_name);
let har_str = fs::read_to_string(file_name)?;
info!("Parsing har: {}", file_name);
let har: Har = serde_json::from_str(&har_str)?;
info!("Getting har: {}", file_name);
har.log.pages.iter().for_each(|page| {
info!("Start download page_id: {}, title: {}", page.id, page.title);
har.downlaod_page(page, Box::new(|_har: &Har, _page_id: &str, _har_page: &HarPage, har_entry: &HarEntry| {
har.downlaod_page(page, Box::new(|_har: &Har, _page_id: &str, har_page: &HarPage, har_entry: &HarEntry| {
info!("Processing url: {}", har_entry.request.url);
let client = match reqwest::blocking::ClientBuilder::new().build() {
Ok(c) => c, Err(e) => {
@@ -71,8 +76,13 @@ impl Command for CommandDownload {
return;
},
};
println!("GET {}, {} bytes", har_reqeust.url, bs.len());
// todo!(); // write file
let base_parts = normalize_url_to_path(&har_page.title, "_");
let base_path = to_base_path(&base_parts);
let base_path_part_count = base_parts.len() - 1;
let resolved_name = resolve_name(&base_path, base_path_part_count, &har_reqeust.url, "index.html");
println!("GET {}, --> {}, {} bytes", har_reqeust.url, resolved_name, bs.len());
write_file(&resolved_name, &bs);
}))
});
@@ -80,12 +90,40 @@ impl Command for CommandDownload {
}
}
fn _resolve_name(_base: &str, url: &str) -> String {
let _url_to_path = _normalize_url_to_path(url);
"".into()
pub fn write_file(resolved_name: &str, bs: &[u8]) {
let splites = resolved_name.split("/").collect::<Vec<_>>();
if splites.len() > 1 {
let path_with_out_file_name = splites.iter()
.take(splites.len() - 1)
.map(|p| p.to_owned())
.collect::<Vec<_>>()
.join("/");
if let Err(e) = fs::create_dir_all(&path_with_out_file_name) {
error!("Create dir: {}, failed: {}", path_with_out_file_name, e);
}
}
if let Err(e) = fs::write(resolved_name, bs) {
error!("Write file: {}, failed: {}", resolved_name, e);
}
}
fn _normalize_url_to_path(url: &str) -> Vec<String> {
/// Joins all but the last element of `parts` with '/' and appends a
/// trailing '/', producing the base directory prefix for a page.
///
/// `["a", "b", "c"]` -> `"a/b/"`; a single part -> `"/"`.
/// Uses `saturating_sub` so an empty slice yields `"/"` instead of
/// panicking on `usize` underflow (the original `parts.len() - 1`
/// overflowed for an empty input).
pub fn to_base_path(parts: &[String]) -> String {
    let keep = parts.len().saturating_sub(1);
    parts[..keep].join("/") + "/"
}
/// Resolves `url` into a local file path, relative to `base_path` when
/// possible.
///
/// The URL is first normalized into path parts (with `default_index`
/// substituted for a trailing empty segment). When the joined path
/// starts with `base_path`, the first `base_path_part_count` parts are
/// dropped so the result is relative to that base; otherwise the full
/// normalized path is returned unchanged.
pub fn resolve_name(base_path: &str, base_path_part_count: usize, url: &str, default_index: &str) -> String {
    let parts = normalize_url_to_path(url, default_index);
    let full = parts.join("/");
    if !full.starts_with(base_path) {
        return full;
    }
    // skip() (rather than slicing) tolerates a count larger than the
    // number of parts, yielding an empty string — same as the original.
    parts.into_iter().skip(base_path_part_count).collect::<Vec<_>>().join("/")
}
pub fn normalize_url_to_path(url: &str, default_index: &str) -> Vec<String> {
let mut ret = vec![];
let parsed_url = match Url::parse(url) {
Ok(u) => u, Err(e) => {
@@ -114,5 +152,12 @@ fn _normalize_url_to_path(url: &str) -> Vec<String> {
}
}
ret
if let Some(last) = ret.last() {
if last.is_empty() && !default_index.is_empty() {
ret.pop();
ret.push(default_index.into());
}
}
ret.iter().filter(|p| !p.is_empty()).map(|p| p.to_owned()).collect::<Vec<_>>()
}

View File

@@ -105,7 +105,6 @@ impl Har {
warn!("Not matched url: {}", e.request.url);
false
}).for_each(|e| {
info!("Processing url: {}", e.request.url);
callback(&self, page_id, har_page, e);
});
}