This commit is contained in:
2025-10-20 00:58:43 +03:00
parent c4c0ad2e12
commit 207675f522
5 changed files with 205 additions and 99 deletions

7
Cargo.lock generated
View File

@@ -52,6 +52,12 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "bytes"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "cc"
version = "1.2.41"
@@ -309,6 +315,7 @@ version = "1.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
dependencies = [
"bytes",
"pin-project-lite",
"tokio-macros",
]

View File

@@ -13,7 +13,7 @@ path = "src/zspatch.rs"
[dependencies]
zstd = { version = "0.13" }
tokio = { version = "1.48", features = ["rt", "rt-multi-thread", "macros"] }
tokio = { version = "1.48", features = ["rt", "rt-multi-thread", "macros", "fs", "io-util"] }
md5 = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

View File

@@ -4,58 +4,86 @@ use std::collections::HashMap;
use std::{fs, io};
use zstd::{Decoder, Encoder};
const METADATA_VERSION: u16 = 1;
const SUPPORTED_VERSION: [u16; 1] = [1];
/// In-memory form of a diff archive.
///
/// `content` maps a file name to that file's raw bytes; `metadata` is the
/// accompanying [`Metadata`] record that is serialized alongside the files.
pub struct Zsdiff {
pub content: HashMap<String, Vec<u8>>,
pub metadata: Metadata,
}
impl Zsdiff {
/// Parses a decompressed archive buffer into a [`Zsdiff`].
///
/// Layout read by the `Vec<u8>` variant below (all integers big-endian):
/// `[u16 version][u32 metadata length][metadata JSON]`, followed by zero or
/// more records of `[u32 name length][name][u32 content length][content]`.
///
/// # Errors
/// Returns `io::Error` when the version is not listed in `SUPPORTED_VERSION`
/// or when the metadata JSON fails to deserialize.
///
/// # Panics
/// The slice indexing and `unwrap()` calls panic on truncated input and on
/// file names that are not valid UTF-8.
//
// NOTE(review): this span is a flattened diff, so two generations of the
// function are interleaved. The `Vec<Vec<u8>>` signature, the
// `for part in _data` loop and the trailing "metadata.json" lookup appear to
// be the replaced version; the `Vec<u8>` signature, the header parsing and
// the `while index < data.len()` loop appear to be the new one — confirm
// against the repository before relying on either reading.
pub async fn from_vec(_data: Vec<Vec<u8>>) -> Result<Self, io::Error> {
pub async fn from_vec(_data: Vec<u8>) -> Result<Self, io::Error> {
// Bytes 0..2: format version, big-endian u16.
let meta_version = u16::from_be_bytes(_data[..2].try_into().unwrap());
println!(">>> Metadata version: {}", meta_version);
if !SUPPORTED_VERSION.contains(&meta_version) {
return Err(io::Error::new(
io::ErrorKind::Other,
"Metadata version mismatch",
));
}
// Bytes 2..6: length of the JSON metadata that starts at offset 6.
let meta_size = u32::from_be_bytes(_data[2..6].try_into().unwrap()) as usize;
let mut index = 6;
let meta = _data[index..index + meta_size].to_vec();
let metadata: Metadata = serde_json::from_slice(&meta)?;
println!(">>> Metadata parsed successfully");
// `index` now points at the first file record.
index += meta_size;
println!(">>> File count: {}", metadata.file_count);
let data = _data;
let mut content = HashMap::new();
// Per-part loop: each `part` holds one name/content record of its own.
for part in _data {
let filename_size = u32::from_be_bytes(part[0..4].try_into().unwrap()) as usize;
let filename = String::from_utf8(part[4..filename_size + 4].to_vec()).unwrap();
let cont = part[filename_size + 8..].to_vec();
// Flat-buffer loop: walk the records until the buffer is exhausted.
while index < data.len() {
let filename_size =
u32::from_be_bytes(data[index..index + 4].try_into().unwrap()) as usize;
index += 4;
let filename = String::from_utf8(data[index..filename_size + index].to_vec()).unwrap();
index += filename_size;
let content_size =
u32::from_be_bytes(data[index..index + 4].try_into().unwrap()) as usize;
index += 4;
let cont = data[index..index + content_size].to_vec();
index += cont.len();
content.insert(filename, cont);
}
// Metadata stored as a pseudo-file named "metadata.json" inside `content`.
let meta = content.get("metadata.json").unwrap();
let metadata: Metadata = serde_json::from_slice(meta.as_slice())?;
content.remove("metadata.json");
Ok(Zsdiff { content, metadata })
}
/// Serializes this archive into bytes.
///
/// The `Vec<u8>` variant below writes (all integers big-endian)
/// `[u16 METADATA_VERSION][u32 metadata length][metadata JSON]` followed by
/// one `[u32 name length][name][u32 content length][content]` record per
/// entry of `self.content`.
///
/// Records are emitted in `HashMap` iteration order, which is unspecified,
/// so the output bytes are not guaranteed to be identical between runs.
///
/// # Panics
/// Panics if `serde_json::to_vec` fails on the metadata.
//
// NOTE(review): every length is narrowed with `as u32`; a name, file or
// metadata blob longer than `u32::MAX` bytes would be silently truncated in
// the length prefix.
//
// NOTE(review): this span is a flattened diff. The `Vec<Vec<u8>>` signature,
// `parts.push(...)` and the "metadata.json" pseudo-file block appear to be
// the replaced version; the `Vec<u8>` signature, the `meta_bytes` header and
// `parts.extend(part)` appear to be the new one — confirm against the
// repository.
pub async fn to_vec(&self) -> Vec<Vec<u8>> {
let mut parts: Vec<Vec<u8>> = Vec::new();
pub async fn to_vec(&self) -> Vec<u8> {
// Header: version, metadata length, metadata JSON.
let mut meta_bytes: Vec<u8> = Vec::new();
meta_bytes.extend(METADATA_VERSION.to_be_bytes());
let meta = serde_json::to_vec(&self.metadata).unwrap();
meta_bytes.extend((meta.len() as u32).to_be_bytes());
meta_bytes.extend(meta);
let mut parts: Vec<u8> = Vec::new();
for (filename, content) in &self.content {
// Length-prefixed name followed by length-prefixed content.
let filename_size: [u8; 4] = (filename.len() as u32).to_be_bytes();
let filename_encoded = vec![filename_size.as_slice(), filename.as_bytes()].concat();
let content_size: [u8; 4] = (content.len() as u32).to_be_bytes();
let content_encoded = vec![content_size.as_slice(), content.as_slice()].concat();
parts.push(vec![filename_encoded, content_encoded].concat())
let part = vec![filename_encoded, content_encoded].concat();
parts.extend(part)
}
// Metadata appended as one more record under the name "metadata.json".
let meta = serde_json::to_vec(&self.metadata).unwrap();
let meta_filename = "metadata.json";
let meta_filename_size = (meta_filename.len() as u32).to_be_bytes();
let meta_filename_encoded =
vec![meta_filename_size.as_slice(), meta_filename.as_bytes()].concat();
let meta_size = (meta.len() as u32).to_be_bytes();
let meta_encoded = vec![meta_size.as_slice(), meta.as_slice()].concat();
parts.push(vec![meta_filename_encoded, meta_encoded].concat());
parts
// Header first, then the concatenated file records.
let out = vec![meta_bytes, parts].concat();
out
}
}
/// Descriptive record stored with the file contents of a diff archive and
/// (de)serialized as JSON through serde.
//
// NOTE(review): two derive lines appear because this text is a flattened
// diff; the one without `Debug` looks like the replaced line — confirm.
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Debug)]
pub struct Metadata {
// presumably the names of the files carried in the archive; the assignment
// is not visible in this view — verify against `compare_hashes`.
pub(crate) diff_files: Vec<String>,
// Relative path -> hash string; the patcher re-hashes each path under the
// destination directory and compares against this value.
pub hashes: HashMap<String, String>,
// Paths, joined onto the destination directory, that the patcher deletes.
pub remove_files: Vec<String>,
// Directories, joined onto the destination directory, removed recursively.
pub remove_folders: Vec<String>,
// Set from the number of collected diff files when the archive is built.
pub file_count: u32,
// Compression level that was passed in when the archive was built.
pub compress_level: i32,
}
pub async fn get_hash(data: Vec<u8>) -> String {
@@ -63,33 +91,15 @@ pub async fn get_hash(data: Vec<u8>) -> String {
format!("{:x}", hash)
}
/// Compresses `input` with zstd at `level` and writes the stream to `output`.
///
/// # Panics
/// Every step is unwrapped: creating the encoder, copying the data and
/// finishing the stream all panic on failure instead of returning an error.
//
// NOTE(review): the function is `async` but contains no `.await`; the I/O is
// performed synchronously on the calling task.
//
// NOTE(review): flattened diff — `compress_parts` with its per-part loop
// appears to be the replaced version, `compress` with the single
// `io::copy` the new one; confirm against the repository.
pub async fn compress_parts(input: Vec<Vec<u8>>, output: &fs::File, level: i32) {
pub async fn compress(input: Vec<u8>, output: &fs::File, level: i32) {
let mut encoder = Encoder::new(output, level).unwrap();
for part in input.iter() {
io::copy(&mut &part[..], &mut encoder).unwrap();
}
io::copy(&mut input.as_slice(), &mut encoder).unwrap();
// `finish` finalizes the encoder; its result is unwrapped like the rest.
encoder.finish().unwrap();
}
/// Decodes a zstd stream held in `input` and returns the decompressed bytes.
///
/// # Errors
/// Propagates the `io::Error` from creating the decoder or from copying the
/// decoded stream into the output buffer.
//
// NOTE(review): flattened diff — `decompress_parts`, which re-splits the
// buffer into `[name length][name][content length][content]` parts, appears
// to be the replaced version; `decompress`, which returns the buffer as is,
// appears to be the new one. Confirm against the repository.
pub async fn decompress_parts(input: Vec<u8>) -> Result<Vec<Vec<u8>>, io::Error> {
pub async fn decompress(input: Vec<u8>) -> Result<Vec<u8>, io::Error> {
let mut decoder = Decoder::new(&input[..])?;
let mut buf = Vec::new();
// The whole decoded stream is accumulated in memory.
io::copy(&mut decoder, &mut buf)?;
let mut index = 0;
let mut parts: Vec<Vec<u8>> = Vec::new();
while index < buf.len() {
// Each slice below keeps its 4-byte length prefix in front of the payload.
let filename_size = u32::from_be_bytes(buf[index..index + 4].try_into().unwrap()) as usize;
let filename = buf[index..index + filename_size + 4].to_vec();
index += 4 + filename_size;
let content_size = u32::from_be_bytes(buf[index..index + 4].try_into().unwrap()) as usize;
let content = buf[index..index + content_size + 4].to_vec();
index += content_size + 4;
let part = vec![filename, content].concat();
parts.push(part);
}
Ok(parts)
Ok(buf)
}

View File

@@ -44,7 +44,11 @@ async fn walk_dir(dir: String) -> HashMap<String, FileInfo> {
hash_list
}
async fn compare_hashes(old: HashMap<String, FileInfo>, new: HashMap<String, FileInfo>) -> Zsdiff {
async fn compare_hashes(
old: HashMap<String, FileInfo>,
new: HashMap<String, FileInfo>,
compress_level: i32,
) -> Zsdiff {
let mut diff_files: HashMap<String, Vec<u8>> = HashMap::new();
let mut remove_files: Vec<String> = vec![];
let mut remove_folders: Vec<String> = vec![];
@@ -72,10 +76,7 @@ async fn compare_hashes(old: HashMap<String, FileInfo>, new: HashMap<String, Fil
if old_fileinfo.is_none() {
let path = new_fileinfo.relative_path.clone();
diff_files.insert(path.clone(), fs::read(new_fileinfo.path.clone()).unwrap());
hashes.insert(
new_fileinfo.relative_path.clone(),
new_fileinfo.hash.clone(),
);
hashes.insert(path.clone(), new_fileinfo.hash.clone());
}
}
@@ -86,6 +87,8 @@ async fn compare_hashes(old: HashMap<String, FileInfo>, new: HashMap<String, Fil
hashes,
remove_files,
remove_folders,
compress_level,
file_count: diff_files.len() as u32,
},
}
}
@@ -99,14 +102,11 @@ pub async fn zsdiff(
let output_filename = &format!("{}.zdiff", filename);
let old_hashes = walk_dir(old).await;
let new_hashes = walk_dir(new).await;
let compare_hashes = compare_hashes(old_hashes, new_hashes).await;
let compare_hashes = compare_hashes(old_hashes, new_hashes, level).await;
let parts = compare_hashes.to_vec().await;
let mut size_before = 0;
for p in &parts {
size_before += p.len();
}
let size_before = parts.len();
let now = time::Instant::now();
utils::compress_parts(parts, &fs::File::create(output_filename)?, level).await;
utils::compress(parts, &fs::File::create(output_filename)?, level).await;
let output_data = fs::read(output_filename)?;
let size_after = output_data.len();
let hash = get_hash(output_data).await;

View File

@@ -1,80 +1,164 @@
mod utils;
use clap::Parser;
use std::fs::read;
use std::io::Write;
use std::path::Path;
use std::{fs, io, time};
use std::path::{Path, PathBuf};
use std::{io, time};
use tokio::fs;
use tokio::io::AsyncWriteExt;
use utils::Zsdiff;
/// Creates a fresh scratch directory derived from `dir_name` and returns its
/// path as a `String`.
///
/// Any directory already present under that name is removed first.
///
/// # Errors
/// In the `PathBuf` variant below: fails if removing or creating the
/// directory fails, or with `InvalidData` if the path is not valid UTF-8.
//
// NOTE(review): flattened diff — the `"{}.tmp"` lines using
// `DirBuilder` and `.ok()` appear to be the replaced version; the `"{}_tmp"`
// lines using awaited `fs` calls appear to be the new one. Confirm.
async fn create_tmp_dir(dir_name: String) -> Result<String, io::Error> {
let name = format!("{}.tmp", dir_name);
// Removal failure is ignored here (`.ok()`).
fs::remove_dir_all(name.clone()).ok();
fs::DirBuilder::new().create(name.clone())?;
Ok(name)
let name = PathBuf::from(format!("{}_tmp", dir_name));
// `exists()` is a synchronous check; removal failure is propagated here.
if name.exists() {
fs::remove_dir_all(&name).await?;
}
fs::create_dir(&name).await?;
name.to_str().map(|s| s.to_string()).ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidData,
"Path contains invalid UTF-8 characters",
)
})
}
/// Reads `<filename>.zdiff`, decompresses it and parses the result into a
/// [`Zsdiff`].
///
/// # Errors
/// Propagates failures from reading the file, from decompression and from
/// `Zsdiff::from_vec`.
//
// NOTE(review): flattened diff — the `decompress_parts(read(...))` lines
// appear to be the replaced version; the awaited `fs::read` plus
// `utils::decompress` lines and the two summary printouts appear to be the
// new one. Confirm against the repository.
async fn load_file(filename: String) -> Result<Zsdiff, io::Error> {
let filename = &format!("{}.zdiff", filename);
let parts = utils::decompress_parts(read(filename)?).await?;
Ok(Zsdiff::from_vec(parts).await?)
let full_filename = format!("{}.zdiff", filename);
// The whole compressed file is read into memory before decoding.
let compressed_data = fs::read(&full_filename).await?;
let data = utils::decompress(compressed_data).await?;
let zsdiff = Zsdiff::from_vec(data).await?;
println!(
">>> Metadata files to remove: {}",
zsdiff.metadata.remove_files.len()
);
println!(
">>> Metadata hashes to check: {}",
zsdiff.metadata.hashes.len()
);
Ok(zsdiff)
}
/// Writes every entry of `zsdiff.content` below a scratch directory obtained
/// from `create_tmp_dir(filename)` and returns that directory's name.
///
/// Parent directories of each entry are created as needed.
///
/// # Errors
/// Propagates failures from creating the scratch directory, creating parent
/// directories, and creating or writing each file.
//
// NOTE(review): entry names come from the archive and are joined onto the
// scratch directory without validation; `Path::join` with an absolute path
// or with `..` components would place the file outside that directory —
// confirm whether archives are trusted.
//
// NOTE(review): flattened diff — the plain `for (f, c)` loop and the
// synchronous `?` calls appear to be the replaced version; the enumerated
// loop with progress output and awaited calls appears to be the new one.
// Whether `fs::remove_dir_all(path).ok();` survives in the new version cannot
// be told from this view.
async fn extract_files(zsdiff: &Zsdiff, filename: String) -> Result<String, io::Error> {
let tmp_dir_name = create_tmp_dir(filename.to_string()).await?;
let path = Path::new(&tmp_dir_name);
fs::remove_dir_all(path).ok();
for (f, c) in zsdiff.content.iter() {
for (i, (f, c)) in zsdiff.content.iter().enumerate() {
// Progress line: 1-based position, total entry count, entry name.
println!(
">>> Processing file {}/{}: '{}'",
i + 1,
zsdiff.content.len(),
f
);
let filepath = path.join(f);
fs::create_dir_all(filepath.parent().unwrap())?;
fs::File::create(&filepath)?.write_all(c)?;
// `parent()` is `None` only for a root or empty path; skip creation then.
if let Some(parent) = filepath.parent() {
fs::create_dir_all(parent).await?;
}
let mut file = fs::File::create(&filepath).await?;
file.write_all(c).await?;
}
Ok(tmp_dir_name)
}
/// Verifies `<filename>.zdiff` against the hash recorded in
/// `<filename>.zdiff.md5`.
///
/// The expected hash is the text before the first space in the `.md5` file;
/// it is compared with `utils::get_hash` of the archive bytes.
///
/// # Errors
/// Fails if either file cannot be read or if the hashes differ.
///
/// # Panics
/// Panics if the `.md5` file is not valid UTF-8.
//
// NOTE(review): only a single space is used as the separator, so a hash file
// that holds just the hash followed by a newline would keep the newline in
// the compared token — confirm the format the generator writes.
//
// NOTE(review): flattened diff — the synchronous `read(...)` lines and the
// bare "Hash mismatch" error appear to be the replaced version; the awaited
// `fs::read` lines and the error that reports both hashes appear to be the
// new one. Confirm against the repository.
async fn check_hash(filename: String) -> Result<(), io::Error> {
let file_data = read(format!("{}.zdiff", filename))?;
let hash_file = String::from_utf8(read(format!("{}.zdiff.md5", filename))?).unwrap();
let file_data = fs::read(format!("{}.zdiff", filename)).await?;
let mut hash_file =
String::from_utf8(fs::read(format!("{}.zdiff.md5", filename)).await?).unwrap();
let hash = utils::get_hash(file_data).await;
if !hash_file.split(" ").next().unwrap().eq(&hash) {
return Err(io::Error::new(io::ErrorKind::Other, "Hash mismatch"));
// Keep only the first space-separated token as the expected hash.
hash_file = hash_file.split(" ").next().unwrap().parse().unwrap();
if !hash_file.eq(&hash) {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("Hash mismatch. Expected {}, got {}", hash_file, hash),
));
}
println!("Zsdiff hash: {}", hash);
println!(">>> Zsdiff hash: {}", hash);
Ok(())
}
/// Applies the archive `<filename>.zdiff` to `dest_dir`.
///
/// Steps visible below: load the archive, extract its files into a scratch
/// directory, copy them into `dest_dir`, delete the files and folders listed
/// in the metadata, then re-hash the files listed in `metadata.hashes`.
///
/// # Errors
/// Propagates I/O failures from extraction, copying and removal.
//
// NOTE(review): this span is a flattened diff, so two generations of the
// function are interleaved. Lines that consume `diff.metadata.*` by value,
// call synchronous `fs`/`read` functions with `?`/`.ok()`, and return an
// error on "Hash mismatch" appear to be the replaced version; lines that
// borrow `&diff.metadata.*`, `.await` their I/O and only print on mismatch
// appear to be the new one. Confirm against the repository.
async fn zspatch(filename: String, dest_dir: String) -> Result<(), io::Error> {
let diff = load_file(filename.clone()).await?;
let cloned = filename.clone();
// NOTE(review): `.ok().unwrap()` discards the error and panics on failure
// even though this function returns `Result`.
let diff = load_file(cloned).await.ok().unwrap();
let tmp_dir_name = extract_files(&diff, filename).await?;
let now = time::Instant::now();
for name in diff.content.keys().collect::<Vec<&String>>() {
// Writes the metadata as JSON to "metadata.json", a relative path, i.e. in
// the process's current working directory rather than in `dest_dir`.
fs::File::create("metadata.json")
.await?
.write_all(serde_json::to_vec(&diff.metadata).unwrap().as_slice())
.await?;
let files_to_copy: Vec<String> = diff.content.keys().cloned().collect();
// The index produced by `enumerate()` is not used.
for (_, name) in files_to_copy.iter().enumerate() {
let from_path = Path::new(&tmp_dir_name).join(name);
let to_path = Path::new(&dest_dir).join(name);
fs::create_dir_all(to_path.parent().unwrap())?;
fs::copy(from_path, to_path)?;
// A missing extracted file is reported and skipped, not treated as an error.
if !from_path.exists() {
println!("ERROR: Source file doesn't exist: {:?}", from_path);
continue;
}
if let Some(parent) = to_path.parent() {
fs::create_dir_all(parent).await?;
}
fs::copy(from_path.clone(), to_path.clone()).await?;
}
for file in diff.metadata.remove_files {
for file in &diff.metadata.remove_files {
let path = Path::new(&dest_dir).join(file);
fs::remove_file(path).ok();
println!(">>> Removing file {}", path.display());
// Already-absent files are skipped; other removal failures abort the patch.
if !path.exists() {
println!("File doesn't exist, skipping");
continue;
}
fs::remove_file(path.clone()).await?
}
for folder in diff.metadata.remove_folders {
println!(">>> Starting folder removal process <<<");
println!(
">>> Folders to remove: {}",
diff.metadata.remove_folders.len()
);
for folder in &diff.metadata.remove_folders {
let path = Path::new(&dest_dir).join(folder);
fs::remove_dir_all(path).ok();
if !path.exists() {
println!("Folder doesn't exist, skipping");
continue;
}
fs::remove_dir_all(path.clone()).await?
}
for (k, hash) in diff.metadata.hashes {
println!(">>> Starting hash verification <<<");
println!(">>> Files to verify: {}", diff.metadata.hashes.len());
for (k, hash) in &diff.metadata.hashes {
let path = Path::new(&dest_dir).join(k);
let content = read(path)?;
let fs_hash = utils::get_hash(content).await;
if !fs_hash.eq(&hash) {
Err(io::Error::new(io::ErrorKind::Other, "Hash mismatch"))?
// In this variant a mismatch or an unreadable file is only printed; the
// function still goes on to return `Ok(())`.
match fs::read(path.clone()).await {
Ok(content) => {
let fs_hash = utils::get_hash(content).await;
if !fs_hash.eq(hash) {
println!(
"Hash mismatch. Expected {}, got {}. Path: {}",
hash,
fs_hash,
path.display()
);
}
}
Err(e) => {
println!("Can't read file for hash verification: {}", e);
}
}
}
fs::remove_dir_all(tmp_dir_name).ok();
println!("Patching done!");
println!("Elapsed time: {:.2?}", now.elapsed());
// Scratch-directory cleanup is disabled in this variant, so the directory
// created by `extract_files` is left on disk.
// fs::remove_dir_all(tmp_dir_name).await?;
println!(">>> Patching done! <<<");
println!(">>> Elapsed time: {:.2?}", now.elapsed());
Ok(())
}
@@ -87,19 +171,24 @@ struct Args {
#[arg(short, long)]
metadata: bool,
#[arg(short, long)]
hash_check: bool,
check_hash: bool,
}
/// Command-line entry point.
///
/// Parses `Args`, optionally runs the archive hash check, and then either
/// prints metadata information and exits (when `metadata` is set) or applies
/// the patch via `zspatch`.
//
// NOTE(review): flattened diff — the `args.hash_check` branch that
// propagates the check with `?` and the JSON dump of the metadata appear to
// be the replaced version; the `args.check_hash` branch and the
// "Compress level" printout appear to be the new one. Confirm against the
// repository.
#[tokio::main]
async fn main() -> io::Result<()> {
let args = Args::parse();
if args.hash_check {
check_hash(args.filename.clone()).await?;
let filename = args.filename.clone();
let dest_dir = args.dest_dir.clone();
if args.check_hash {
// NOTE(review): `.ok()` drops the result, so a failed hash check does not
// stop the run and its error is not reported here.
check_hash(args.filename.clone()).await.ok();
}
if args.metadata {
let diff = load_file(args.filename).await?;
println!("{}", serde_json::to_string(&diff.metadata)?);
let diff = load_file(filename).await?;
let metadata = diff.metadata;
// Only the compression level is printed in this variant.
println!(">>> Compress level: {}", metadata.compress_level);
return Ok(());
}
zspatch(args.filename, args.dest_dir).await
zspatch(filename, dest_dir).await
}