Merge lp:~jelmer/brz/delta-refactor into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: 7904
Merged at revision: 7903
Proposed branch: lp:~jelmer/brz/delta-refactor
Merge into: lp:brz
Diff against target: 402 lines (+198/-50)
4 files modified
Cargo.lock (+28/-0)
crates/bazaar/Cargo.toml (+2/-0)
crates/bazaar/src/groupcompress/mod.rs (+160/-42)
crates/osutils/src/chunkreader.rs (+8/-8)
To merge this branch: bzr merge lp:~jelmer/brz/delta-refactor
Reviewer | Review Type | Date Requested | Status
Jelmer Vernooij | | | Approve
Review via email: mp+455485@code.launchpad.net

Commit message

Use reader pattern

Description of the change

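Use reader pattern: in groupcompress delta decoding, add `read_base128_int` and `read_copy_instruction`, which consume bytes from a `std::io::Read`, and keep the slice-based `decode_base128_int` and `decode_copy_instruction` as deprecated wrappers around them. `apply_delta` now reads directly from the delta slice, and `ChunksReader` takes a boxed iterator and no longer has a lifetime parameter.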

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'Cargo.lock'
2--- Cargo.lock 2023-11-11 15:04:09 +0000
3+++ Cargo.lock 2023-11-12 22:03:08 +0000
4@@ -97,6 +97,7 @@
5 "chrono",
6 "crc32fast",
7 "fancy-regex",
8+ "flate2",
9 "lazy-regex",
10 "lazy_static",
11 "log",
12@@ -108,6 +109,7 @@
13 "sha1",
14 "tempfile",
15 "xmltree",
16+ "xz2",
17 ]
18
19 [[package]]
20@@ -793,6 +795,17 @@
21 ]
22
23 [[package]]
24+name = "lzma-sys"
25+version = "0.1.20"
26+source = "registry+https://github.com/rust-lang/crates.io-index"
27+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
28+dependencies = [
29+ "cc",
30+ "libc",
31+ "pkg-config",
32+]
33+
34+[[package]]
35 name = "malloc_buf"
36 version = "0.0.6"
37 source = "registry+https://github.com/rust-lang/crates.io-index"
38@@ -980,6 +993,12 @@
39 checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
40
41 [[package]]
42+name = "pkg-config"
43+version = "0.3.27"
44+source = "registry+https://github.com/rust-lang/crates.io-index"
45+checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
46+
47+[[package]]
48 name = "ppv-lite86"
49 version = "0.2.17"
50 source = "registry+https://github.com/rust-lang/crates.io-index"
51@@ -1752,6 +1771,15 @@
52 ]
53
54 [[package]]
55+name = "xz2"
56+version = "0.1.7"
57+source = "registry+https://github.com/rust-lang/crates.io-index"
58+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
59+dependencies = [
60+ "lzma-sys",
61+]
62+
63+[[package]]
64 name = "zerocopy"
65 version = "0.7.25"
66 source = "registry+https://github.com/rust-lang/crates.io-index"
67
68=== modified file 'crates/bazaar/Cargo.toml'
69--- crates/bazaar/Cargo.toml 2023-11-11 15:04:09 +0000
70+++ crates/bazaar/Cargo.toml 2023-11-12 22:03:08 +0000
71@@ -28,6 +28,8 @@
72 lazy-regex = "3.0.0"
73 byteorder = "1.5.0"
74 lru = "0.12.0"
75+flate2 = "1.0.28"
76+xz2 = "0.1.7"
77
78 [target.'cfg(unix)'.dependencies]
79 nix = { workspace = true }
80
81=== modified file 'crates/bazaar/src/groupcompress/mod.rs'
82--- crates/bazaar/src/groupcompress/mod.rs 2023-11-12 01:37:05 +0000
83+++ crates/bazaar/src/groupcompress/mod.rs 2023-11-12 22:03:08 +0000
84@@ -1,6 +1,8 @@
85 pub mod delta;
86 pub mod line_delta;
87+use byteorder::ReadBytesExt;
88 use sha1::{Digest as _, Sha1};
89+use std::io::Read;
90
91 lazy_static::lazy_static! {
92 pub static ref NULL_SHA1: Vec<u8> = format!("{:x}", Sha1::new().finalize()).as_bytes().to_vec();
93@@ -16,83 +18,173 @@
94 data
95 }
96
97-pub fn decode_base128_int(data: &[u8]) -> (u128, usize) {
98- let mut offset = 0;
99+pub fn read_base128_int<R: Read>(reader: &mut R) -> Result<u128, std::io::Error> {
100 let mut val: u128 = 0;
101 let mut shift = 0;
102- let mut bval = data[offset];
103- while bval >= 0x80 {
104- val |= ((bval & 0x7F) as u128) << shift;
105+ let mut bval = [0];
106+ reader.read_exact(&mut bval)?;
107+ while bval[0] >= 0x80 {
108+ val |= ((bval[0] & 0x7F) as u128) << shift;
109+ reader.read_exact(&mut bval)?;
110 shift += 7;
111- offset += 1;
112- bval = data[offset];
113- }
114- val |= (bval as u128) << shift;
115- offset += 1;
116- (val, offset)
117-}
118-
119-pub type CopyInstruction = (usize, usize, usize);
120-
121+ }
122+
123+ val |= (bval[0] as u128) << shift;
124+ Ok(val)
125+}
126+
127+#[cfg(test)]
128+mod test_base128_int {
129+ #[test]
130+ fn test_decode_base128_int() {
131+ assert_eq!(super::decode_base128_int(&[0x00]), (0, 1));
132+ assert_eq!(super::decode_base128_int(&[0x01]), (1, 1));
133+ assert_eq!(super::decode_base128_int(&[0x7F]), (127, 1));
134+ assert_eq!(super::decode_base128_int(&[0x80, 0x01]), (128, 2));
135+ assert_eq!(super::decode_base128_int(&[0xFF, 0x01]), (255, 2));
136+ assert_eq!(super::decode_base128_int(&[0x80, 0x02]), (256, 2));
137+ assert_eq!(super::decode_base128_int(&[0x81, 0x02]), (257, 2));
138+ assert_eq!(super::decode_base128_int(&[0x82, 0x02]), (258, 2));
139+ assert_eq!(super::decode_base128_int(&[0xFF, 0x7F]), (16383, 2));
140+ assert_eq!(super::decode_base128_int(&[0x80, 0x80, 0x01]), (16384, 3));
141+ assert_eq!(super::decode_base128_int(&[0xFF, 0xFF, 0x7F]), (2097151, 3));
142+ assert_eq!(
143+ super::decode_base128_int(&[0x80, 0x80, 0x80, 0x01]),
144+ (2097152, 4)
145+ );
146+ assert_eq!(
147+ super::decode_base128_int(&[0xFF, 0xFF, 0xFF, 0x7F]),
148+ (268435455, 4)
149+ );
150+ assert_eq!(
151+ super::decode_base128_int(&[0x80, 0x80, 0x80, 0x80, 0x01]),
152+ (268435456, 5)
153+ );
154+ assert_eq!(
155+ super::decode_base128_int(&[0xFF, 0xFF, 0xFF, 0xFF, 0x7F]),
156+ (34359738367, 5)
157+ );
158+ assert_eq!(
159+ super::decode_base128_int(&[0x80, 0x80, 0x80, 0x80, 0x80, 0x01]),
160+ (34359738368, 6)
161+ );
162+ }
163+
164+ #[test]
165+ fn test_encode_base128_int() {
166+ assert_eq!(super::encode_base128_int(0), [0x00]);
167+ assert_eq!(super::encode_base128_int(1), [0x01]);
168+ assert_eq!(super::encode_base128_int(127), [0x7F]);
169+ assert_eq!(super::encode_base128_int(128), [0x80, 0x01]);
170+ assert_eq!(super::encode_base128_int(255), [0xFF, 0x01]);
171+ assert_eq!(super::encode_base128_int(256), [0x80, 0x02]);
172+ assert_eq!(super::encode_base128_int(257), [0x81, 0x02]);
173+ assert_eq!(super::encode_base128_int(258), [0x82, 0x02]);
174+ assert_eq!(super::encode_base128_int(16383), [0xFF, 0x7F]);
175+ assert_eq!(super::encode_base128_int(16384), [0x80, 0x80, 0x01]);
176+ assert_eq!(super::encode_base128_int(2097151), [0xFF, 0xFF, 0x7F]);
177+ assert_eq!(super::encode_base128_int(2097152), [0x80, 0x80, 0x80, 0x01]);
178+ assert_eq!(
179+ super::encode_base128_int(268435455),
180+ [0xFF, 0xFF, 0xFF, 0x7F]
181+ );
182+ assert_eq!(
183+ super::encode_base128_int(268435456),
184+ [0x80, 0x80, 0x80, 0x80, 0x01]
185+ );
186+ assert_eq!(
187+ super::encode_base128_int(34359738367),
188+ [0xFF, 0xFF, 0xFF, 0xFF, 0x7F]
189+ );
190+ assert_eq!(
191+ super::encode_base128_int(34359738368),
192+ [0x80, 0x80, 0x80, 0x80, 0x80, 0x01]
193+ );
194+ assert_eq!(
195+ super::encode_base128_int(4398046511103),
196+ [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]
197+ );
198+ assert_eq!(
199+ super::encode_base128_int(4398046511104),
200+ [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]
201+ );
202+ }
203+}
204+
205+#[deprecated]
206+pub fn decode_base128_int(data: &[u8]) -> (u128, usize) {
207+ let mut cursor = std::io::Cursor::new(data);
208+ let val = read_base128_int(&mut cursor).unwrap();
209+ (val, cursor.position() as usize)
210+}
211+
212+#[deprecated]
213 pub fn decode_copy_instruction(
214 data: &[u8],
215 cmd: u8,
216 pos: usize,
217-) -> Result<CopyInstruction, String> {
218+) -> Result<(usize, usize, usize), String> {
219+ let mut c = std::io::Cursor::new(&data[pos..]);
220+
221+ let (offset, length) = read_copy_instruction(&mut c, cmd).unwrap();
222+
223+ Ok((offset, length, pos + c.position() as usize))
224+}
225+
226+pub type CopyInstruction = (usize, usize);
227+
228+pub fn read_copy_instruction<R: Read>(
229+ reader: &mut R,
230+ cmd: u8,
231+) -> Result<CopyInstruction, std::io::Error> {
232 if cmd & 0x80 != 0x80 {
233- return Err("copy instructions must have bit 0x80 set".to_string());
234+ return Err(std::io::Error::new(
235+ std::io::ErrorKind::Other,
236+ "copy instructions must have bit 0x80 set".to_string(),
237+ ));
238 }
239 let mut offset = 0;
240 let mut length = 0;
241- let mut new_pos = pos;
242
243 if cmd & 0x01 != 0 {
244- offset = data[new_pos] as usize;
245- new_pos += 1;
246+ offset = reader.read_u8()? as usize;
247 }
248 if cmd & 0x02 != 0 {
249- offset |= (data[new_pos] as usize) << 8;
250- new_pos += 1;
251+ offset |= (reader.read_u8()? as usize) << 8;
252 }
253 if cmd & 0x04 != 0 {
254- offset |= (data[new_pos] as usize) << 16;
255- new_pos += 1;
256+ offset |= (reader.read_u8()? as usize) << 16;
257 }
258 if cmd & 0x08 != 0 {
259- offset |= (data[new_pos] as usize) << 24;
260- new_pos += 1;
261+ offset |= (reader.read_u8()? as usize) << 24;
262 }
263 if cmd & 0x10 != 0 {
264- length = data[new_pos] as usize;
265- new_pos += 1;
266+ length = reader.read_u8()? as usize;
267 }
268 if cmd & 0x20 != 0 {
269- length |= (data[new_pos] as usize) << 8;
270- new_pos += 1;
271+ length |= (reader.read_u8()? as usize) << 8;
272 }
273 if cmd & 0x40 != 0 {
274- length |= (data[new_pos] as usize) << 16;
275- new_pos += 1;
276+ length |= (reader.read_u8()? as usize) << 16;
277 }
278 if length == 0 {
279 length = 65536;
280 }
281
282- Ok((offset, length, new_pos))
283+ Ok((offset, length))
284 }
285
286 pub fn apply_delta(basis: &[u8], delta: &[u8]) -> Result<Vec<u8>, String> {
287- let (target_length, mut pos) = decode_base128_int(delta);
288+ let mut delta = &delta[..];
289+ let target_length = read_base128_int(&mut delta).map_err(|e| e.to_string())?;
290 let mut lines = Vec::new();
291- let len_delta = delta.len();
292
293- while pos < len_delta {
294- let cmd = delta[pos];
295- pos += 1;
296+ while !delta.is_empty() {
297+ let cmd = delta.read_u8().map_err(|e| e.to_string())?;
298
299 if cmd & 0x80 != 0 {
300- let (offset, length, new_pos) = decode_copy_instruction(delta, cmd, pos)?;
301- pos = new_pos;
302+ let (offset, length) =
303+ read_copy_instruction(&mut delta, cmd).map_err(|e| e.to_string())?;
304 let last = offset + length;
305 if last > basis.len() {
306 return Err("data would copy bytes past the end of source".to_string());
307@@ -102,8 +194,8 @@
308 if cmd == 0 {
309 return Err("Command == 0 not supported yet".to_string());
310 }
311- lines.extend_from_slice(&delta[pos..pos + cmd as usize]);
312- pos += cmd as usize;
313+ lines.extend_from_slice(&delta[..cmd as usize]);
314+ delta = &delta[cmd as usize..];
315 }
316 }
317
318@@ -118,6 +210,32 @@
319 Ok(lines)
320 }
321
322+#[cfg(test)]
323+mod test_apply_delta {
324+ const TEXT1: &[u8] = b"This is a bit
325+of source text
326+which is meant to be matched
327+against other text
328+";
329+
330+ const TEXT2: &[u8] = b"This is a bit
331+of source text
332+which is meant to differ from
333+against other text
334+";
335+
336+ #[test]
337+ fn test_apply_delta() {
338+ let target =
339+ super::apply_delta(TEXT1, b"N\x90/\x1fdiffer from\nagainst other text\n").unwrap();
340+ assert_eq!(target, TEXT2);
341+ let target =
342+ super::apply_delta(TEXT2, b"M\x90/\x1ebe matched\nagainst other text\n").unwrap();
343+ assert_eq!(target, TEXT1);
344+ }
345+}
346+
347+#[deprecated]
348 pub fn apply_delta_to_source(
349 source: &[u8],
350 delta_start: usize,
351
352=== modified file 'crates/osutils/src/chunkreader.rs'
353--- crates/osutils/src/chunkreader.rs 2023-11-12 17:50:30 +0000
354+++ crates/osutils/src/chunkreader.rs 2023-11-12 22:03:08 +0000
355@@ -1,23 +1,23 @@
356 use std::borrow::Borrow;
357 use std::io::Read;
358
359-pub struct ChunksReader<'a, T: Borrow<[u8]>> {
360- chunks: Box<dyn Iterator<Item = T> + 'a>,
361+pub struct ChunksReader<T: Borrow<[u8]>> {
362+ chunks: Box<dyn Iterator<Item = T>>,
363 current_chunk: Option<T>,
364 position: usize,
365 }
366
367-impl<'a, T: Borrow<[u8]>> ChunksReader<'a, T> {
368- pub fn new(chunks: impl Iterator<Item = T> + 'a) -> Self {
369+impl<T: Borrow<[u8]>> ChunksReader<T> {
370+ pub fn new(chunks: Box<dyn Iterator<Item = T>>) -> Self {
371 ChunksReader {
372- chunks: Box::new(chunks),
373+ chunks,
374 position: 0,
375 current_chunk: None,
376 }
377 }
378 }
379
380-impl<'a, T: Borrow<[u8]>> Read for ChunksReader<'a, T> {
381+impl<T: Borrow<[u8]>> Read for ChunksReader<T> {
382 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
383 let mut bytes_read = 0;
384
385@@ -47,7 +47,7 @@
386 #[test]
387 fn test_chunks_reader_vec() {
388 let chunks = vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]];
389- let mut reader = ChunksReader::new(Box::new(chunks.iter().map(|v| v.as_slice())));
390+ let mut reader = ChunksReader::new(Box::new(chunks.into_iter()));
391
392 let mut buf = [0; 4];
393 assert_eq!(reader.read(&mut buf).unwrap(), 4);
394@@ -65,7 +65,7 @@
395 #[test]
396 fn test_chunks_reader_slice() {
397 let chunks = [[1, 2, 3], [4, 5, 6], [7, 8, 9]];
398- let mut reader = ChunksReader::new(Box::new(chunks.iter().map(|v| v.as_slice())));
399+ let mut reader = ChunksReader::new(Box::new(chunks.into_iter()));
400
401 let mut buf = [0; 4];
402 assert_eq!(reader.read(&mut buf).unwrap(), 4);

Subscribers

People subscribed via source and target branches