diff mbox series

[V2,2/3] rdiff_handler: fix compressed / encrypted delta

Message ID 20190718152447.6456-2-sbabic@denx.de
State Changes Requested
Headers show
Series None | expand

Commit Message

Stefano Babic July 18, 2019, 3:24 p.m. UTC
When delta file is compressed, the handler does not work because it
checks for the size of the compressed image instead of uncompressed one.
Do not wait for EOF in input buffer, because it cannot be known when the
handler starts: copyfile() simply stops to call the callback when all
data are successful uncompressed.

Signed-off-by: Stefano Babic <sbabic@denx.de>
---
 handlers/rdiff_handler.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

Comments

Storm, Christian July 19, 2019, 9:17 a.m. UTC | #1
Hi Stefano,

> When delta file is compressed, the handler does not work because it
> checks for the size of the compressed image instead of uncompressed one.
> Do not wait for EOF in input buffer, because it cannot be known when the
> handler starts: copyfile() simply stops to call the callback when all
> data are successful uncompressed.
> 
> Signed-off-by: Stefano Babic <sbabic@denx.de>
> ---
>  handlers/rdiff_handler.c | 18 +++++++-----------
>  1 file changed, 7 insertions(+), 11 deletions(-)
> 
> diff --git a/handlers/rdiff_handler.c b/handlers/rdiff_handler.c
> index 7b1bee3..cd50146 100644
> --- a/handlers/rdiff_handler.c
> +++ b/handlers/rdiff_handler.c
> @@ -47,8 +47,6 @@ struct rdiff_t
>  	char *inbuf;
>  	char *outbuf;
>  
> -	long long cpio_input_len;
> -
>  	uint8_t type;
>  };
>  
> @@ -111,7 +109,6 @@ static rs_result fill_inbuffer(struct rdiff_t *rdiff_state, const void *buf, uns
>  		TEST_OR_FAIL(*len <= RDIFF_BUFFER_SIZE, RS_IO_ERROR);
>  		buffers->next_in = rdiff_state->inbuf;
>  		buffers->avail_in = *len;
> -		rdiff_state->cpio_input_len -= *len;
>  		TRACE("Writing %d bytes to rdiff input buffer.", *len);
>  		(void)memcpy(rdiff_state->inbuf, buf, *len);
>  		*len = 0;
> @@ -128,11 +125,9 @@ static rs_result fill_inbuffer(struct rdiff_t *rdiff_state, const void *buf, uns
>  		}
>  		TRACE("Appending %d bytes to rdiff input buffer.", buflen);
>  		buffers->avail_in += buflen;
> -		rdiff_state->cpio_input_len -= buflen;
>  		(void)memcpy(target, buf, buflen);
>  		*len -= buflen;
>  	}
> -	buffers->eof_in = rdiff_state->cpio_input_len == 0 ? true : false;
>  	return RS_DONE;
>  }
>  
> @@ -178,9 +173,9 @@ static inline void rdiff_stats(const char* msg, struct rdiff_t *rdiff_state, rs_
>  		case RS_RUNNING: strresult = (char*)"RUNNING"; break;
>  		default: break;
>  	}
> -	TRACE("%s avail_in=%ld eof_in=%s avail_out=%ld remaining=%lld result=%s",
> +	TRACE("%s avail_in=%ld eof_in=%s avail_out=%ld result=%s",
>  		  msg, buffers->avail_in, buffers->eof_in == true ? "true":"false",

While I think the
  if (buffers->eof_in == true) { ... in line 97
still makes sense, logging eof_in=%s here could be skipped to make logs
less noisy, however, it doesn't harm either. What do you think?


> -		  buffers->avail_out, rdiff_state->cpio_input_len, strresult);
> +		  buffers->avail_out, strresult);
>  }
>  
>  static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
> @@ -189,6 +184,7 @@ static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>  	rs_buffers_t *buffers = &rdiff_state->buffers;
>  	unsigned int inbytesleft = len;
>  	rs_result result = RS_RUNNING;
> +	rs_result drain_run_result = RS_RUNNING;
>  
>  	if (buffers->next_out == NULL) {
>  		TEST_OR_FAIL(buffers->avail_out == 0, -1);
> @@ -196,7 +192,7 @@ static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>  		buffers->avail_out = RDIFF_BUFFER_SIZE;
>  	}
>  
> -	while (inbytesleft > 0 || (buffers->eof_in == true && buffers->avail_in > 0)) {
> +	while (inbytesleft > 0 || buffers->avail_in > 0) {
>  		rdiff_stats("[pre] ", rdiff_state, result);
>  		result = fill_inbuffer(rdiff_state, buf, &inbytesleft);
>  		if (result != RS_DONE && result != RS_BLOCKED) {
> @@ -207,7 +203,9 @@ static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>  			ERROR("Error processing rdiff chunk: %s", rs_strerror(result));
>  			return -1;
>  		}
> -		if (drain_outbuffer(rdiff_state) != RS_DONE) {
> +		drain_run_result = drain_outbuffer(rdiff_state);
> +		if (drain_run_result != RS_DONE) {
> +			ERROR("drain_outbuffer return error");
>  			return -1;
>  		}
>  		rdiff_stats("[post]", rdiff_state, result);
> @@ -332,8 +330,6 @@ static int apply_rdiff_patch(struct img_type *img,
>  		goto cleanup;
>  	}
>  
> -	rdiff_state.cpio_input_len = img->size;
> -
>  	int loglevelmap[] =
>  	{
>  		[OFF]        = RS_LOG_ERR,
> -- 
> 2.17.1
> 


Acked-by: Christian Storm <christian.storm@siemens.com>


I did a dig through the librsync code to see if not setting buffers->eof_in
would cause any harm, and I couldn't find an apparent reason. So, this is fine.
However, if you take a look into src/librsync.h, there's the comment, quoting:

 * \c buffers->eof_in should be true if there is no more data after what's
 * in the input buffer.  The final block checksum will run across whatever's
 * in there, without trying to accumulate anything else.

and the according "Input callbacks" section in doc/callbacks.md pointing in the
same direction. This suggests to set buffers->eof_in. As we don't do so -- for
a good reason -- maybe we should additionally add a comment in the code
indicating why?


Kind regards,
   Christian
Stefano Babic July 19, 2019, 10:15 a.m. UTC | #2
Hi Christian,

On 19/07/19 11:17, Christian Storm wrote:
> Hi Stefano,
> 
>> When delta file is compressed, the handler does not work because it
>> checks for the size of the compressed image instead of uncompressed one.
>> Do not wait for EOF in input buffer, because it cannot be known when the
>> handler starts: copyfile() simply stops to call the callback when all
>> data are successful uncompressed.
>>
>> Signed-off-by: Stefano Babic <sbabic@denx.de>
>> ---
>>  handlers/rdiff_handler.c | 18 +++++++-----------
>>  1 file changed, 7 insertions(+), 11 deletions(-)
>>
>> diff --git a/handlers/rdiff_handler.c b/handlers/rdiff_handler.c
>> index 7b1bee3..cd50146 100644
>> --- a/handlers/rdiff_handler.c
>> +++ b/handlers/rdiff_handler.c
>> @@ -47,8 +47,6 @@ struct rdiff_t
>>  	char *inbuf;
>>  	char *outbuf;
>>  
>> -	long long cpio_input_len;
>> -
>>  	uint8_t type;
>>  };
>>  
>> @@ -111,7 +109,6 @@ static rs_result fill_inbuffer(struct rdiff_t *rdiff_state, const void *buf, uns
>>  		TEST_OR_FAIL(*len <= RDIFF_BUFFER_SIZE, RS_IO_ERROR);
>>  		buffers->next_in = rdiff_state->inbuf;
>>  		buffers->avail_in = *len;
>> -		rdiff_state->cpio_input_len -= *len;
>>  		TRACE("Writing %d bytes to rdiff input buffer.", *len);
>>  		(void)memcpy(rdiff_state->inbuf, buf, *len);
>>  		*len = 0;
>> @@ -128,11 +125,9 @@ static rs_result fill_inbuffer(struct rdiff_t *rdiff_state, const void *buf, uns
>>  		}
>>  		TRACE("Appending %d bytes to rdiff input buffer.", buflen);
>>  		buffers->avail_in += buflen;
>> -		rdiff_state->cpio_input_len -= buflen;
>>  		(void)memcpy(target, buf, buflen);
>>  		*len -= buflen;
>>  	}
>> -	buffers->eof_in = rdiff_state->cpio_input_len == 0 ? true : false;
>>  	return RS_DONE;
>>  }
>>  
>> @@ -178,9 +173,9 @@ static inline void rdiff_stats(const char* msg, struct rdiff_t *rdiff_state, rs_
>>  		case RS_RUNNING: strresult = (char*)"RUNNING"; break;
>>  		default: break;
>>  	}
>> -	TRACE("%s avail_in=%ld eof_in=%s avail_out=%ld remaining=%lld result=%s",
>> +	TRACE("%s avail_in=%ld eof_in=%s avail_out=%ld result=%s",
>>  		  msg, buffers->avail_in, buffers->eof_in == true ? "true":"false",
> 
> While I think the
>   if (buffers->eof_in == true) { ... in line 97
> still makes sense, logging eof_in=%s here could be skipped to make logs
> less noisy, however, it doesn't harm either. What do you think?

True, I drop in V3

> 
> 
>> -		  buffers->avail_out, rdiff_state->cpio_input_len, strresult);
>> +		  buffers->avail_out, strresult);
>>  }
>>  
>>  static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>> @@ -189,6 +184,7 @@ static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>>  	rs_buffers_t *buffers = &rdiff_state->buffers;
>>  	unsigned int inbytesleft = len;
>>  	rs_result result = RS_RUNNING;
>> +	rs_result drain_run_result = RS_RUNNING;
>>  
>>  	if (buffers->next_out == NULL) {
>>  		TEST_OR_FAIL(buffers->avail_out == 0, -1);
>> @@ -196,7 +192,7 @@ static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>>  		buffers->avail_out = RDIFF_BUFFER_SIZE;
>>  	}
>>  
>> -	while (inbytesleft > 0 || (buffers->eof_in == true && buffers->avail_in > 0)) {
>> +	while (inbytesleft > 0 || buffers->avail_in > 0) {
>>  		rdiff_stats("[pre] ", rdiff_state, result);
>>  		result = fill_inbuffer(rdiff_state, buf, &inbytesleft);
>>  		if (result != RS_DONE && result != RS_BLOCKED) {
>> @@ -207,7 +203,9 @@ static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
>>  			ERROR("Error processing rdiff chunk: %s", rs_strerror(result));
>>  			return -1;
>>  		}
>> -		if (drain_outbuffer(rdiff_state) != RS_DONE) {
>> +		drain_run_result = drain_outbuffer(rdiff_state);
>> +		if (drain_run_result != RS_DONE) {
>> +			ERROR("drain_outbuffer return error");
>>  			return -1;
>>  		}
>>  		rdiff_stats("[post]", rdiff_state, result);
>> @@ -332,8 +330,6 @@ static int apply_rdiff_patch(struct img_type *img,
>>  		goto cleanup;
>>  	}
>>  
>> -	rdiff_state.cpio_input_len = img->size;
>> -
>>  	int loglevelmap[] =
>>  	{
>>  		[OFF]        = RS_LOG_ERR,
>> -- 
>> 2.17.1
>>
> 
> 
> Acked-by: Christian Storm <christian.storm@siemens.com>
> 
> 
> I did a dig through the librsync code to see if not setting buffers->eof_in
> would cause any harm, and I couldn't find an apparent reason. So, this is fine.
> However, if you take a look into src/librsync.h, there's the comment, quoting:
> 
>  * \c buffers->eof_in should be true if there is no more data after what's
>  * in the input buffer.  The final block checksum will run across whatever's
>  * in there, without trying to accumulate anything else.
> 

Right.

> and the according "Input callbacks" section in doc/callbacks.md pointing in the
> same direction. This suggests to set buffers->eof_in. As we don't do so -- for
> a good reason -- maybe we should additionally add a comment in the code
> indicating why?

One way. The second way could be to call rs_job_iter() again with eof_in
set after copyfile(). Something like:

diff --git a/handlers/rdiff_handler.c b/handlers/rdiff_handler.c
index cd70e7a..0ebdb7a 100644
--- a/handlers/rdiff_handler.c
+++ b/handlers/rdiff_handler.c
@@ -355,6 +355,16 @@ static int apply_rdiff_patch(struct img_type *img,
                goto cleanup;
        }

+
+       rdiff_state.buffers.eof_in = true;
+       rs_result result = rs_job_iter(rdiff_state.job,
&rdiff_state.buffers);
+
+       if (result != RS_DONE) {
+               ERROR("Error finalizing rdiff: %s", rs_strerror(result));
+               ret = -1;
+               goto cleanup;
+       }
+
        if (rdiff_state.type == FILE_HANDLER) {


It should do nothing else to be conform to API. What do you think ?

Regards,
Stefano
Storm, Christian July 19, 2019, 1:25 p.m. UTC | #3
Hi Stefano,

> > [...]
> >
> > I did a dig through the librsync code to see if not setting buffers->eof_in
> > would cause any harm, and I couldn't find an apparent reason. So, this is fine.
> > However, if you take a look into src/librsync.h, there's the comment, quoting:
> > 
> >  * \c buffers->eof_in should be true if there is no more data after what's
> >  * in the input buffer.  The final block checksum will run across whatever's
> >  * in there, without trying to accumulate anything else.
> > 
> 
> Right.
> 
> > and the according "Input callbacks" section in doc/callbacks.md pointing in the
> > same direction. This suggests to set buffers->eof_in. As we don't do so -- for
> > a good reason -- maybe we should additionally add a comment in the code
> > indicating why?
> 
> One way. The second way could be to call rs_job_iter() again with eof_in
> set after copyfile(). Something like:
> 
> diff --git a/handlers/rdiff_handler.c b/handlers/rdiff_handler.c
> index cd70e7a..0ebdb7a 100644
> --- a/handlers/rdiff_handler.c
> +++ b/handlers/rdiff_handler.c
> @@ -355,6 +355,16 @@ static int apply_rdiff_patch(struct img_type *img,
>                 goto cleanup;
>         }
> 
> +
> +       rdiff_state.buffers.eof_in = true;
> +       rs_result result = rs_job_iter(rdiff_state.job,
> &rdiff_state.buffers);
> +
> +       if (result != RS_DONE) {
> +               ERROR("Error finalizing rdiff: %s", rs_strerror(result));
> +               ret = -1;
> +               goto cleanup;
> +       }
> +
>         if (rdiff_state.type == FILE_HANDLER) {
> 
> 
> It should do nothing else to be conform to API. What do you think ?

Hm, to my understanding, in order to conform to the API, you have to tell rdiff
that there's EOF on the last input chunk handed over to it.

So, ultimately, in order be really conforming to the API, the handler callback
function signature has to be extended with, say, a flags parameter and therein
an EOF field that is set only upon the last invocation of the handler callback.
The copyfile() loop does know this and is the only decisive source for this
information. While it can't tell the actual size a priori, it can tell when it
has read the last input chunk, hence it can pass the information EOF=true to
the handler callback.

In case of rdiff, I think this isn't necessary as it has already determined the
end of its input by the buffer counts being zero. So, with this, we call it
once more than strictly necessary as the remaining input would have been
already consumed in the previous rdiff processing run if EOF was set. I'm not
sure if calling it again with zero buffer input to process is the right
approach here. On the other hand, we have to make sure that rdiff really does
recognize the end of its input which it apparently does...


Kind regards,
   Christian
Stefano Babic July 19, 2019, 1:26 p.m. UTC | #4
Hi Christian,

On 19/07/19 15:25, Christian Storm wrote:
> Hi Stefano,
> 
>>> [...]
>>>
>>> I did a dig through the librsync code to see if not setting buffers->eof_in
>>> would cause any harm, and I couldn't find an apparent reason. So, this is fine.
>>> However, if you take a look into src/librsync.h, there's the comment, quoting:
>>>
>>>  * \c buffers->eof_in should be true if there is no more data after what's
>>>  * in the input buffer.  The final block checksum will run across whatever's
>>>  * in there, without trying to accumulate anything else.
>>>
>>
>> Right.
>>
>>> and the according "Input callbacks" section in doc/callbacks.md pointing in the
>>> same direction. This suggests to set buffers->eof_in. As we don't do so -- for
>>> a good reason -- maybe we should additionally add a comment in the code
>>> indicating why?
>>
>> One way. The second way could be to call rs_job_iter() again with eof_in
>> set after copyfile(). Something like:
>>
>> diff --git a/handlers/rdiff_handler.c b/handlers/rdiff_handler.c
>> index cd70e7a..0ebdb7a 100644
>> --- a/handlers/rdiff_handler.c
>> +++ b/handlers/rdiff_handler.c
>> @@ -355,6 +355,16 @@ static int apply_rdiff_patch(struct img_type *img,
>>                 goto cleanup;
>>         }
>>
>> +
>> +       rdiff_state.buffers.eof_in = true;
>> +       rs_result result = rs_job_iter(rdiff_state.job,
>> &rdiff_state.buffers);
>> +
>> +       if (result != RS_DONE) {
>> +               ERROR("Error finalizing rdiff: %s", rs_strerror(result));
>> +               ret = -1;
>> +               goto cleanup;
>> +       }
>> +
>>         if (rdiff_state.type == FILE_HANDLER) {
>>
>>
>> It should do nothing else to be conform to API. What do you think ?
> 
> Hm, to my understanding, in order to conform to the API, you have to tell rdiff
> that there's EOF on the last input chunk handed over to it.
> 
> So, ultimately, in order be really conforming to the API, the handler callback
> function signature has to be extended with, say, a flags parameter and therein
> an EOF field that is set only upon the last invocation of the handler callback.
> The copyfile() loop does know this and is the only decisive source for this
> information. While it can't tell the actual size a priori, it can tell when it
> has read the last input chunk, hence it can pass the information EOF=true to
> the handler callback.

Mmhh... I do not want to change callbacks in copyfile()

> 
> In case of rdiff, I think this isn't necessary as it has already determined the
> end of its input by the buffer counts being zero. So, with this, we call it
> once more than strictly necessary as the remaining input would have been
> already consumed in the previous rdiff processing run if EOF was set. I'm not
> sure if calling it again with zero buffer input to process is the right
> approach here. On the other hand, we have to make sure that rdiff really does
> recognize the end of its input which it apparently does...

ok - Ip ush a new version without the above trick ;-)

Regards,
Stefano
diff mbox series

Patch

diff --git a/handlers/rdiff_handler.c b/handlers/rdiff_handler.c
index 7b1bee3..cd50146 100644
--- a/handlers/rdiff_handler.c
+++ b/handlers/rdiff_handler.c
@@ -47,8 +47,6 @@  struct rdiff_t
 	char *inbuf;
 	char *outbuf;
 
-	long long cpio_input_len;
-
 	uint8_t type;
 };
 
@@ -111,7 +109,6 @@  static rs_result fill_inbuffer(struct rdiff_t *rdiff_state, const void *buf, uns
 		TEST_OR_FAIL(*len <= RDIFF_BUFFER_SIZE, RS_IO_ERROR);
 		buffers->next_in = rdiff_state->inbuf;
 		buffers->avail_in = *len;
-		rdiff_state->cpio_input_len -= *len;
 		TRACE("Writing %d bytes to rdiff input buffer.", *len);
 		(void)memcpy(rdiff_state->inbuf, buf, *len);
 		*len = 0;
@@ -128,11 +125,9 @@  static rs_result fill_inbuffer(struct rdiff_t *rdiff_state, const void *buf, uns
 		}
 		TRACE("Appending %d bytes to rdiff input buffer.", buflen);
 		buffers->avail_in += buflen;
-		rdiff_state->cpio_input_len -= buflen;
 		(void)memcpy(target, buf, buflen);
 		*len -= buflen;
 	}
-	buffers->eof_in = rdiff_state->cpio_input_len == 0 ? true : false;
 	return RS_DONE;
 }
 
@@ -178,9 +173,9 @@  static inline void rdiff_stats(const char* msg, struct rdiff_t *rdiff_state, rs_
 		case RS_RUNNING: strresult = (char*)"RUNNING"; break;
 		default: break;
 	}
-	TRACE("%s avail_in=%ld eof_in=%s avail_out=%ld remaining=%lld result=%s",
+	TRACE("%s avail_in=%ld eof_in=%s avail_out=%ld result=%s",
 		  msg, buffers->avail_in, buffers->eof_in == true ? "true":"false",
-		  buffers->avail_out, rdiff_state->cpio_input_len, strresult);
+		  buffers->avail_out, strresult);
 }
 
 static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
@@ -189,6 +184,7 @@  static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
 	rs_buffers_t *buffers = &rdiff_state->buffers;
 	unsigned int inbytesleft = len;
 	rs_result result = RS_RUNNING;
+	rs_result drain_run_result = RS_RUNNING;
 
 	if (buffers->next_out == NULL) {
 		TEST_OR_FAIL(buffers->avail_out == 0, -1);
@@ -196,7 +192,7 @@  static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
 		buffers->avail_out = RDIFF_BUFFER_SIZE;
 	}
 
-	while (inbytesleft > 0 || (buffers->eof_in == true && buffers->avail_in > 0)) {
+	while (inbytesleft > 0 || buffers->avail_in > 0) {
 		rdiff_stats("[pre] ", rdiff_state, result);
 		result = fill_inbuffer(rdiff_state, buf, &inbytesleft);
 		if (result != RS_DONE && result != RS_BLOCKED) {
@@ -207,7 +203,9 @@  static int apply_rdiff_chunk_cb(void *out, const void *buf, unsigned int len)
 			ERROR("Error processing rdiff chunk: %s", rs_strerror(result));
 			return -1;
 		}
-		if (drain_outbuffer(rdiff_state) != RS_DONE) {
+		drain_run_result = drain_outbuffer(rdiff_state);
+		if (drain_run_result != RS_DONE) {
+			ERROR("drain_outbuffer return error");
 			return -1;
 		}
 		rdiff_stats("[post]", rdiff_state, result);
@@ -332,8 +330,6 @@  static int apply_rdiff_patch(struct img_type *img,
 		goto cleanup;
 	}
 
-	rdiff_state.cpio_input_len = img->size;
-
 	int loglevelmap[] =
 	{
 		[OFF]        = RS_LOG_ERR,