diff mbox

[1/2] exec.c: do not truncate non-empty memory backend file

Message ID 20161024092151.32386-2-haozhong.zhang@intel.com
State New
Headers show

Commit Message

Haozhong Zhang Oct. 24, 2016, 9:21 a.m. UTC
For '-object memory-backend-file,mem-path=foo,size=xyz', if the size of
file 'foo' does not match the given size 'xyz', the current QEMU will
truncate the file to the given size, which may corrupt the existing data
in that file. To avoid such data corruption, this patch disables
truncating non-empty backend files.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
 exec.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

Comments

Eduardo Habkost Oct. 25, 2016, 7:30 p.m. UTC | #1
On Mon, Oct 24, 2016 at 05:21:50PM +0800, Haozhong Zhang wrote:
> For '-object memory-backend-file,mem-path=foo,size=xyz', if the size of
> file 'foo' does not match the given size 'xyz', the current QEMU will
> truncate the file to the given size, which may corrupt the existing data
> in that file. To avoid such data corruption, this patch disables
> truncating non-empty backend files.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
>  exec.c | 37 ++++++++++++++++++++++++++++++++++++-
>  1 file changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/exec.c b/exec.c
> index e63c5a1..95983c9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1188,6 +1188,15 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> +static int64_t get_file_size(int fd)
> +{
> +    int64_t size = lseek(fd, 0, SEEK_END);
> +    if (size < 0) {
> +        return -errno;
> +    }
> +    return size;
> +}
> +
>  static void *file_ram_alloc(RAMBlock *block,
>                              ram_addr_t memory,
>                              const char *path,
> @@ -1199,6 +1208,7 @@ static void *file_ram_alloc(RAMBlock *block,
>      char *c;
>      void *area = MAP_FAILED;
>      int fd = -1;
> +    int64_t file_size;
>  
>      if (kvm_enabled() && !kvm_has_sync_mmu()) {
>          error_setg(errp,
> @@ -1256,6 +1266,14 @@ static void *file_ram_alloc(RAMBlock *block,
>      block->page_size = qemu_fd_getpagesize(fd);
>      block->mr->align = MAX(block->page_size, QEMU_VMALLOC_ALIGN);
>  
> +    file_size = get_file_size(fd);
> +    if (file_size < 0) {
> +        error_setg_errno(errp, file_size,
> +                         "can't get size of backing store %s",
> +                         path);

What about block devices or filesystems where lseek(SEEK_END) is
not supported? They work today, and would break with this patch.

I suggest just continuing without any errors (and not truncating
the file) in case it is not possible to get the file size.

> +        goto error;
> +    }
> +
>      if (memory < block->page_size) {
>          error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
>                     "or larger than page size 0x%zx",
> @@ -1266,12 +1284,29 @@ static void *file_ram_alloc(RAMBlock *block,
>      memory = ROUND_UP(memory, block->page_size);
>  
>      /*
> +     * Do not extend/shrink the backend file if it's not empty, or its
> +     * size does not match the aligned 'size=xxx' option. Otherwise,
> +     * it is possible to corrupt the existing data in the file.
> +     *
> +     * Disabling shrinking is not enough. For example, the current
> +     * vNVDIMM implementation stores the guest NVDIMM labels at the
> +     * end of the backend file. If the backend file is later extended,
> +     * QEMU will not be able to find those labels. Therefore,
> +     * extending the non-empty backend file is disabled as well.
> +     */
> +    if (file_size && file_size != memory) {
> +        error_setg(errp, "backing store %s size %"PRId64
> +                   " does not math with aligned 'size' option %"PRIu64,

Did you mean "specified 'size' option"?

> +                   path, file_size, memory);

We already support size being smaller than the backing file and
people may rely on it, so we shouldn't change this behavior. This
can be changed to:
    if (file_size > 0 && file_size < memory)

I also suggest doing this check in a separate patch. The two
changes (skipping truncation of non-empty files and exiting on
size mismatch) don't depend on each other.

> +        goto error;
> +    }
> +    /*
>       * ftruncate is not supported by hugetlbfs in older
>       * hosts, so don't bother bailing out on errors.
>       * If anything goes wrong with it under other filesystems,
>       * mmap will fail.
>       */
> -    if (ftruncate(fd, memory)) {
> +    if (!file_size && ftruncate(fd, memory)) {
>          perror("ftruncate");
>      }
>  
> -- 
> 2.10.1
>
Haozhong Zhang Oct. 26, 2016, 4:19 a.m. UTC | #2
On 10/25/16 17:30 -0200, Eduardo Habkost wrote:
>On Mon, Oct 24, 2016 at 05:21:50PM +0800, Haozhong Zhang wrote:
>> For '-object memory-backend-file,mem-path=foo,size=xyz', if the size of
>> file 'foo' does not match the given size 'xyz', the current QEMU will
>> truncate the file to the given size, which may corrupt the existing data
>> in that file. To avoid such data corruption, this patch disables
>> truncating non-empty backend files.
>>
>> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
>> ---
>>  exec.c | 37 ++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/exec.c b/exec.c
>> index e63c5a1..95983c9 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1188,6 +1188,15 @@ void qemu_mutex_unlock_ramlist(void)
>>  }
>>
>>  #ifdef __linux__
>> +static int64_t get_file_size(int fd)
>> +{
>> +    int64_t size = lseek(fd, 0, SEEK_END);
>> +    if (size < 0) {
>> +        return -errno;
>> +    }
>> +    return size;
>> +}
>> +
>>  static void *file_ram_alloc(RAMBlock *block,
>>                              ram_addr_t memory,
>>                              const char *path,
>> @@ -1199,6 +1208,7 @@ static void *file_ram_alloc(RAMBlock *block,
>>      char *c;
>>      void *area = MAP_FAILED;
>>      int fd = -1;
>> +    int64_t file_size;
>>
>>      if (kvm_enabled() && !kvm_has_sync_mmu()) {
>>          error_setg(errp,
>> @@ -1256,6 +1266,14 @@ static void *file_ram_alloc(RAMBlock *block,
>>      block->page_size = qemu_fd_getpagesize(fd);
>>      block->mr->align = MAX(block->page_size, QEMU_VMALLOC_ALIGN);
>>
>> +    file_size = get_file_size(fd);
>> +    if (file_size < 0) {
>> +        error_setg_errno(errp, file_size,
>> +                         "can't get size of backing store %s",
>> +                         path);
>
>What about block devices or filesystems where lseek(SEEK_END) is
>not supported? They work today, and would break with this patch.
>
>I suggest just continuing without any errors (and not truncating
>the file) in case it is not possible to get the file size.
>

If it fails to get file size, I'd fall back to the 'size' option. If
it's not zero, QEMU will not truncate the file. If 'memory' is zero,
QEMU will error-out a message like "cannot get file size, 'size'
option should be provided".

>> +        goto error;
>> +    }
>> +
>>      if (memory < block->page_size) {
>>          error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
>>                     "or larger than page size 0x%zx",
>> @@ -1266,12 +1284,29 @@ static void *file_ram_alloc(RAMBlock *block,
>>      memory = ROUND_UP(memory, block->page_size);
>>
>>      /*
>> +     * Do not extend/shrink the backend file if it's not empty, or its
>> +     * size does not match the aligned 'size=xxx' option. Otherwise,
>> +     * it is possible to corrupt the existing data in the file.
>> +     *
>> +     * Disabling shrinking is not enough. For example, the current
>> +     * vNVDIMM implementation stores the guest NVDIMM labels at the
>> +     * end of the backend file. If the backend file is later extended,
>> +     * QEMU will not be able to find those labels. Therefore,
>> +     * extending the non-empty backend file is disabled as well.
>> +     */
>> +    if (file_size && file_size != memory) {
>> +        error_setg(errp, "backing store %s size %"PRId64
>> +                   " does not math with aligned 'size' option %"PRIu64,
>
>Did you mean "specified 'size' option"?
>

I meant aligned, because the original 'size' option might have been
aligned to a value larger than file_size at this point. I'll use
'specified' in the next version.

>> +                   path, file_size, memory);
>
>We already support size being smaller than the backing file and
>people may rely on it, so we shouldn't change this behavior. This
>can be changed to:
>    if (file_size > 0 && file_size < memory)
>

will change

>I also suggest doing this check in a separate patch. The two
>changes (skipping truncation of non-empty files and exiting on
>size mismatch) don't depend on each other.
>

will do

Thanks,
Haozhong

>> +        goto error;
>> +    }
>> +    /*
>>       * ftruncate is not supported by hugetlbfs in older
>>       * hosts, so don't bother bailing out on errors.
>>       * If anything goes wrong with it under other filesystems,
>>       * mmap will fail.
>>       */
>> -    if (ftruncate(fd, memory)) {
>> +    if (!file_size && ftruncate(fd, memory)) {
>>          perror("ftruncate");
>>      }
>>
>> --
>> 2.10.1
>>
>
>-- 
>Eduardo
diff mbox

Patch

diff --git a/exec.c b/exec.c
index e63c5a1..95983c9 100644
--- a/exec.c
+++ b/exec.c
@@ -1188,6 +1188,15 @@  void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
+static int64_t get_file_size(int fd)
+{
+    int64_t size = lseek(fd, 0, SEEK_END);
+    if (size < 0) {
+        return -errno;
+    }
+    return size;
+}
+
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1199,6 +1208,7 @@  static void *file_ram_alloc(RAMBlock *block,
     char *c;
     void *area = MAP_FAILED;
     int fd = -1;
+    int64_t file_size;
 
     if (kvm_enabled() && !kvm_has_sync_mmu()) {
         error_setg(errp,
@@ -1256,6 +1266,14 @@  static void *file_ram_alloc(RAMBlock *block,
     block->page_size = qemu_fd_getpagesize(fd);
     block->mr->align = MAX(block->page_size, QEMU_VMALLOC_ALIGN);
 
+    file_size = get_file_size(fd);
+    if (file_size < 0) {
+        error_setg_errno(errp, file_size,
+                         "can't get size of backing store %s",
+                         path);
+        goto error;
+    }
+
     if (memory < block->page_size) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                    "or larger than page size 0x%zx",
@@ -1266,12 +1284,29 @@  static void *file_ram_alloc(RAMBlock *block,
     memory = ROUND_UP(memory, block->page_size);
 
     /*
+     * Do not extend/shrink the backend file if it's not empty, or its
+     * size does not match the aligned 'size=xxx' option. Otherwise,
+     * it is possible to corrupt the existing data in the file.
+     *
+     * Disabling shrinking is not enough. For example, the current
+     * vNVDIMM implementation stores the guest NVDIMM labels at the
+     * end of the backend file. If the backend file is later extended,
+     * QEMU will not be able to find those labels. Therefore,
+     * extending the non-empty backend file is disabled as well.
+     */
+    if (file_size && file_size != memory) {
+        error_setg(errp, "backing store %s size %"PRId64
+                   " does not math with aligned 'size' option %"PRIu64,
+                   path, file_size, memory);
+        goto error;
+    }
+    /*
      * ftruncate is not supported by hugetlbfs in older
      * hosts, so don't bother bailing out on errors.
      * If anything goes wrong with it under other filesystems,
      * mmap will fail.
      */
-    if (ftruncate(fd, memory)) {
+    if (!file_size && ftruncate(fd, memory)) {
         perror("ftruncate");
     }