diff mbox series

[4/4] um: Add support for DISCARD in the UBD Driver

Message ID 20181114170943.12065-4-anton.ivanov@cambridgegreys.com
State Superseded
Headers show
Series None | expand

Commit Message

Anton Ivanov Nov. 14, 2018, 5:09 p.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Support for DISCARD and WRITE_ZEROES in the ubd driver using
fallocate.

DISCARD is enabled by default and can be disabled using a new
UBD command line flag.

If the underlying fs on which the UBD image is stored does not
support DISCARD the support for both DISCARD and WRITE_ZEROES
is turned off.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/ubd_kern.c  | 66 +++++++++++++++++++++++++++++++++++++--------
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/file.c     | 10 +++++++
 3 files changed, 66 insertions(+), 11 deletions(-)

Comments

Anton Ivanov Nov. 14, 2018, 6:30 p.m. UTC | #1
On 11/14/18 5:09 PM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> Support for DISCARD and WRITE_ZEROES in the ubd driver using
> fallocate.
>
> DISCARD is enabled by default and can be disabled using a new
> UBD command line flag.
>
> If the underlying fs on which the UBD image is stored does not
> support DISCARD the support for both DISCARD and WRITE_ZEROES
> is turned off.
>
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   arch/um/drivers/ubd_kern.c  | 66 +++++++++++++++++++++++++++++++++++++--------
>   arch/um/include/shared/os.h |  1 +
>   arch/um/os-Linux/file.c     | 10 +++++++
>   3 files changed, 66 insertions(+), 11 deletions(-)
>
> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
> index 1672e3c49bfb..e85c7f7fda6f 100644
> --- a/arch/um/drivers/ubd_kern.c
> +++ b/arch/um/drivers/ubd_kern.c
> @@ -154,6 +154,7 @@ struct ubd {
>   	struct openflags openflags;
>   	unsigned shared:1;
>   	unsigned no_cow:1;
> +	unsigned no_trim:1;
>   	struct cow cow;
>   	struct platform_device pdev;
>   	struct request_queue *queue;
> @@ -177,6 +178,7 @@ struct ubd {
>   	.boot_openflags =	OPEN_FLAGS, \
>   	.openflags =		OPEN_FLAGS, \
>   	.no_cow =               0, \
> +	.no_trim =		0, \
>   	.shared =		0, \
>   	.cow =			DEFAULT_COW, \
>   	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
> @@ -323,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
>   		*index_out = n;
>   
>   	err = -EINVAL;
> -	for (i = 0; i < sizeof("rscd="); i++) {
> +	for (i = 0; i < sizeof("rscdt="); i++) {
>   		switch (*str) {
>   		case 'r':
>   			flags.w = 0;
> @@ -337,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
>   		case 'c':
>   			ubd_dev->shared = 1;
>   			break;
> +		case 't':
> +			ubd_dev->no_trim = 1;
> +			break;
>   		case '=':
>   			str++;
>   			goto break_loop;
>   		default:
>   			*error_out = "Expected '=' or flag letter "
> -				"(r, s, c, or d)";
> +				"(r, s, c, t or d)";
>   			goto out;
>   		}
>   		str++;
> @@ -415,6 +420,7 @@ __uml_help(ubd_setup,
>   "    'c' will cause the device to be treated as being shared between multiple\n"
>   "    UMLs and file locking will be turned off - this is appropriate for a\n"
>   "    cluster filesystem and inappropriate at almost all other times.\n\n"
> +"    't' will disable trim/discard support on the device (enabled by default).\n\n"
>   );
>   
>   static int udb_setup(char *str)
> @@ -513,9 +519,17 @@ static void ubd_handler(void)
>   		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
>   			struct io_thread_req *io_req = (*irq_req_buffer)[count];
>   
> -			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> -				__blk_mq_end_request(io_req->req, io_req->error);
> -
> +			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
> +				blk_queue_max_discard_sectors(io_req->req->q, 0);
> +				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
> +				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
> +			}
> +			if ((io_req->error) || (io_req->buffer == NULL))
> +				blk_mq_end_request(io_req->req, io_req->error);
> +			else {
> +				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
> +					__blk_mq_end_request(io_req->req, io_req->error);
> +			}
>   			kfree(io_req);
>   		}
>   	}
> @@ -829,6 +843,14 @@ static int ubd_open_dev(struct ubd *ubd_dev)
>   		if(err < 0) goto error;
>   		ubd_dev->cow.fd = err;
>   	}
> +	if (ubd_dev->no_trim == 0) {
> +		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
> +		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
> +		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
> +		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
> +		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
> +		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);


Sorry, I messed this up when reapplying the patch: this block should have set the
write-zeroes limits as well, rather than repeating the discard ones.

A revised version will be resent shortly.

A



> +	}
>   	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
>   	return 0;
>    error:
> @@ -1372,6 +1394,10 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
>   	case REQ_OP_WRITE:
>   		ret = queue_rw_req(hctx, req);
>   		break;
> +	case REQ_OP_DISCARD:
> +	case REQ_OP_WRITE_ZEROES:
> +		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
> +		break;
>   	default:
>   		WARN_ON_ONCE(1);
>   		res = BLK_STS_NOTSUPP;
> @@ -1463,7 +1489,7 @@ static int update_bitmap(struct io_thread_req *req)
>   
>   	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
>   			  sizeof(req->bitmap_words), req->cow_offset);
> -	if(n != sizeof(req->bitmap_words))
> +	if (n != sizeof(req->bitmap_words))
>   		return map_error(-n);
>   
>   	return map_error(0);
> @@ -1471,11 +1497,13 @@ static int update_bitmap(struct io_thread_req *req)
>   
>   static void do_io(struct io_thread_req *req)
>   {
> -	char *buf;
> +	char *buf = NULL;
>   	unsigned long len;
>   	int n, nsectors, start, end, bit;
>   	__u64 off;
>   
> +	/* FLUSH is really a special case, we cannot "case" it with others */
> +
>   	if (req_op(req->req) == REQ_OP_FLUSH) {
>   		/* fds[0] is always either the rw image or our cow file */
>   		req->error = map_error(-os_sync_file(req->fds[0]));
> @@ -1495,26 +1523,42 @@ static void do_io(struct io_thread_req *req)
>   		off = req->offset + req->offsets[bit] +
>   			start * req->sectorsize;
>   		len = (end - start) * req->sectorsize;
> -		buf = &req->buffer[start * req->sectorsize];
> +		if (req->buffer != NULL)
> +			buf = &req->buffer[start * req->sectorsize];
>   
> -		if (req_op(req->req) == REQ_OP_READ) {
> +		switch (req_op(req->req)) {
> +		case REQ_OP_READ:
>   			n = 0;
>   			do {
>   				buf = &buf[n];
>   				len -= n;
>   				n = os_pread_file(req->fds[bit], buf, len, off);
> -				if(n < 0){
> +				if (n < 0) {
>   					req->error = map_error(-n);
>   					return;
>   				}
>   			} while((n < len) && (n != 0));
>   			if (n < len) memset(&buf[n], 0, len - n);
> -		} else {
> +			break;
> +		case REQ_OP_WRITE:
>   			n = os_pwrite_file(req->fds[bit], buf, len, off);
>   			if(n != len){
>   				req->error = map_error(-n);
>   				return;
>   			}
> +			break;
> +		case REQ_OP_DISCARD:
> +		case REQ_OP_WRITE_ZEROES:
> +			n = os_falloc_punch(req->fds[bit], off, len);
> +			if (n) {
> +				req->error = map_error(-n);
> +				return;
> +			}
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			req->error = BLK_STS_NOTSUPP;
> +			return;
>   		}
>   
>   		start = end;
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 048ae37eb5aa..ebf23012a59b 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
>   extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
> +extern int os_falloc_punch(int fd, unsigned long long offset, int count);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index c0197097c86e..f25b110d4e70 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
>   {
>   	return makedev(major, minor);
>   }
> +
> +int os_falloc_punch(int fd, unsigned long long offset, int len)
> +{
> +	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
> +
> +	if (n < 0)
> +		return -errno;
> +	return n;
> +}
> +
diff mbox series

Patch

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 1672e3c49bfb..e85c7f7fda6f 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -154,6 +154,7 @@  struct ubd {
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
@@ -177,6 +178,7 @@  struct ubd {
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
 	.no_cow =               0, \
+	.no_trim =		0, \
 	.shared =		0, \
 	.cow =			DEFAULT_COW, \
 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
@@ -323,7 +325,7 @@  static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -337,12 +339,15 @@  static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -415,6 +420,7 @@  __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
@@ -513,9 +519,17 @@  static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 
-			if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-				__blk_mq_end_request(io_req->req, io_req->error);
-
+			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
+				blk_queue_max_discard_sectors(io_req->req->q, 0);
+				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
+				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
+			}
+			if ((io_req->error) || (io_req->buffer == NULL))
+				blk_mq_end_request(io_req->req, io_req->error);
+			else {
+				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+					__blk_mq_end_request(io_req->req, io_req->error);
+			}
 			kfree(io_req);
 		}
 	}
@@ -829,6 +843,14 @@  static int ubd_open_dev(struct ubd *ubd_dev)
 		if(err < 0) goto error;
 		ubd_dev->cow.fd = err;
 	}
+	if (ubd_dev->no_trim == 0) {
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+	}
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
  error:
@@ -1372,6 +1394,10 @@  static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_WRITE:
 		ret = queue_rw_req(hctx, req);
 		break;
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		res = BLK_STS_NOTSUPP;
@@ -1463,7 +1489,7 @@  static int update_bitmap(struct io_thread_req *req)
 
 	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
 			  sizeof(req->bitmap_words), req->cow_offset);
-	if(n != sizeof(req->bitmap_words))
+	if (n != sizeof(req->bitmap_words))
 		return map_error(-n);
 
 	return map_error(0);
@@ -1471,11 +1497,13 @@  static int update_bitmap(struct io_thread_req *req)
 
 static void do_io(struct io_thread_req *req)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
 	__u64 off;
 
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
 	if (req_op(req->req) == REQ_OP_FLUSH) {
 		/* fds[0] is always either the rw image or our cow file */
 		req->error = map_error(-os_sync_file(req->fds[0]));
@@ -1495,26 +1523,42 @@  static void do_io(struct io_thread_req *req)
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (req->buffer != NULL)
+			buf = &req->buffer[start * req->sectorsize];
 
-		if (req_op(req->req) == REQ_OP_READ) {
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
 				n = os_pread_file(req->fds[bit], buf, len, off);
-				if(n < 0){
+				if (n < 0) {
 					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
+			break;
+		case REQ_OP_WRITE:
 			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
 				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 048ae37eb5aa..ebf23012a59b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -175,6 +175,7 @@  extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c0197097c86e..f25b110d4e70 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -610,3 +610,13 @@  unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+