Patchwork [2/2] UBUNTU: SAUCE: trace: add trace events for open(), exec() and uselib()

login
register
mail settings
Submitter Scott James Remnant
Date Oct. 27, 2009, 10:05 a.m.
Message ID <E1N2qD9-0007zT-6S@zelda.netsplit.com>
Download mbox | patch
Permalink /patch/37017/
State Accepted
Headers show

Comments

Scott James Remnant - Oct. 27, 2009, 10:05 a.m.
This patch uses TRACE_EVENT to add tracepoints for the open(),
exec() and uselib() syscalls so that ureadahead can cheaply trace
the boot sequence to determine what to read to speed up the next.

It's not upstream because it will need to be rebased onto the syscall
trace events whenever that gets merged, and is a stop-gap.

Signed-off-by: Scott James Remnant <scott@ubuntu.com>
---
 fs/exec.c                 |    8 +++++
 fs/open.c                 |    4 ++
 include/trace/events/fs.h |   71 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/fs.h
Stefan Bader - Oct. 29, 2009, 10:35 a.m.
Ok, as those trace events add negligible overhead when inactive and 
also the events we trace should not be done exhaustively and improve
the boot process as tests show.

Scott James Remnant wrote:
> This patch uses TRACE_EVENT to add tracepoints for the open(),
> exec() and uselib() syscalls so that ureadahead can cheaply trace
> the boot sequence to determine what to read to speed up the next.
> 
> It's not upstream because it will need to be rebased onto the syscall
> trace events whenever that gets merged, and is a stop-gap.
> 
> Signed-off-by: Scott James Remnant <scott@ubuntu.com>

Acked-by: Stefan Bader <stefan.bader@canonical.com>

> ---
>  fs/exec.c                 |    8 +++++
>  fs/open.c                 |    4 ++
>  include/trace/events/fs.h |   71 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 83 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/events/fs.h
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 172ceb6..c936999 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -56,6 +56,8 @@
>  #include <linux/fsnotify.h>
>  #include <linux/fs_struct.h>
>  
> +#include <trace/events/fs.h>
> +
>  #include <asm/uaccess.h>
>  #include <asm/mmu_context.h>
>  #include <asm/tlb.h>
> @@ -130,6 +132,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
>  
>  	fsnotify_open(file->f_path.dentry);
>  
> +	tmp = getname(library);
> +	trace_uselib(tmp);
> +	putname(library);
> +
>  	error = -ENOEXEC;
>  	if(file->f_op) {
>  		struct linux_binfmt * fmt;
> @@ -665,6 +671,8 @@ struct file *open_exec(const char *name)
>  
>  	fsnotify_open(file->f_path.dentry);
>  
> +	trace_open_exec(name);
> +
>  	err = deny_write_access(file);
>  	if (err)
>  		goto exit;
> diff --git a/fs/open.c b/fs/open.c
> index 04b9aad..41c87f3 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -31,6 +31,9 @@
>  #include <linux/falloc.h>
>  #include <linux/fs_struct.h>
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/fs.h>
> +
>  int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  {
>  	int retval = -ENODEV;
> @@ -1041,6 +1044,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
>  			} else {
>  				fsnotify_open(f->f_path.dentry);
>  				fd_install(fd, f);
> +				trace_do_sys_open(tmp, flags, mode);
>  			}
>  		}
>  		putname(tmp);
> diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
> new file mode 100644
> index 0000000..e967c55
> --- /dev/null
> +++ b/include/trace/events/fs.h
> @@ -0,0 +1,71 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM fs
> +
> +#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_FS_H
> +
> +#include <linux/fs.h>
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(do_sys_open,
> +
> +	TP_PROTO(char *filename, int flags, int mode),
> +
> +	TP_ARGS(filename, flags, mode),
> +
> +	TP_STRUCT__entry(
> +		__string(	filename, filename		)
> +		__field(	int, flags			)
> +		__field(	int, mode			)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(filename, filename);
> +		__entry->flags = flags;
> +		__entry->mode = mode;
> +	),
> +
> +	TP_printk("\"%s\" %x %o",
> +		  __get_str(filename), __entry->flags, __entry->mode)
> +);
> +
> +TRACE_EVENT(uselib,
> +
> +	TP_PROTO(char *filename),
> +
> +	TP_ARGS(filename),
> +
> +	TP_STRUCT__entry(
> +		__string(	filename, filename		)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(filename, filename);
> +	),
> +
> +	TP_printk("\"%s\"",
> +		  __get_str(filename))
> +);
> +
> +TRACE_EVENT(open_exec,
> +
> +	TP_PROTO(char *filename),
> +
> +	TP_ARGS(filename),
> +
> +	TP_STRUCT__entry(
> +		__string(	filename, filename		)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(filename, filename);
> +	),
> +
> +	TP_printk("\"%s\"",
> +		  __get_str(filename))
> +);
> +
> +#endif /* _TRACE_FS_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
Andy Whitcroft - Oct. 29, 2009, 11 a.m.
On Tue, Oct 27, 2009 at 10:05:32AM +0000, Scott James Remnant wrote:
> This patch uses TRACE_EVENT to add tracepoints for the open(),
> exec() and uselib() syscalls so that ureadahead can cheaply trace
> the boot sequence to determine what to read to speed up the next.
> 
> It's not upstream because it will need to be rebased onto the syscall
> trace events whenever that gets merged, and is a stop-gap.
> 
> Signed-off-by: Scott James Remnant <scott@ubuntu.com>
> ---
>  fs/exec.c                 |    8 +++++
>  fs/open.c                 |    4 ++
>  include/trace/events/fs.h |   71 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 83 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/events/fs.h
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 172ceb6..c936999 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -56,6 +56,8 @@
>  #include <linux/fsnotify.h>
>  #include <linux/fs_struct.h>
>  
> +#include <trace/events/fs.h>
> +
>  #include <asm/uaccess.h>
>  #include <asm/mmu_context.h>
>  #include <asm/tlb.h>
> @@ -130,6 +132,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
>  
>  	fsnotify_open(file->f_path.dentry);
>  
> +	tmp = getname(library);
> +	trace_uselib(tmp);
> +	putname(library);
> +

So the other two tracepoints look basically sane and cheap when off.
This one does add this getname/putname pair.  This appears to allocate
another copy of the filename and copy it from userspace.
getname() may fail to allocate memory and return a failure code, as the
trace point does not appear to check the string is valid would you not
need to check ERR_PTR(tmp) before calling the tracepint.

Can we not just hold the current filename a smidge longer and use that one,
that would be a lot less expensive me thinks if slightly more complex as
a patch.  This would make sure we could log it if we did it and avoid
the cost?

-apw
Andy Whitcroft - Oct. 29, 2009, 12:17 p.m.
> Can we not just hold the current filename a smidge longer and use that one,
> that would be a lot less expensive me thinks if slightly more complex as
> a patch.  This would make sure we could log it if we did it and avoid
> the cost?

After discussing this it seems the uselib modifications are not required
therefore we can drop that part while we sort out the issues here.
Without that part:

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw

Patch

diff --git a/fs/exec.c b/fs/exec.c
index 172ceb6..c936999 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,8 @@ 
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 
+#include <trace/events/fs.h>
+
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
@@ -130,6 +132,10 @@  SYSCALL_DEFINE1(uselib, const char __user *, library)
 
 	fsnotify_open(file->f_path.dentry);
 
+	tmp = getname(library);
+	trace_uselib(tmp);
+	putname(library);
+
 	error = -ENOEXEC;
 	if(file->f_op) {
 		struct linux_binfmt * fmt;
@@ -665,6 +671,8 @@  struct file *open_exec(const char *name)
 
 	fsnotify_open(file->f_path.dentry);
 
+	trace_open_exec(name);
+
 	err = deny_write_access(file);
 	if (err)
 		goto exit;
diff --git a/fs/open.c b/fs/open.c
index 04b9aad..41c87f3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,6 +31,9 @@ 
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs.h>
+
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	int retval = -ENODEV;
@@ -1041,6 +1044,7 @@  long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 			} else {
 				fsnotify_open(f->f_path.dentry);
 				fd_install(fd, f);
+				trace_do_sys_open(tmp, flags, mode);
 			}
 		}
 		putname(tmp);
diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
new file mode 100644
index 0000000..e967c55
--- /dev/null
+++ b/include/trace/events/fs.h
@@ -0,0 +1,71 @@ 
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs
+
+#if !defined(_TRACE_FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_H
+
+#include <linux/fs.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(do_sys_open,
+
+	TP_PROTO(char *filename, int flags, int mode),
+
+	TP_ARGS(filename, flags, mode),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+		__field(	int, flags			)
+		__field(	int, mode			)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+		__entry->flags = flags;
+		__entry->mode = mode;
+	),
+
+	TP_printk("\"%s\" %x %o",
+		  __get_str(filename), __entry->flags, __entry->mode)
+);
+
+TRACE_EVENT(uselib,
+
+	TP_PROTO(char *filename),
+
+	TP_ARGS(filename),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+	),
+
+	TP_printk("\"%s\"",
+		  __get_str(filename))
+);
+
+TRACE_EVENT(open_exec,
+
+	TP_PROTO(char *filename),
+
+	TP_ARGS(filename),
+
+	TP_STRUCT__entry(
+		__string(	filename, filename		)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, filename);
+	),
+
+	TP_printk("\"%s\"",
+		  __get_str(filename))
+);
+
+#endif /* _TRACE_FS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>