@@ -276,6 +276,24 @@ extern void FASTCALL(raise_softirq_irqoff(unsigned int nr));
extern void FASTCALL(raise_softirq(unsigned int nr));
+/*
+ * softirq delayed work: queued items are run at the end of do_softirq()
+ */
+struct softirq_delay {
+ struct softirq_delay *next;
+ void (*func)(struct softirq_delay *);
+};
+
+int softirq_delay_queue(struct softirq_delay *sdel);
+
+static inline void softirq_delay_init(struct softirq_delay *sdel,
+ void (*func)(struct softirq_delay *))
+{
+ sdel->next = NULL;
+ sdel->func = func;
+}
+
+
/* Tasklets --- multithreaded analogue of BHs.
The main feature distinguishing them from generic softirqs: a tasklet
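
For reviewers, a minimal sketch of how a subsystem would use this API, assuming a hypothetical object foo whose wakeup should be deferred (foo, foo_hold() and foo_deferred() are illustrative names, not part of this patch):

	/* Hypothetical user of the softirq_delay API (sketch only) */
	struct foo {
		struct softirq_delay sdel;
		/* ... */
	};

	static void foo_deferred(struct softirq_delay *sdel)
	{
		struct foo *f = container_of(sdel, struct foo, sdel);

		sdel->next = NULL;	/* mandatory: mark as no longer queued */
		/* do the deferred wakeup, then drop the reference taken below */
	}

	static void foo_init(struct foo *f)
	{
		softirq_delay_init(&f->sdel, foo_deferred);
	}

	/* called from softirq context instead of waking the task directly */
	static void foo_event(struct foo *f)
	{
		if (running_from_softirq()) {
			if (softirq_delay_queue(&f->sdel))
				foo_hold(f);	/* keep f alive until foo_deferred() runs */
		} else {
			/* wake immediately */
		}
	}

The sock changes later in this patch follow exactly this pattern.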
@@ -11,19 +11,21 @@
#ifndef _LINUX_TRACE_IRQFLAGS_H
#define _LINUX_TRACE_IRQFLAGS_H
+#define softirq_enter() do { current->softirq_context++; } while (0)
+#define softirq_exit() do { current->softirq_context--; } while (0)
+#define softirq_context(p) ((p)->softirq_context)
+#define running_from_softirq() (softirq_context(current) > 0)
+
#ifdef CONFIG_TRACE_IRQFLAGS
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
extern void trace_softirqs_on(unsigned long ip);
extern void trace_softirqs_off(unsigned long ip);
# define trace_hardirq_context(p) ((p)->hardirq_context)
-# define trace_softirq_context(p) ((p)->softirq_context)
# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
# define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
# define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
-# define trace_softirq_enter() do { current->softirq_context++; } while (0)
-# define trace_softirq_exit() do { current->softirq_context--; } while (0)
# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
#else
# define trace_hardirqs_on() do { } while (0)
@@ -31,13 +33,10 @@
# define trace_softirqs_on(ip) do { } while (0)
# define trace_softirqs_off(ip) do { } while (0)
# define trace_hardirq_context(p) 0
-# define trace_softirq_context(p) 0
# define trace_hardirqs_enabled(p) 0
# define trace_softirqs_enabled(p) 0
# define trace_hardirq_enter() do { } while (0)
# define trace_hardirq_exit() do { } while (0)
-# define trace_softirq_enter() do { } while (0)
-# define trace_softirq_exit() do { } while (0)
# define INIT_TRACE_IRQFLAGS
#endif
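
Since the counter now exists unconditionally, callers such as inet_def_readable() below can test it regardless of CONFIG_TRACE_IRQFLAGS. A user-space model of the counter semantics (illustrative only; softirq_context stands in for current->softirq_context):

	#include <assert.h>

	static int softirq_context;

	#define softirq_enter()        do { softirq_context++; } while (0)
	#define softirq_exit()         do { softirq_context--; } while (0)
	#define running_from_softirq() (softirq_context > 0)

	int main(void)
	{
		assert(!running_from_softirq());
		softirq_enter();		/* __do_softirq() begins */
		assert(running_from_softirq());	/* true inside every handler */
		softirq_exit();			/* __do_softirq() ends */
		assert(!running_from_softirq());
		return 0;
	}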
@@ -115,12 +115,16 @@ enum sock_shutdown_cmd {
struct socket {
socket_state state;
unsigned long flags;
- const struct proto_ops *ops;
+ /*
+ * Please keep fasync_list & wait fields in the same cache line
+ */
struct fasync_struct *fasync_list;
+ wait_queue_head_t wait;
+
struct file *file;
struct sock *sk;
- wait_queue_head_t wait;
short type;
+ const struct proto_ops *ops;
};
struct vm_area_struct;
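
The reorder above groups fasync_list and wait, both touched on the wakeup path, into the same cache line. A quick way to check such a layout offline, using a mock struct with illustrative field sizes (the real struct socket layout depends on the kernel config; this assumes the struct is 64-byte aligned):

	#include <stdio.h>
	#include <stddef.h>

	/* Mock of the reordered struct socket; sizes are illustrative */
	struct mock_socket {
		int state;
		unsigned long flags;
		void *fasync_list;	/* kept adjacent to wait ... */
		char wait[24];		/* ... so both share a cache line */
		void *file;
		void *sk;
		short type;
		const void *ops;
	};

	int main(void)
	{
		size_t a = offsetof(struct mock_socket, fasync_list);
		size_t b = offsetof(struct mock_socket, wait);

		/* same 64-byte line iff the offsets share a line index */
		printf("same cache line: %d\n", a / 64 == b / 64);
		return 0;
	}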
@@ -1111,8 +1111,8 @@ struct task_struct {
unsigned long softirq_enable_ip;
unsigned int softirq_enable_event;
int hardirq_context;
- int softirq_context;
#endif
+ int softirq_context;
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 30UL
u64 curr_chain_key;
@@ -236,6 +236,7 @@ struct sock {
unsigned long sk_lingertime;
struct sk_buff_head sk_error_queue;
struct proto *sk_prot_creator;
+ struct softirq_delay sk_delay;
rwlock_t sk_callback_lock;
int sk_err,
sk_err_soft;
@@ -859,6 +860,7 @@ extern void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
extern void sk_send_sigurg(struct sock *sk);
+extern void inet_def_readable(struct sock *sk, int len);
/*
* Functions to fill in entries in struct proto_ops when a protocol
@@ -28,6 +28,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset,
/* Designate sk as UDP-Lite socket */
static inline int udplite_sk_init(struct sock *sk)
{
+ sk->sk_data_ready = inet_def_readable;
udp_sk(sk)->pcflag = UDPLITE_BIT;
return 0;
}
@@ -1643,7 +1643,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
- trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
+ softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
trace_hardirqs_enabled(curr),
trace_softirqs_enabled(curr));
print_lock(this);
@@ -194,6 +194,42 @@ void local_bh_enable_ip(unsigned long ip)
}
EXPORT_SYMBOL(local_bh_enable_ip);
+
+#define SOFTIRQ_DELAY_END ((struct softirq_delay *)1L)
+static DEFINE_PER_CPU(struct softirq_delay *, softirq_delay_head) = {
+ SOFTIRQ_DELAY_END
+};
+
+/*
+ * Caller must disable preemption and take care of appropriate
+ * locking and refcounting.
+ */
+int softirq_delay_queue(struct softirq_delay *sdel)
+{
+ if (!sdel->next) {
+ sdel->next = __get_cpu_var(softirq_delay_head);
+ __get_cpu_var(softirq_delay_head) = sdel;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Locking is provided by the subsystem; note that sdel->func(sdel)
+ * is responsible for resetting sdel->next to NULL
+ */
+static void softirq_delay_exec(void)
+{
+ struct softirq_delay *sdel;
+
+ while ((sdel = __get_cpu_var(softirq_delay_head)) != SOFTIRQ_DELAY_END) {
+ __get_cpu_var(softirq_delay_head) = sdel->next;
+ sdel->func(sdel); /* func() must reset sdel->next to NULL */
+ }
+}
+
/*
* We restart softirq processing MAX_SOFTIRQ_RESTART times,
* and we fall back to softirqd after that.
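
The per-CPU list is LIFO and terminated by the (struct softirq_delay *)1L sentinel rather than by NULL, so next == NULL can mean "not queued" even for the last element; that keeps softirq_delay_queue() O(1) and idempotent. A stand-alone model of the mechanics (single CPU, no locking; illustrative only):

	#include <stdio.h>
	#include <stddef.h>

	struct softirq_delay {
		struct softirq_delay *next;
		void (*func)(struct softirq_delay *);
	};

	#define SOFTIRQ_DELAY_END ((struct softirq_delay *)1L)

	static struct softirq_delay *head = SOFTIRQ_DELAY_END;

	static int delay_queue(struct softirq_delay *sdel)
	{
		if (!sdel->next) {		/* not already queued */
			sdel->next = head;
			head = sdel;
			return 1;
		}
		return 0;			/* already queued: coalesced */
	}

	static void delay_exec(void)
	{
		struct softirq_delay *sdel;

		while ((sdel = head) != SOFTIRQ_DELAY_END) {
			head = sdel->next;
			sdel->func(sdel);	/* func must reset sdel->next */
		}
	}

	static void hello(struct softirq_delay *sdel)
	{
		sdel->next = NULL;
		puts("deferred work ran once");
	}

	int main(void)
	{
		struct softirq_delay d = { NULL, hello };

		delay_queue(&d);
		delay_queue(&d);	/* second queueing is absorbed */
		delay_exec();		/* prints a single line */
		return 0;
	}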
@@ -216,7 +252,7 @@ asmlinkage void __do_softirq(void)
account_system_vtime(current);
__local_bh_disable((unsigned long)__builtin_return_address(0));
- trace_softirq_enter();
+ softirq_enter();
cpu = smp_processor_id();
restart:
@@ -236,6 +272,8 @@ restart:
pending >>= 1;
} while (pending);
+ softirq_delay_exec();
+
local_irq_disable();
pending = local_softirq_pending();
@@ -245,7 +283,7 @@ restart:
if (pending)
wakeup_softirqd();
- trace_softirq_exit();
+ softirq_exit();
account_system_vtime(current);
_local_bh_enable();
@@ -157,11 +157,11 @@ static void init_shared_classes(void)
#define SOFTIRQ_ENTER() \
local_bh_disable(); \
local_irq_disable(); \
- trace_softirq_enter(); \
+ softirq_enter(); \
WARN_ON(!in_softirq());
#define SOFTIRQ_EXIT() \
- trace_softirq_exit(); \
+ softirq_exit(); \
local_irq_enable(); \
local_bh_enable();
@@ -213,6 +213,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
+static void sock_readable_defer(struct softirq_delay *sdel);
+
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
struct timeval tv;
@@ -996,6 +998,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
#endif
rwlock_init(&newsk->sk_dst_lock);
+ softirq_delay_init(&newsk->sk_delay, sock_readable_defer);
rwlock_init(&newsk->sk_callback_lock);
lockdep_set_class_and_name(&newsk->sk_callback_lock,
af_callback_keys + newsk->sk_family,
@@ -1509,6 +1512,45 @@ static void sock_def_readable(struct sock *sk, int len)
read_unlock(&sk->sk_callback_lock);
}
+/*
+ * Helper function called by softirq_delay_exec()
+ * if inet_def_readable() queued us.
+ */
+static void sock_readable_defer(struct softirq_delay *sdel)
+{
+ struct sock *sk = container_of(sdel, struct sock, sk_delay);
+
+ sdel->next = NULL;
+ /*
+ * At this point we don't own a lock on the socket, only a reference.
+ * We must commit the above write, or another CPU could miss a wakeup.
+ */
+ smp_wmb();
+ sock_def_readable(sk, 0);
+ sock_put(sk);
+}
+
+/*
+ * Custom version of sock_def_readable().
+ * We want to defer scheduler processing to the end of do_softirq().
+ * Called with the socket locked.
+ */
+void inet_def_readable(struct sock *sk, int len)
+{
+ if (running_from_softirq()) {
+ if (softirq_delay_queue(&sk->sk_delay))
+ /*
+ * If we queued this socket, take a reference on it.
+ * The caller owns the socket lock, so the write to
+ * sk_delay.next will be committed before unlock.
+ */
+ sock_hold(sk);
+ } else
+ sock_def_readable(sk, len);
+}
+
+EXPORT_SYMBOL(inet_def_readable);
+
static void sock_def_write_space(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
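
Putting the pieces together: the queueing side runs under the socket lock and takes one reference per queueing; the callback runs locklessly at softirq exit, clears next before the barrier, then wakes and releases. A user-space model of the reference flow (single-threaded, so the smp_wmb() pairing is elided; illustrative only):

	#include <stdio.h>
	#include <assert.h>

	struct sock_model {
		int refcnt;
		int queued;		/* stands in for sk_delay.next != NULL */
		int wakeups;
	};

	static void sock_hold(struct sock_model *sk) { sk->refcnt++; }
	static void sock_put(struct sock_model *sk)  { sk->refcnt--; }

	/* inet_def_readable() from softirq: queue once, hold once */
	static void data_ready(struct sock_model *sk)
	{
		if (!sk->queued) {
			sk->queued = 1;
			sock_hold(sk);
		}
	}

	/* sock_readable_defer() at do_softirq() end: unqueue, wake, release */
	static void readable_defer(struct sock_model *sk)
	{
		sk->queued = 0;		/* precedes smp_wmb() in the kernel */
		sk->wakeups++;
		sock_put(sk);
	}

	int main(void)
	{
		struct sock_model sk = { 1, 0, 0 };

		data_ready(&sk);	/* first packet queues the socket */
		data_ready(&sk);	/* further packets are coalesced */
		readable_defer(&sk);
		assert(sk.refcnt == 1 && sk.wakeups == 1);
		printf("one wakeup for two packets, refcount balanced\n");
		return 0;
	}

A side effect visible here: several packets delivered to one socket within a single softirq run produce a single wakeup.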
@@ -1586,6 +1628,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sleep = NULL;
rwlock_init(&sk->sk_dst_lock);
+ softirq_delay_init(&sk->sk_delay, sock_readable_defer);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
@@ -1226,6 +1226,12 @@ int udp_destroy_sock(struct sock *sk)
return 0;
}
+static int udp_init_sock(struct sock *sk)
+{
+ sk->sk_data_ready = inet_def_readable;
+ return 0;
+}
+
/*
* Socket option code for UDP
*/
@@ -1439,6 +1445,7 @@ struct proto udp_prot = {
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_init_sock,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
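
Only UDP, UDP-Lite (above) and UDPv6 (below) opt in; TCP keeps sock_def_readable(). The .init hook runs from inet_create() at socket(2) time, after sock_init_data() has installed the defaults, which is why assigning sk_data_ready there is sufficient for every new UDP socket. Roughly (abridged from net/ipv4/af_inet.c, quoted from memory):

	/* inet_create(), abridged */
	if (sk->sk_prot->init) {
		err = sk->sk_prot->init(sk);
		if (err)
			sk_common_release(sk);
	}

Sockets created by sk_clone() bypass this path, which is why sk_clone() calls softirq_delay_init() explicitly earlier in this patch.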
@@ -856,6 +856,12 @@ int udpv6_destroy_sock(struct sock *sk)
return 0;
}
+static int udpv6_init_sock(struct sock *sk)
+{
+ sk->sk_data_ready = inet_def_readable;
+ return 0;
+}
+
/*
* Socket option code for UDP
*/
@@ -979,6 +985,7 @@ struct proto udpv6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udpv6_init_sock,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,