@@ -315,7 +315,8 @@
#define H_SCM_HEALTH 0x400
#define H_SCM_PERFORMANCE_STATS 0x418
#define H_RPT_INVALIDATE 0x448
-#define MAX_HCALL_OPCODE H_RPT_INVALIDATE
+#define H_IDLE_HINT 0x44C
+#define MAX_HCALL_OPCODE H_IDLE_HINT
/* Scope args for H_SCM_UNBIND_ALL */
#define H_UNBIND_SCOPE_ALL (0x1)
@@ -931,6 +931,18 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		if (tvcpu->arch.ceded)
 			kvmppc_fast_vcpu_kick_hv(tvcpu);
 		break;
+	case H_IDLE_HINT:
+		target = kvmppc_get_gpr(vcpu, 4);
+		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+		if (!tvcpu) {
+			ret = H_PARAMETER;
+			break;
+		}
+		/* Hint is returned to the guest in R4; R3 carries H_SUCCESS. */
+		ret = kvm_vcpu_provide_idle_hint(tvcpu);
+		kvmppc_set_gpr(vcpu, 4, ret);
+		ret = H_SUCCESS;
+		break;
case H_CONFER:
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
@@ -1145,6 +1156,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_CEDE:
case H_PROD:
case H_CONFER:
+ case H_IDLE_HINT:
case H_REGISTER_VPA:
case H_SET_MODE:
case H_LOGICAL_CI_LOAD:
@@ -5359,6 +5371,7 @@ static unsigned int default_hcall_list[] = {
H_PROD,
H_CONFER,
H_REGISTER_VPA,
+ H_IDLE_HINT,
#ifdef CONFIG_KVM_XICS
H_EOI,
H_CPPR,
@@ -46,6 +46,7 @@
{H_CEDE, "H_CEDE"}, \
{H_CONFER, "H_CONFER"}, \
{H_PROD, "H_PROD"}, \
+ {H_IDLE_HINT, "H_IDLE_HINT"}, \
{H_GET_PPP, "H_GET_PPP"}, \
{H_SET_PPP, "H_SET_PPP"}, \
{H_PURR, "H_PURR"}, \
@@ -843,6 +843,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
+unsigned long kvm_vcpu_provide_idle_hint(struct kvm_vcpu *target);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
@@ -1688,6 +1688,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
+extern unsigned long get_idle_hint(struct task_struct *p);
/**
* task_nice - return the nice value of a given task.
@@ -6812,6 +6812,27 @@ int __sched yield_to(struct task_struct *p, bool preempt)
 }
 EXPORT_SYMBOL_GPL(yield_to);
+/**
+ * get_idle_hint - query a task's scheduling class for an idle hint.
+ * @p: the task to query.
+ *
+ * Used by the hypervisor to hint to a VM whether the task backing a
+ * vCPU could be scheduled instantly or not.
+ *
+ * Return: the class-specific hint (non-zero when the task could run
+ * immediately), or 0 when the scheduling class provides no hint.
+ */
+unsigned long __sched get_idle_hint(struct task_struct *p)
+{
+	unsigned long ret = 0;
+
+	if (p->sched_class->get_idle_hint)
+		ret = p->sched_class->get_idle_hint(p);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(get_idle_hint);
+
int io_schedule_prepare(void)
{
int old_iowait = current->in_iowait;
@@ -7236,6 +7236,24 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
 	return true;
 }
+/*
+ * get_idle_hint_fair - report whether @p's previous CPU could run it now.
+ *
+ * Returns 1 when the CPU @p last ran on is available-idle or is running
+ * only SCHED_IDLE tasks, 0 otherwise. available_idle_cpu() and
+ * sched_idle_cpu() only exist on SMP builds; on UP the hint is always 0.
+ */
+static unsigned long get_idle_hint_fair(struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+	unsigned int prev_cpu = task_cpu(p);
+
+	if (available_idle_cpu(prev_cpu) || sched_idle_cpu(prev_cpu))
+		return 1;
+#endif
+	return 0;
+}
+
#ifdef CONFIG_SMP
/**************************************************
* Fair scheduling class load-balancing methods.
@@ -11264,6 +11274,8 @@ DEFINE_SCHED_CLASS(fair) = {
.task_change_group = task_change_group_fair,
#endif
+ .get_idle_hint = get_idle_hint_fair,
+
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
@@ -1871,6 +1871,7 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
+ unsigned long (*get_idle_hint)(struct task_struct *p);
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
@@ -2907,6 +2907,34 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+/*
+ * kvm_vcpu_provide_idle_hint - ask the scheduler whether @target's task
+ * could be scheduled immediately.
+ *
+ * Returns the scheduler's idle hint for the vCPU task, or 0 when the
+ * vCPU has no associated task.
+ */
+unsigned long kvm_vcpu_provide_idle_hint(struct kvm_vcpu *target)
+{
+	struct pid *pid;
+	struct task_struct *task = NULL;
+	unsigned long ret;
+
+	rcu_read_lock();
+	pid = rcu_dereference(target->pid);
+	if (pid)
+		task = get_pid_task(pid, PIDTYPE_PID);
+	rcu_read_unlock();
+	if (!task)
+		return 0;
+
+	ret = get_idle_hint(task);
+	/* get_pid_task() took a reference on the task; drop it. */
+	put_task_struct(task);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_provide_idle_hint);
+
/*
* Helper that checks whether a VCPU is eligible for directed yield.
* Most eligible candidate to yield is decided by following heuristics:
H_IDLE_HINT is a new hcall introduced to provide a hint to the guest OS indicating whether a given vCPU can be scheduled instantly by the hypervisor. The task scheduler generally prefers the previous CPU of a task when that CPU is available-idle. So if the prev_cpu of the task_struct backing the given vCPU is found to be available-idle or sched-idle, hint to the guest OS that the vCPU can be scheduled instantly. Signed-off-by: Parth Shah <parth@linux.ibm.com> --- arch/powerpc/include/asm/hvcall.h | 3 ++- arch/powerpc/kvm/book3s_hv.c | 13 +++++++++++++ arch/powerpc/kvm/trace_hv.h | 1 + include/linux/kvm_host.h | 1 + include/linux/sched.h | 1 + kernel/sched/core.c | 13 +++++++++++++ kernel/sched/fair.c | 12 ++++++++++++ kernel/sched/sched.h | 1 + virt/kvm/kvm_main.c | 17 +++++++++++++++++ 9 files changed, 61 insertions(+), 1 deletion(-)