
[SRU,OEM] drm/i915: Backport gen9+ watermark fixes from 5.0

Message ID 20190307065803.16835-1-tjaalton@ubuntu.com
State New
Series [SRU,OEM] drm/i915: Backport gen9+ watermark fixes from 5.0

Commit Message

Timo Aaltonen March 7, 2019, 6:58 a.m. UTC
From: Timo Aaltonen <timo.aaltonen@canonical.com>

BugLink: https://launchpad.net/bugs/1817848

Apply 50 backported commits plus build fixes from:

https://github.com/vsyrjala/linux.git skl_wm_backport_4.15

They fix screen flicker issues seen with new panel models, among other
problems. A partial backport was not feasible.

Signed-off-by: Timo Aaltonen <timo.aaltonen@canonical.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c       |   21 +-
 drivers/gpu/drm/i915/i915_drv.c           |  301 +++++
 drivers/gpu/drm/i915/i915_drv.h           |   60 +-
 drivers/gpu/drm/i915/i915_gem.c           |    4 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c |    2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c       |    2 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c    |    2 +-
 drivers/gpu/drm/i915/i915_reg.h           |   51 +
 drivers/gpu/drm/i915/intel_atomic.c       |    6 +-
 drivers/gpu/drm/i915/intel_atomic_plane.c |   68 +-
 drivers/gpu/drm/i915/intel_audio.c        |    2 +-
 drivers/gpu/drm/i915/intel_bios.c         |    2 +-
 drivers/gpu/drm/i915/intel_cdclk.c        |    2 +-
 drivers/gpu/drm/i915/intel_ddi.c          |    2 +-
 drivers/gpu/drm/i915/intel_display.c      |  256 ++++-
 drivers/gpu/drm/i915/intel_dp.c           |    4 +-
 drivers/gpu/drm/i915/intel_drv.h          |  104 +-
 drivers/gpu/drm/i915/intel_lvds.c         |    2 +-
 drivers/gpu/drm/i915/intel_mocs.c         |    2 +-
 drivers/gpu/drm/i915/intel_panel.c        |    2 +-
 drivers/gpu/drm/i915/intel_pm.c           | 1206 ++++++++++++++-------
 drivers/gpu/drm/i915/intel_psr.c          |    4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c   |    2 +-
 drivers/gpu/drm/i915/intel_runtime_pm.c   |   41 +-
 drivers/gpu/drm/i915/intel_sprite.c       |   16 +-
 drivers/gpu/drm/i915/intel_uncore.c       |    6 +-
 26 files changed, 1629 insertions(+), 541 deletions(-)

Comments

AceLan Kao March 12, 2019, 7:44 a.m. UTC | #1
QA has verified these changes; they should be safe to merge.

Acked-by: AceLan Kao <acelan.kao@canonical.com>
Timo Aaltonen March 15, 2019, 6:29 a.m. UTC | #2
On 7.3.2019 8.58, Timo Aaltonen wrote:
> From: Timo Aaltonen <timo.aaltonen@canonical.com>
> 
> BugLink: https://launchpad.net/bugs/1817848
> 
> Apply 50 backported commits plus build fixes from:
> 
> https://github.com/vsyrjala/linux.git skl_wm_backport_4.15
> 
> They fix screen flicker issues seen with new panel models, among other
> problems. A partial backport was not feasible.
> 
> Signed-off-by: Timo Aaltonen <timo.aaltonen@canonical.com>

applied to oem-next

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c65e381b85f3..b35937c82f80 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3448,31 +3448,32 @@  static int i915_ddb_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct drm_device *dev = &dev_priv->drm;
-	struct skl_ddb_allocation *ddb;
 	struct skl_ddb_entry *entry;
-	enum pipe pipe;
-	int plane;
+	struct intel_crtc *crtc;
 
 	if (INTEL_GEN(dev_priv) < 9)
 		return 0;
 
 	drm_modeset_lock_all(dev);
 
-	ddb = &dev_priv->wm.skl_hw.ddb;
-
 	seq_printf(m, "%-15s%8s%8s%8s\n", "", "Start", "End", "Size");
 
-	for_each_pipe(dev_priv, pipe) {
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
+		enum pipe pipe = crtc->pipe;
+		enum plane_id plane_id;
+
 		seq_printf(m, "Pipe %c\n", pipe_name(pipe));
 
-		for_each_universal_plane(dev_priv, pipe, plane) {
-			entry = &ddb->plane[pipe][plane];
-			seq_printf(m, "  Plane%-8d%8u%8u%8u\n", plane + 1,
+		for_each_plane_id_on_crtc(crtc, plane_id) {
+			entry = &crtc_state->wm.skl.plane_ddb_y[plane_id];
+			seq_printf(m, "  Plane%-8d%8u%8u%8u\n", plane_id + 1,
 				   entry->start, entry->end,
 				   skl_ddb_entry_size(entry));
 		}
 
-		entry = &ddb->plane[pipe][PLANE_CURSOR];
+		entry = &crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR];
 		seq_printf(m, "  %-13s%8u%8u%8u\n", "Cursor", entry->start,
 			   entry->end, skl_ddb_entry_size(entry));
 	}
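
The hunk above is representative of the series as a whole: gen9+ DDB allocations move out of the global dev_priv->wm.skl_hw.ddb array and into per-crtc state. A rough standalone sketch of the data-structure change (simplified types and array sizes, not part of the patch):

#include <stdint.h>

struct skl_ddb_entry { uint16_t start, end; };

/* before: one global array, indexed by pipe and plane */
struct skl_ddb_allocation_old {
	struct skl_ddb_entry plane[3][8];	/* [pipe][plane_id] */
};

/* after: each crtc's state carries its own plane allocations */
struct skl_crtc_wm_state_new {
	struct skl_ddb_entry plane_ddb_y[8];	/* [plane_id] */
	struct skl_ddb_entry plane_ddb_uv[8];
};

/* the size printed by the debugfs dump is simply end - start */
static inline uint16_t skl_ddb_entry_size(const struct skl_ddb_entry *e)
{
	return e->end - e->start;
}
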
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 10ff0e2ffb8d..243ba011ea87 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1073,6 +1073,301 @@  static void intel_sanitize_options(struct drm_i915_private *dev_priv)
 	intel_gvt_sanitize_options(dev_priv);
 }
 
+static enum dram_rank skl_get_dimm_rank(u8 size, u32 rank)
+{
+	if (size == 0)
+		return I915_DRAM_RANK_INVALID;
+	if (rank == SKL_DRAM_RANK_SINGLE)
+		return I915_DRAM_RANK_SINGLE;
+	else if (rank == SKL_DRAM_RANK_DUAL)
+		return I915_DRAM_RANK_DUAL;
+
+	return I915_DRAM_RANK_INVALID;
+}
+
+static bool
+skl_is_16gb_dimm(enum dram_rank rank, u8 size, u8 width)
+{
+	if (rank == I915_DRAM_RANK_SINGLE && width == 8 && size == 16)
+		return true;
+	else if (rank == I915_DRAM_RANK_DUAL && width == 8 && size == 32)
+		return true;
+	else if (rank == SKL_DRAM_RANK_SINGLE && width == 16 && size == 8)
+		return true;
+	else if (rank == SKL_DRAM_RANK_DUAL && width == 16 && size == 16)
+		return true;
+
+	return false;
+}
+
+static int
+skl_dram_get_channel_info(struct dram_channel_info *ch, u32 val)
+{
+	u32 tmp_l, tmp_s;
+	u32 s_val = val >> SKL_DRAM_S_SHIFT;
+
+	if (!val)
+		return -EINVAL;
+
+	tmp_l = val & SKL_DRAM_SIZE_MASK;
+	tmp_s = s_val & SKL_DRAM_SIZE_MASK;
+
+	if (tmp_l == 0 && tmp_s == 0)
+		return -EINVAL;
+
+	ch->l_info.size = tmp_l;
+	ch->s_info.size = tmp_s;
+
+	tmp_l = (val & SKL_DRAM_WIDTH_MASK) >> SKL_DRAM_WIDTH_SHIFT;
+	tmp_s = (s_val & SKL_DRAM_WIDTH_MASK) >> SKL_DRAM_WIDTH_SHIFT;
+	ch->l_info.width = (1 << tmp_l) * 8;
+	ch->s_info.width = (1 << tmp_s) * 8;
+
+	tmp_l = val & SKL_DRAM_RANK_MASK;
+	tmp_s = s_val & SKL_DRAM_RANK_MASK;
+	ch->l_info.rank = skl_get_dimm_rank(ch->l_info.size, tmp_l);
+	ch->s_info.rank = skl_get_dimm_rank(ch->s_info.size, tmp_s);
+
+	if (ch->l_info.rank == I915_DRAM_RANK_DUAL ||
+	    ch->s_info.rank == I915_DRAM_RANK_DUAL)
+		ch->rank = I915_DRAM_RANK_DUAL;
+	else if (ch->l_info.rank == I915_DRAM_RANK_SINGLE &&
+		 ch->s_info.rank == I915_DRAM_RANK_SINGLE)
+		ch->rank = I915_DRAM_RANK_DUAL;
+	else
+		ch->rank = I915_DRAM_RANK_SINGLE;
+
+	ch->is_16gb_dimm = skl_is_16gb_dimm(ch->l_info.rank, ch->l_info.size,
+					    ch->l_info.width) ||
+			   skl_is_16gb_dimm(ch->s_info.rank, ch->s_info.size,
+					    ch->s_info.width);
+
+	DRM_DEBUG_KMS("(size:width:rank) L(%dGB:X%d:%s) S(%dGB:X%d:%s)\n",
+		      ch->l_info.size, ch->l_info.width,
+		      ch->l_info.rank ? "dual" : "single",
+		      ch->s_info.size, ch->s_info.width,
+		      ch->s_info.rank ? "dual" : "single");
+
+	return 0;
+}
+
+static bool
+intel_is_dram_symmetric(u32 val_ch0, u32 val_ch1,
+			struct dram_channel_info *ch0)
+{
+	return (val_ch0 == val_ch1 &&
+		(ch0->s_info.size == 0 ||
+		 (ch0->l_info.size == ch0->s_info.size &&
+		  ch0->l_info.width == ch0->s_info.width &&
+		  ch0->l_info.rank == ch0->s_info.rank)));
+}
+
+static int
+skl_dram_get_channels_info(struct drm_i915_private *dev_priv)
+{
+	struct dram_info *dram_info = &dev_priv->dram_info;
+	struct dram_channel_info ch0, ch1;
+	u32 val_ch0, val_ch1;
+	int ret;
+
+	val_ch0 = I915_READ(SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN);
+	ret = skl_dram_get_channel_info(&ch0, val_ch0);
+	if (ret == 0)
+		dram_info->num_channels++;
+
+	val_ch1 = I915_READ(SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN);
+	ret = skl_dram_get_channel_info(&ch1, val_ch1);
+	if (ret == 0)
+		dram_info->num_channels++;
+
+	if (dram_info->num_channels == 0) {
+		DRM_INFO("Number of memory channels is zero\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * If either channel is a single rank channel, the worst case
+	 * output will be the same as for single rank memory, so treat
+	 * the configuration as single rank memory.
+	 */
+	if (ch0.rank == I915_DRAM_RANK_SINGLE ||
+	    ch1.rank == I915_DRAM_RANK_SINGLE)
+		dram_info->rank = I915_DRAM_RANK_SINGLE;
+	else
+		dram_info->rank = max(ch0.rank, ch1.rank);
+
+	if (dram_info->rank == I915_DRAM_RANK_INVALID) {
+		DRM_INFO("couldn't get memory rank information\n");
+		return -EINVAL;
+	}
+
+	dram_info->is_16gb_dimm = ch0.is_16gb_dimm || ch1.is_16gb_dimm;
+
+	dev_priv->dram_info.symmetric_memory = intel_is_dram_symmetric(val_ch0,
+								       val_ch1,
+								       &ch0);
+
+	DRM_DEBUG_KMS("memory configuration is %sSymmetric memory\n",
+		      dev_priv->dram_info.symmetric_memory ? "" : "not ");
+	return 0;
+}
+
+static int
+skl_get_dram_info(struct drm_i915_private *dev_priv)
+{
+	struct dram_info *dram_info = &dev_priv->dram_info;
+	u32 mem_freq_khz, val;
+	int ret;
+
+	ret = skl_dram_get_channels_info(dev_priv);
+	if (ret)
+		return ret;
+
+	val = I915_READ(SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
+	mem_freq_khz = DIV_ROUND_UP((val & SKL_REQ_DATA_MASK) *
+				    SKL_MEMORY_FREQ_MULTIPLIER_HZ, 1000);
+
+	dram_info->bandwidth_kbps = dram_info->num_channels *
+							mem_freq_khz * 8;
+
+	if (dram_info->bandwidth_kbps == 0) {
+		DRM_INFO("Couldn't get system memory bandwidth\n");
+		return -EINVAL;
+	}
+
+	dram_info->valid = true;
+	return 0;
+}
+
+static int
+bxt_get_dram_info(struct drm_i915_private *dev_priv)
+{
+	struct dram_info *dram_info = &dev_priv->dram_info;
+	u32 dram_channels;
+	u32 mem_freq_khz, val;
+	u8 num_active_channels;
+	int i;
+
+	val = I915_READ(BXT_P_CR_MC_BIOS_REQ_0_0_0);
+	mem_freq_khz = DIV_ROUND_UP((val & BXT_REQ_DATA_MASK) *
+				    BXT_MEMORY_FREQ_MULTIPLIER_HZ, 1000);
+
+	dram_channels = val & BXT_DRAM_CHANNEL_ACTIVE_MASK;
+	num_active_channels = hweight32(dram_channels);
+
+	/* Each active bit represents a 4-byte wide channel */
+	dram_info->bandwidth_kbps = (mem_freq_khz * num_active_channels * 4);
+
+	if (dram_info->bandwidth_kbps == 0) {
+		DRM_INFO("Couldn't get system memory bandwidth\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Now read each DUNIT8/9/10/11 to check the rank of each DIMM.
+	 */
+	for (i = BXT_D_CR_DRP0_DUNIT_START; i <= BXT_D_CR_DRP0_DUNIT_END; i++) {
+		u8 size, width;
+		enum dram_rank rank;
+		u32 tmp;
+
+		val = I915_READ(BXT_D_CR_DRP0_DUNIT(i));
+		if (val == 0xFFFFFFFF)
+			continue;
+
+		dram_info->num_channels++;
+		tmp = val & BXT_DRAM_RANK_MASK;
+
+		if (tmp == BXT_DRAM_RANK_SINGLE)
+			rank = I915_DRAM_RANK_SINGLE;
+		else if (tmp == BXT_DRAM_RANK_DUAL)
+			rank = I915_DRAM_RANK_DUAL;
+		else
+			rank = I915_DRAM_RANK_INVALID;
+
+		tmp = val & BXT_DRAM_SIZE_MASK;
+		if (tmp == BXT_DRAM_SIZE_4GB)
+			size = 4;
+		else if (tmp == BXT_DRAM_SIZE_6GB)
+			size = 6;
+		else if (tmp == BXT_DRAM_SIZE_8GB)
+			size = 8;
+		else if (tmp == BXT_DRAM_SIZE_12GB)
+			size = 12;
+		else if (tmp == BXT_DRAM_SIZE_16GB)
+			size = 16;
+		else
+			size = 0;
+
+		tmp = (val & BXT_DRAM_WIDTH_MASK) >> BXT_DRAM_WIDTH_SHIFT;
+		width = (1 << tmp) * 8;
+		DRM_DEBUG_KMS("dram size:%dGB width:X%d rank:%s\n", size,
+			      width, rank == I915_DRAM_RANK_SINGLE ? "single" :
+			      rank == I915_DRAM_RANK_DUAL ? "dual" : "unknown");
+
+		/*
+		 * If either channel is a single rank channel, the worst
+		 * case output will be the same as for single rank memory,
+		 * so treat the configuration as single rank memory.
+		 */
+		if (dram_info->rank == I915_DRAM_RANK_INVALID)
+			dram_info->rank = rank;
+		else if (rank == I915_DRAM_RANK_SINGLE)
+			dram_info->rank = I915_DRAM_RANK_SINGLE;
+	}
+
+	if (dram_info->rank == I915_DRAM_RANK_INVALID) {
+		DRM_INFO("couldn't get memory rank information\n");
+		return -EINVAL;
+	}
+
+	dram_info->valid = true;
+	return 0;
+}
+
+static void
+intel_get_dram_info(struct drm_i915_private *dev_priv)
+{
+	struct dram_info *dram_info = &dev_priv->dram_info;
+	char bandwidth_str[32];
+	int ret;
+
+	dram_info->valid = false;
+	dram_info->rank = I915_DRAM_RANK_INVALID;
+	dram_info->bandwidth_kbps = 0;
+	dram_info->num_channels = 0;
+
+	/*
+	 * Assume 16Gb DIMMs are present until proven otherwise.
+	 * This is only used for the level 0 watermark latency
+	 * w/a which does not apply to bxt/glk.
+	 */
+	dram_info->is_16gb_dimm = !IS_GEN9_LP(dev_priv);
+
+	if (INTEL_GEN(dev_priv) < 9 || IS_GEMINILAKE(dev_priv))
+		return;
+
+	/* Need to calculate bandwidth only for Gen9 */
+	if (IS_BROXTON(dev_priv))
+		ret = bxt_get_dram_info(dev_priv);
+	else if (INTEL_GEN(dev_priv) == 9)
+		ret = skl_get_dram_info(dev_priv);
+	else
+		ret = skl_dram_get_channels_info(dev_priv);
+	if (ret)
+		return;
+
+	if (dram_info->bandwidth_kbps)
+		sprintf(bandwidth_str, "%d KBps", dram_info->bandwidth_kbps);
+	else
+		sprintf(bandwidth_str, "unknown");
+	DRM_DEBUG_KMS("DRAM bandwidth:%s, total-channels: %u\n",
+		      bandwidth_str, dram_info->num_channels);
+	DRM_DEBUG_KMS("DRAM rank: %s, 16GB-dimm: %s\n",
+		      (dram_info->rank == I915_DRAM_RANK_DUAL) ?
+		      "dual" : "single", yesno(dram_info->is_16gb_dimm));
+}
+
 /**
  * i915_driver_init_hw - setup state requiring device access
  * @dev_priv: device private
@@ -1181,6 +1476,12 @@  static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto out_ggtt;
 
+	/*
+	 * Fill the dram structure to get the system raw bandwidth and
+	 * dram info. This will be used for memory latency calculation.
+	 */
+	intel_get_dram_info(dev_priv);
+
 	return 0;
 
 out_ggtt:
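
The SKL raw-bandwidth arithmetic above is easy to sanity-check in isolation. A minimal userspace sketch (the register value and channel count below are hypothetical, not read from hardware; each SKL channel is 8 bytes wide, hence the * 8):

#include <stdio.h>
#include <stdint.h>

#define SKL_REQ_DATA_MASK		0xF
#define SKL_MEMORY_FREQ_MULTIPLIER_HZ	266666666ULL
#define DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))

int main(void)
{
	uint32_t val = 0xC;		/* hypothetical PCU readout (DDR4-3200) */
	unsigned int num_channels = 2;	/* hypothetical: both channels populated */
	uint64_t mem_freq_khz, bandwidth_kbps;

	mem_freq_khz = DIV_ROUND_UP((val & SKL_REQ_DATA_MASK) *
				    SKL_MEMORY_FREQ_MULTIPLIER_HZ, 1000);
	bandwidth_kbps = num_channels * mem_freq_khz * 8;

	/* prints: 3200000 kHz -> 51200000 kBps, i.e. 51.2 GB/s */
	printf("%llu kHz -> %llu kBps\n",
	       (unsigned long long)mem_freq_khz,
	       (unsigned long long)bandwidth_kbps);
	return 0;
}
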
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7e9431af999d..6fa6931b425d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -331,6 +331,9 @@  enum plane_id {
 	PLANE_SPRITE0,
 	PLANE_SPRITE1,
 	PLANE_SPRITE2,
+	PLANE_SPRITE3,
+	PLANE_SPRITE4,
+	PLANE_SPRITE5,
 	PLANE_CURSOR,
 	I915_MAX_PLANES,
 };
@@ -577,6 +580,30 @@  struct i915_hotplug {
 	     (__i)++) \
 		for_each_if (crtc)
 
+#define for_each_oldnew_intel_crtc_in_state(__state, crtc, old_crtc_state, new_crtc_state, __i) \
+	for ((__i) = 0; \
+	     (__i) < (__state)->base.dev->mode_config.num_crtc && \
+		     ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \
+		      (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \
+		      (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \
+	     (__i)++) \
+		for_each_if (crtc)
+
+#define for_each_old_intel_plane_in_state(__state, plane, old_plane_state, __i) \
+	for ((__i) = 0; \
+	     (__i) < (__state)->base.dev->mode_config.num_total_plane && \
+		     ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
+		      (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), 1); \
+	     (__i)++) \
+		for_each_if (plane)
+
+#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \
+	for ((__i) = 0; \
+	     (__i) < (__state)->base.dev->mode_config.num_total_plane && \
+		     ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
+		      (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \
+	     (__i)++) \
+		for_each_if (plane)
 
 #define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \
 	for ((__i) = 0; \
@@ -1868,11 +1895,10 @@  static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
 }
 
 struct skl_ddb_allocation {
-	struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */
-	struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES];
+	u8 enabled_slices; /* GEN11 has configurable 2 slices */
 };
 
-struct skl_wm_values {
+struct skl_ddb_values {
 	unsigned dirty_pipes;
 	struct skl_ddb_allocation ddb;
 };
@@ -1887,6 +1913,7 @@  struct skl_wm_level {
 struct skl_wm_params {
 	bool x_tiled, y_tiled;
 	bool rc_surface;
+	bool is_planar;
 	uint32_t width;
 	uint8_t cpp;
 	uint32_t plane_pixel_rate;
@@ -1895,6 +1922,7 @@  struct skl_wm_params {
 	uint_fixed_16_16_t plane_blocks_per_line;
 	uint_fixed_16_16_t y_tile_minimum;
 	uint32_t linetime_us;
+	uint32_t dbuf_block_size;
 };
 
 /*
@@ -2537,7 +2565,7 @@  struct drm_i915_private {
 		/* current hardware state */
 		union {
 			struct ilk_wm_values hw;
-			struct skl_wm_values skl_hw;
+			struct skl_ddb_values skl_hw;
 			struct vlv_wm_values vlv;
 			struct g4x_wm_values g4x;
 		};
@@ -2559,6 +2587,19 @@  struct drm_i915_private {
 		bool distrust_bios_wm;
 	} wm;
 
+	struct dram_info {
+		bool valid;
+		bool is_16gb_dimm;
+		u8 num_channels;
+		enum dram_rank {
+			I915_DRAM_RANK_INVALID = 0,
+			I915_DRAM_RANK_SINGLE,
+			I915_DRAM_RANK_DUAL
+		} rank;
+		u32 bandwidth_kbps;
+		bool symmetric_memory;
+	} dram_info;
+
 	struct i915_runtime_pm runtime_pm;
 
 	struct {
@@ -2763,6 +2804,15 @@  struct drm_i915_private {
 	 */
 };
 
+struct dram_channel_info {
+	struct info {
+		u8 size, width;
+		enum dram_rank rank;
+	} l_info, s_info;
+	enum dram_rank rank;
+	bool is_16gb_dimm;
+};
+
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
 {
 	return container_of(dev, struct drm_i915_private, drm);
@@ -3185,7 +3235,7 @@  intel_info(const struct drm_i915_private *dev_priv)
 
 #define HAS_FW_BLC(dev_priv) 	(INTEL_GEN(dev_priv) > 2)
 #define HAS_FBC(dev_priv)	((dev_priv)->info.has_fbc)
-#define HAS_CUR_FBC(dev_priv)	(!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7)
+#define HAS_CUR_FBC(dev_priv)	(!HAS_GMCH_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 7)
 
 #define HAS_IPS(dev_priv)	(IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5cfba89ed586..9375767b95b4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5025,10 +5025,10 @@  i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
 	int i;
 
-	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
+	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
 	    !IS_CHERRYVIEW(dev_priv))
 		dev_priv->num_fence_regs = 32;
-	else if (INTEL_INFO(dev_priv)->gen >= 4 ||
+	else if (INTEL_GEN(dev_priv) >= 4 ||
 		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
 		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
 		dev_priv->num_fence_regs = 16;
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 012250f25255..a5faff2af30e 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -64,7 +64,7 @@  static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	int fence_pitch_shift;
 	u64 val;
 
-	if (INTEL_INFO(fence->i915)->gen >= 6) {
+	if (INTEL_GEN(fence->i915) >= 6) {
 		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
 		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2af65ecf2df8..e299d756d64b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2080,7 +2080,7 @@  static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 	ppgtt->base.i915 = dev_priv;
 	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
 
-	if (INTEL_INFO(dev_priv)->gen < 8)
+	if (INTEL_GEN(dev_priv) < 8)
 		return gen6_ppgtt_init(ppgtt);
 	else
 		return gen8_ppgtt_init(ppgtt);
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 03e7abc7e043..0840b8549f98 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -431,7 +431,7 @@  int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 	reserved_base = 0;
 	reserved_size = 0;
 
-	switch (INTEL_INFO(dev_priv)->gen) {
+	switch (INTEL_GEN(dev_priv)) {
 	case 2:
 	case 3:
 		break;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 07d5b733f045..dc29888bc614 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6428,6 +6428,9 @@  enum {
 
 #define _PLANE_BUF_CFG_1_B			0x7127c
 #define _PLANE_BUF_CFG_2_B			0x7137c
+#define  SKL_DDB_ENTRY_MASK			0x3FF
+#define  ICL_DDB_ENTRY_MASK			0x7FF
+#define  DDB_ENTRY_END_SHIFT			16
 #define _PLANE_BUF_CFG_1(pipe)	\
 	_PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B)
 #define _PLANE_BUF_CFG_2(pipe)	\
@@ -8692,6 +8695,54 @@  enum skl_power_gate {
 #define  DC_STATE_DEBUG_MASK_CORES	(1<<0)
 #define  DC_STATE_DEBUG_MASK_MEMORY_UP	(1<<1)
 
+#define BXT_P_CR_MC_BIOS_REQ_0_0_0	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114)
+#define  BXT_REQ_DATA_MASK			0x3F
+#define  BXT_DRAM_CHANNEL_ACTIVE_SHIFT		12
+#define  BXT_DRAM_CHANNEL_ACTIVE_MASK		(0xF << 12)
+#define  BXT_MEMORY_FREQ_MULTIPLIER_HZ		133333333
+
+#define BXT_D_CR_DRP0_DUNIT8			0x1000
+#define BXT_D_CR_DRP0_DUNIT9			0x1200
+#define  BXT_D_CR_DRP0_DUNIT_START		8
+#define  BXT_D_CR_DRP0_DUNIT_END		11
+#define BXT_D_CR_DRP0_DUNIT(x)	_MMIO(MCHBAR_MIRROR_BASE_SNB + \
+				      _PIPE((x) - 8, BXT_D_CR_DRP0_DUNIT8,\
+						 BXT_D_CR_DRP0_DUNIT9))
+#define  BXT_DRAM_RANK_MASK			0x3
+#define  BXT_DRAM_RANK_SINGLE			0x1
+#define  BXT_DRAM_RANK_DUAL			0x3
+#define  BXT_DRAM_WIDTH_MASK			(0x3 << 4)
+#define  BXT_DRAM_WIDTH_SHIFT			4
+#define  BXT_DRAM_WIDTH_X8			(0x0 << 4)
+#define  BXT_DRAM_WIDTH_X16			(0x1 << 4)
+#define  BXT_DRAM_WIDTH_X32			(0x2 << 4)
+#define  BXT_DRAM_WIDTH_X64			(0x3 << 4)
+#define  BXT_DRAM_SIZE_MASK			(0x7 << 6)
+#define  BXT_DRAM_SIZE_SHIFT			6
+#define  BXT_DRAM_SIZE_4GB			(0x0 << 6)
+#define  BXT_DRAM_SIZE_6GB			(0x1 << 6)
+#define  BXT_DRAM_SIZE_8GB			(0x2 << 6)
+#define  BXT_DRAM_SIZE_12GB			(0x3 << 6)
+#define  BXT_DRAM_SIZE_16GB			(0x4 << 6)
+
+#define SKL_MEMORY_FREQ_MULTIPLIER_HZ		266666666
+#define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04)
+#define  SKL_REQ_DATA_MASK			(0xF << 0)
+
+#define SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5010)
+#define  SKL_DRAM_S_SHIFT			16
+#define  SKL_DRAM_SIZE_MASK			0x3F
+#define  SKL_DRAM_WIDTH_MASK			(0x3 << 8)
+#define  SKL_DRAM_WIDTH_SHIFT			8
+#define  SKL_DRAM_WIDTH_X8			(0x0 << 8)
+#define  SKL_DRAM_WIDTH_X16			(0x1 << 8)
+#define  SKL_DRAM_WIDTH_X32			(0x2 << 8)
+#define  SKL_DRAM_RANK_MASK			(0x1 << 10)
+#define  SKL_DRAM_RANK_SHIFT			10
+#define  SKL_DRAM_RANK_SINGLE			(0x0 << 10)
+#define  SKL_DRAM_RANK_DUAL			(0x1 << 10)
+
 /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
  * since on HSW we can't write to it using I915_WRITE. */
 #define D_COMP_HSW			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
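
The SKL_MAD_DIMM field layout defined above decodes as: DIMM size in GB in bits 0-5, device width in bits 8-9 (a power-of-two multiple of x8), and rank in bit 10, with the S-slot copy of the same fields 16 bits higher. A small sketch decoding one hypothetical per-slot field (not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define SKL_DRAM_SIZE_MASK	0x3F
#define SKL_DRAM_WIDTH_MASK	(0x3 << 8)
#define SKL_DRAM_WIDTH_SHIFT	8
#define SKL_DRAM_RANK_MASK	(0x1 << 10)

int main(void)
{
	uint32_t val = 0x108;	/* hypothetical: 8GB, x16, single rank */
	unsigned int size = val & SKL_DRAM_SIZE_MASK;
	unsigned int width = (1 << ((val & SKL_DRAM_WIDTH_MASK) >>
				    SKL_DRAM_WIDTH_SHIFT)) * 8;
	int dual = !!(val & SKL_DRAM_RANK_MASK);

	/* prints: size:8GB width:X16 rank:single */
	printf("size:%uGB width:X%u rank:%s\n",
	       size, width, dual ? "dual" : "single");
	return 0;
}
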
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index 36d4e635e4ce..75d7e93bd09c 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -179,6 +179,7 @@  intel_crtc_duplicate_state(struct drm_crtc *crtc)
 	crtc_state->fifo_changed = false;
 	crtc_state->wm.need_postvbl_update = false;
 	crtc_state->fb_bits = 0;
+	crtc_state->update_planes = 0;
 
 	return &crtc_state->base;
 }
@@ -224,6 +225,7 @@  int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	struct drm_atomic_state *drm_state = crtc_state->base.state;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state);
 	int num_scalers_need;
 	int i, j;
 
@@ -301,8 +303,8 @@  int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 				continue;
 			}
 
-			plane_state = intel_atomic_get_existing_plane_state(drm_state,
-									    intel_plane);
+			plane_state = intel_atomic_get_new_plane_state(intel_state,
+								       intel_plane);
 			scaler_id = &plane_state->scaler_id;
 		}
 
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 8e6dc159f64d..2d007f5ed0f3 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -196,6 +196,9 @@  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	else
 		crtc_state->active_planes &= ~BIT(intel_plane->id);
 
+	if (state->visible || old_plane_state->base.visible)
+		crtc_state->update_planes |= BIT(intel_plane->id);
+
 	return intel_plane_atomic_calc_changes(old_crtc_state,
 					       &crtc_state->base,
 					       old_plane_state,
@@ -230,29 +233,49 @@  static int intel_plane_atomic_check(struct drm_plane *plane,
 						   to_intel_plane_state(new_plane_state));
 }
 
-static void intel_plane_atomic_update(struct drm_plane *plane,
-				      struct drm_plane_state *old_state)
+void intel_update_planes_on_crtc(struct intel_atomic_state *old_state,
+				 struct intel_crtc *crtc,
+				 struct intel_crtc_state *old_crtc_state,
+				 struct intel_crtc_state *new_crtc_state)
 {
-	struct intel_atomic_state *state = to_intel_atomic_state(old_state->state);
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	const struct intel_plane_state *new_plane_state =
-		intel_atomic_get_new_plane_state(state, intel_plane);
-	struct drm_crtc *crtc = new_plane_state->base.crtc ?: old_state->crtc;
-
-	if (new_plane_state->base.visible) {
-		const struct intel_crtc_state *new_crtc_state =
-			intel_atomic_get_new_crtc_state(state, to_intel_crtc(crtc));
-
-		trace_intel_update_plane(plane,
-					 to_intel_crtc(crtc));
-
-		intel_plane->update_plane(intel_plane,
-					  new_crtc_state, new_plane_state);
-	} else {
-		trace_intel_disable_plane(plane,
-					  to_intel_crtc(crtc));
-
-		intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
+	u32 update_mask = new_crtc_state->update_planes;
+	struct intel_plane_state *new_plane_state;
+	struct intel_plane *plane;
+	int i;
+
+	for_each_new_intel_plane_in_state(old_state, plane, new_plane_state, i) {
+		if (crtc->pipe != plane->pipe ||
+		    !(update_mask & BIT(plane->id)))
+			continue;
+
+		if (new_plane_state->base.visible) {
+			trace_intel_update_plane(&plane->base, crtc);
+
+			plane->update_plane(plane, new_crtc_state, new_plane_state);
+		} else if (new_plane_state->slave) {
+			struct intel_plane *master =
+				new_plane_state->linked_plane;
+
+			/*
+			 * We update the slave plane from this function because
+			 * programming it from the master plane's update_plane
+			 * callback runs into issues when the Y plane is
+			 * reassigned, disabled or used by a different plane.
+			 *
+			 * The slave plane is updated with the master plane's
+			 * plane_state.
+			 */
+			new_plane_state =
+				intel_atomic_get_new_plane_state(old_state, master);
+
+			trace_intel_update_plane(&plane->base, crtc);
+
+			plane->update_slave(plane, new_crtc_state, new_plane_state);
+		} else {
+			trace_intel_disable_plane(&plane->base, crtc);
+
+			plane->disable_plane(plane, new_crtc_state);
+		}
 	}
 }
 
@@ -260,7 +283,6 @@  const struct drm_plane_helper_funcs intel_plane_helper_funcs = {
 	.prepare_fb = intel_prepare_plane_fb,
 	.cleanup_fb = intel_cleanup_plane_fb,
 	.atomic_check = intel_plane_atomic_check,
-	.atomic_update = intel_plane_atomic_update,
 };
 
 /**
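
The rework above keys plane programming off crtc_state->update_planes: intel_plane_atomic_check_with_state() adds a plane to the mask when it is visible now or was visible before, so a plane that just turned off still gets its disable_plane() call. A minimal userspace mock of that bookkeeping (visibility values are hypothetical):

#include <stdio.h>
#include <stdint.h>

#define BIT(x) (1u << (x))

enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_CURSOR, MOCK_MAX_PLANES };

int main(void)
{
	uint8_t active_planes = 0, update_planes = 0;
	int old_visible[MOCK_MAX_PLANES] = { 1, 1, 0 };	/* hypothetical */
	int new_visible[MOCK_MAX_PLANES] = { 1, 0, 0 };

	for (int id = 0; id < MOCK_MAX_PLANES; id++) {
		if (new_visible[id])
			active_planes |= BIT(id);
		/* visible now, or visible before (still needs disabling) */
		if (new_visible[id] || old_visible[id])
			update_planes |= BIT(id);
	}

	/* SPRITE0 ends up in update_planes but not active_planes,
	 * so it gets a disable_plane() call during the commit */
	printf("active:0x%02x update:0x%02x\n", active_planes, update_planes);
	return 0;
}
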
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index d1a8c0fa2d38..2e8498da4c69 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -679,7 +679,7 @@  void intel_init_audio_hooks(struct drm_i915_private *dev_priv)
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		dev_priv->display.audio_codec_enable = ilk_audio_codec_enable;
 		dev_priv->display.audio_codec_disable = ilk_audio_codec_disable;
-	} else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) {
+	} else if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) {
 		dev_priv->display.audio_codec_enable = hsw_audio_codec_enable;
 		dev_priv->display.audio_codec_disable = hsw_audio_codec_disable;
 	} else if (HAS_PCH_SPLIT(dev_priv)) {
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 292daa335385..291c93d27d68 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -392,7 +392,7 @@  parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
 static int intel_bios_ssc_frequency(struct drm_i915_private *dev_priv,
 				    bool alternate)
 {
-	switch (INTEL_INFO(dev_priv)->gen) {
+	switch (INTEL_GEN(dev_priv)) {
 	case 2:
 		return alternate ? 66667 : 48000;
 	case 3:
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 2f678aff46f8..1b9203d54055 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -2086,7 +2086,7 @@  static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
 		return max_cdclk_freq;
 	else if (IS_CHERRYVIEW(dev_priv))
 		return max_cdclk_freq*95/100;
-	else if (INTEL_INFO(dev_priv)->gen < 4)
+	else if (INTEL_GEN(dev_priv) < 4)
 		return 2*max_cdclk_freq*90/100;
 	else
 		return max_cdclk_freq*90/100;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 38e53d6b8127..de8fab3c2ce0 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2156,7 +2156,7 @@  static void intel_ddi_clk_select(struct intel_encoder *encoder,
 
 		I915_WRITE(DPLL_CTRL2, val);
 
-	} else if (INTEL_INFO(dev_priv)->gen < 9) {
+	} else if (INTEL_GEN(dev_priv) < 9) {
 		I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a83e18c72f7b..91b4e66c4556 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2062,12 +2062,12 @@  static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_pr
 
 static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
 {
-	if (INTEL_INFO(dev_priv)->gen >= 9)
+	if (INTEL_GEN(dev_priv) >= 9)
 		return 256 * 1024;
 	else if (IS_I965G(dev_priv) || IS_I965GM(dev_priv) ||
 		 IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		return 128 * 1024;
-	else if (INTEL_INFO(dev_priv)->gen >= 4)
+	else if (INTEL_GEN(dev_priv) >= 4)
 		return 4 * 1024;
 	else
 		return 0;
@@ -2640,7 +2640,7 @@  static int i9xx_format_to_fourcc(int format)
 	}
 }
 
-static int skl_format_to_fourcc(int format, bool rgb_order, bool alpha)
+int skl_format_to_fourcc(int format, bool rgb_order, bool alpha)
 {
 	switch (format) {
 	case PLANE_CTL_FORMAT_RGB_565:
@@ -2761,7 +2761,7 @@  static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 		intel_pre_disable_primary_noatomic(&crtc->base);
 
 	trace_intel_disable_plane(&plane->base, crtc);
-	plane->disable_plane(plane, crtc);
+	plane->disable_plane(plane, crtc_state);
 }
 
 static void
@@ -3306,7 +3306,7 @@  static void i9xx_update_primary_plane(struct intel_plane *primary,
 }
 
 static void i9xx_disable_primary_plane(struct intel_plane *primary,
-				       struct intel_crtc *crtc)
+				       const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
 	enum plane plane = primary->plane;
@@ -5103,24 +5103,32 @@  static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
 		intel_update_watermarks(crtc);
 }
 
-static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask)
+static void intel_crtc_disable_planes(struct intel_atomic_state *state,
+				      struct intel_crtc *crtc)
 {
-	struct drm_device *dev = crtc->dev;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_plane *p;
-	int pipe = intel_crtc->pipe;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	unsigned int update_mask = new_crtc_state->update_planes;
+	const struct intel_plane_state *old_plane_state;
+	struct intel_plane *plane;
+	unsigned fb_bits = 0;
+	int i;
+
+	intel_crtc_dpms_overlay_disable(crtc);
 
-	intel_crtc_dpms_overlay_disable(intel_crtc);
+	for_each_old_intel_plane_in_state(state, plane, old_plane_state, i) {
+		if (crtc->pipe != plane->pipe ||
+		    !(update_mask & BIT(plane->id)))
+			continue;
 
-	drm_for_each_plane_mask(p, dev, plane_mask)
-		to_intel_plane(p)->disable_plane(to_intel_plane(p), intel_crtc);
+		plane->disable_plane(plane, new_crtc_state);
 
-	/*
-	 * FIXME: Once we grow proper nuclear flip support out of this we need
-	 * to compute the mask of flip planes precisely. For the time being
-	 * consider this a flip to a NULL plane.
-	 */
-	intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe));
+		if (old_plane_state->base.visible)
+			fb_bits |= plane->frontbuffer_bit;
+	}
+
+	intel_frontbuffer_flip(dev_priv, fb_bits);
 }
 
 static void intel_encoders_pre_pll_enable(struct drm_crtc *crtc,
@@ -6219,7 +6227,7 @@  static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc)
 	const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
 	/* GDG double wide on either pipe, otherwise pipe A only */
-	return INTEL_INFO(dev_priv)->gen < 4 &&
+	return INTEL_GEN(dev_priv) < 4 &&
 		(crtc->pipe == PIPE_A || IS_I915G(dev_priv));
 }
 
@@ -8094,7 +8102,7 @@  static void haswell_set_pipemisc(struct drm_crtc *crtc)
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_crtc_state *config = intel_crtc->config;
 
-	if (IS_BROADWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 9) {
+	if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) {
 		u32 val = 0;
 
 		switch (intel_crtc->config->pipe_bpp) {
@@ -9440,9 +9448,9 @@  static void i845_update_cursor(struct intel_plane *plane,
 }
 
 static void i845_disable_cursor(struct intel_plane *plane,
-				struct intel_crtc *crtc)
+				const struct intel_crtc_state *crtc_state)
 {
-	i845_update_cursor(plane, NULL, NULL);
+	i845_update_cursor(plane, crtc_state, NULL);
 }
 
 static bool i845_cursor_get_hw_state(struct intel_plane *plane)
@@ -9627,6 +9635,10 @@  static void i9xx_update_cursor(struct intel_plane *plane,
 	 * CURCNTR and CUR_FBC_CTL are always
 	 * armed by the CURBASE write only.
 	 */
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		skl_write_cursor_wm(plane, crtc_state);
+
 	if (plane->cursor.base != base ||
 	    plane->cursor.size != fbc_ctl ||
 	    plane->cursor.cntl != cntl) {
@@ -9650,9 +9662,9 @@  static void i9xx_update_cursor(struct intel_plane *plane,
 }
 
 static void i9xx_disable_cursor(struct intel_plane *plane,
-				struct intel_crtc *crtc)
+				const struct intel_crtc_state *crtc_state)
 {
-	i9xx_update_cursor(plane, NULL, NULL);
+	i9xx_update_cursor(plane, crtc_state, NULL);
 }
 
 static bool i9xx_cursor_get_hw_state(struct intel_plane *plane)
@@ -10393,6 +10405,101 @@  static bool check_single_encoder_cloning(struct drm_atomic_state *state,
 	return true;
 }
 
+static int icl_add_linked_planes(struct intel_atomic_state *state)
+{
+	struct intel_plane *plane, *linked;
+	struct intel_plane_state *plane_state, *linked_plane_state;
+	int i;
+
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+		linked = plane_state->linked_plane;
+
+		if (!linked)
+			continue;
+
+		linked_plane_state = intel_atomic_get_plane_state(state, linked);
+		if (IS_ERR(linked_plane_state))
+			return PTR_ERR(linked_plane_state);
+
+		WARN_ON(linked_plane_state->linked_plane != plane);
+		WARN_ON(linked_plane_state->slave == plane_state->slave);
+	}
+
+	return 0;
+}
+
+static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state);
+	struct intel_plane *plane, *linked;
+	struct intel_plane_state *plane_state;
+	int i;
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	/*
+	 * Destroy all old plane links and make the slave plane invisible
+	 * in the crtc_state->active_planes mask.
+	 */
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+		if (plane->pipe != crtc->pipe || !plane_state->linked_plane)
+			continue;
+
+		plane_state->linked_plane = NULL;
+		if (plane_state->slave && !plane_state->base.visible) {
+			crtc_state->active_planes &= ~BIT(plane->id);
+			crtc_state->update_planes |= BIT(plane->id);
+		}
+
+		plane_state->slave = false;
+	}
+
+	if (!crtc_state->nv12_planes)
+		return 0;
+
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+		struct intel_plane_state *linked_state = NULL;
+
+		if (plane->pipe != crtc->pipe ||
+		    !(crtc_state->nv12_planes & BIT(plane->id)))
+			continue;
+
+		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, linked) {
+			if (!icl_is_nv12_y_plane(linked->id))
+				continue;
+
+			if (crtc_state->active_planes & BIT(linked->id))
+				continue;
+
+			linked_state = intel_atomic_get_plane_state(state, linked);
+			if (IS_ERR(linked_state))
+				return PTR_ERR(linked_state);
+
+			break;
+		}
+
+		if (!linked_state) {
+			DRM_DEBUG_KMS("Need %d free Y planes for NV12\n",
+				      hweight8(crtc_state->nv12_planes));
+
+			return -EINVAL;
+		}
+
+		plane_state->linked_plane = linked;
+
+		linked_state->slave = true;
+		linked_state->linked_plane = plane;
+		crtc_state->active_planes |= BIT(linked->id);
+		crtc_state->update_planes |= BIT(linked->id);
+		DRM_DEBUG_KMS("Using %s as Y plane for %s\n", linked->base.name, plane->base.name);
+	}
+
+	return 0;
+}
+
 static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 				   struct drm_crtc_state *crtc_state)
 {
@@ -10464,6 +10571,8 @@  static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 		if (mode_changed)
 			ret = skl_update_scaler_crtc(pipe_config);
 
+		if (!ret)
+			ret = icl_check_nv12_planes(pipe_config);
 		if (!ret)
 			ret = skl_check_pipe_max_pixel_rate(intel_crtc,
 							    pipe_config);
@@ -10476,8 +10585,6 @@  static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 }
 
 static const struct drm_crtc_helper_funcs intel_helper_funcs = {
-	.atomic_begin = intel_begin_crtc_commit,
-	.atomic_flush = intel_finish_crtc_commit,
 	.atomic_check = intel_crtc_atomic_check,
 };
 
@@ -11350,6 +11457,8 @@  static void verify_wm_state(struct drm_crtc *crtc,
 	struct skl_pipe_wm hw_wm, *sw_wm;
 	struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
 	struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry;
+	struct skl_ddb_entry hw_ddb_y[I915_MAX_PLANES];
+	struct skl_ddb_entry hw_ddb_uv[I915_MAX_PLANES];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	const enum pipe pipe = intel_crtc->pipe;
 	int plane, level, max_level = ilk_wm_max_level(dev_priv);
@@ -11360,9 +11469,16 @@  static void verify_wm_state(struct drm_crtc *crtc,
 	skl_pipe_wm_get_hw_state(crtc, &hw_wm);
 	sw_wm = &to_intel_crtc_state(new_state)->wm.skl.optimal;
 
+	skl_pipe_ddb_get_hw_state(intel_crtc, hw_ddb_y, hw_ddb_uv);
+
 	skl_ddb_get_hw_state(dev_priv, &hw_ddb);
 	sw_ddb = &dev_priv->wm.skl_hw.ddb;
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		if (hw_ddb.enabled_slices != sw_ddb->enabled_slices)
+			DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
+				  sw_ddb->enabled_slices,
+				  hw_ddb.enabled_slices);
 	/* planes */
 	for_each_universal_plane(dev_priv, pipe, plane) {
 		hw_plane_wm = &hw_wm.planes[plane];
@@ -11397,8 +11513,8 @@  static void verify_wm_state(struct drm_crtc *crtc,
 		}
 
 		/* DDB */
-		hw_ddb_entry = &hw_ddb.plane[pipe][plane];
-		sw_ddb_entry = &sw_ddb->plane[pipe][plane];
+		hw_ddb_entry = &hw_ddb_y[plane];
+		sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[plane];
 
 		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
 			DRM_ERROR("mismatch in DDB state pipe %c plane %d (expected (%u,%u), found (%u,%u))\n",
@@ -11447,8 +11563,8 @@  static void verify_wm_state(struct drm_crtc *crtc,
 		}
 
 		/* DDB */
-		hw_ddb_entry = &hw_ddb.plane[pipe][PLANE_CURSOR];
-		sw_ddb_entry = &sw_ddb->plane[pipe][PLANE_CURSOR];
+		hw_ddb_entry = &hw_ddb_y[PLANE_CURSOR];
+		sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[PLANE_CURSOR];
 
 		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
 			DRM_ERROR("mismatch in DDB state pipe %c cursor (expected (%u,%u), found (%u,%u))\n",
@@ -12081,6 +12197,10 @@  static int intel_atomic_check(struct drm_device *dev,
 		intel_state->cdclk.logical = dev_priv->cdclk.logical;
 	}
 
+	ret = icl_add_linked_planes(intel_state);
+	if (ret)
+		return ret;
+
 	ret = drm_atomic_helper_check_planes(dev, state);
 	if (ret)
 		return ret;
@@ -12113,6 +12233,7 @@  static void intel_update_crtc(struct drm_crtc *crtc,
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *old_intel_cstate = to_intel_crtc_state(old_crtc_state);
 	struct intel_crtc_state *pipe_config = to_intel_crtc_state(new_crtc_state);
 	bool modeset = needs_modeset(new_crtc_state);
 
@@ -12130,7 +12251,12 @@  static void intel_update_crtc(struct drm_crtc *crtc,
 		    to_intel_plane_state(crtc->primary->state));
 	}
 
-	drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
+	intel_begin_crtc_commit(crtc, old_crtc_state);
+
+	intel_update_planes_on_crtc(to_intel_atomic_state(state), intel_crtc,
+				    old_intel_cstate, pipe_config);
+
+	intel_finish_crtc_commit(crtc, old_crtc_state);
 }
 
 static void intel_update_crtcs(struct drm_atomic_state *state)
@@ -12160,13 +12286,18 @@  static void skl_update_crtcs(struct drm_atomic_state *state)
 	bool progress;
 	enum pipe pipe;
 	int i;
-
-	const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {};
+	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+	u8 required_slices = intel_state->wm_results.ddb.enabled_slices;
+	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
 
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i)
 		/* ignore allocations for crtc's that have been turned off. */
 		if (new_crtc_state->active)
-			entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb;
+			entries[i] = to_intel_crtc_state(old_crtc_state)->wm.skl.ddb;
+
+	/* If a 2nd DBuf slice is required, enable it here */
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, required_slices);
 
 	/*
 	 * Whenever the number of active pipes changes, we need to make sure we
@@ -12188,14 +12319,13 @@  static void skl_update_crtcs(struct drm_atomic_state *state)
 			if (updated & cmask || !cstate->base.active)
 				continue;
 
-			if (skl_ddb_allocation_overlaps(dev_priv,
+			if (skl_ddb_allocation_overlaps(&cstate->wm.skl.ddb,
 							entries,
-							&cstate->wm.skl.ddb,
-							i))
+							INTEL_INFO(dev_priv)->num_pipes, i))
 				continue;
 
 			updated |= cmask;
-			entries[i] = &cstate->wm.skl.ddb;
+			entries[i] = cstate->wm.skl.ddb;
 
 			/*
 			 * If this is an already active pipe, it's DDB changed,
@@ -12218,6 +12348,10 @@  static void skl_update_crtcs(struct drm_atomic_state *state)
 			progress = true;
 		}
 	} while (progress);
+
+	/* If the 2nd DBuf slice is no longer required, disable it */
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, required_slices);
 }
 
 static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
@@ -12298,7 +12432,7 @@  static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 				       to_intel_crtc_state(new_crtc_state));
 
 		if (old_crtc_state->active) {
-			intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
+			intel_crtc_disable_planes(intel_state, intel_crtc);
 			dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state);
 			intel_crtc->active = false;
 			intel_fbc_disable(intel_crtc);
@@ -13033,15 +13167,17 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_framebuffer *old_fb;
-	struct drm_crtc_state *crtc_state = crtc->state;
 	struct i915_vma *old_vma, *vma;
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->state);
+	struct intel_crtc_state *new_crtc_state;
 
 	/*
 	 * When crtc is inactive or there is a modeset pending,
 	 * wait for it to complete in the slowpath
 	 */
-	if (!crtc_state->active || needs_modeset(crtc_state) ||
-	    to_intel_crtc_state(crtc_state)->update_pipe)
+	if (!crtc_state->base.active || needs_modeset(&crtc_state->base) ||
+	    crtc_state->update_pipe)
 		goto slow;
 
 	old_plane_state = plane->state;
@@ -13071,6 +13207,12 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 	if (!new_plane_state)
 		return -ENOMEM;
 
+	new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(crtc));
+	if (!new_crtc_state) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
 	drm_atomic_set_fb_for_plane(new_plane_state, fb);
 
 	new_plane_state->src_x = src_x;
@@ -13082,9 +13224,8 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 	new_plane_state->crtc_w = crtc_w;
 	new_plane_state->crtc_h = crtc_h;
 
-	ret = intel_plane_atomic_check_with_state(to_intel_crtc_state(crtc->state),
-						  to_intel_crtc_state(crtc->state), /* FIXME need a new crtc state? */
-						  to_intel_plane_state(plane->state),
+	ret = intel_plane_atomic_check_with_state(crtc_state, new_crtc_state,
+						  to_intel_plane_state(old_plane_state),
 						  to_intel_plane_state(new_plane_state));
 	if (ret)
 		goto out_free;
@@ -13121,14 +13262,25 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 	/* Swap plane state */
 	plane->state = new_plane_state;
 
+	/*
+	 * We cannot swap crtc_state as it may be in use by an atomic commit or
+	 * page flip that's running simultaneously. If we swap crtc_state and
+	 * destroy the old state, we will cause a use-after-free there.
+	 *
+	 * Only update active_planes, which is needed for our internal
+	 * bookkeeping. Either value will do the right thing when updating
+	 * planes atomically. If the cursor was part of the atomic update then
+	 * we would have taken the slowpath.
+	 */
+	crtc_state->active_planes = new_crtc_state->active_planes;
+
 	if (plane->state->visible) {
 		trace_intel_update_plane(plane, to_intel_crtc(crtc));
-		intel_plane->update_plane(intel_plane,
-					  to_intel_crtc_state(crtc->state),
+		intel_plane->update_plane(intel_plane, crtc_state,
 					  to_intel_plane_state(plane->state));
 	} else {
 		trace_intel_disable_plane(plane, to_intel_crtc(crtc));
-		intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
+		intel_plane->disable_plane(intel_plane, crtc_state);
 	}
 
 	old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma);
@@ -13138,6 +13290,8 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 out_free:
+	if (new_crtc_state)
+		intel_crtc_destroy_state(crtc, &new_crtc_state->base);
 	if (ret)
 		intel_plane_destroy_state(plane, new_plane_state);
 	else
@@ -13938,7 +14092,7 @@  static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	 * gen2/3 display engine uses the fence if present,
 	 * so the tiling mode must match the fb modifier exactly.
 	 */
-	if (INTEL_INFO(dev_priv)->gen < 4 &&
+	if (INTEL_GEN(dev_priv) < 4 &&
 	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
 		DRM_DEBUG_KMS("tiling_mode must match fb modifier exactly on gen2/3\n");
 		goto err;
@@ -14126,7 +14280,7 @@  void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 {
 	intel_init_cdclk_hooks(dev_priv);
 
-	if (INTEL_INFO(dev_priv)->gen >= 9) {
+	if (INTEL_GEN(dev_priv) >= 9) {
 		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
 			skylake_get_initial_plane_config;
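
skl_update_crtcs() above now stores the old DDB entries by value and keeps deferring a pipe until its new allocation no longer overlaps any other pipe's stale one. A standalone sketch of the overlap test, matching the new skl_ddb_allocation_overlaps() prototype and assuming half-open [start, end) entries as in the 5.0 code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct skl_ddb_entry { uint16_t start, end; };

static bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
				    const struct skl_ddb_entry *b)
{
	return a->start < b->end && b->start < a->end;
}

static bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
					const struct skl_ddb_entry entries[],
					int num_entries, int ignore_idx)
{
	for (int i = 0; i < num_entries; i++) {
		if (i != ignore_idx && skl_ddb_entries_overlap(ddb, &entries[i]))
			return true;
	}
	return false;
}

int main(void)
{
	/* hypothetical old allocations: pipe A [0,256), pipe B [256,512) */
	struct skl_ddb_entry entries[] = { { 0, 256 }, { 256, 512 } };
	struct skl_ddb_entry next_b = { 128, 384 };	/* proposed new pipe B */

	/* overlaps pipe A's stale range, so pipe B must be deferred
	 * until pipe A has been flushed -- prints 1 */
	printf("%d\n", skl_ddb_allocation_overlaps(&next_b, entries, 2, 1));
	return 0;
}
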
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 2398afb9825c..a9af1f7201bf 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1417,7 +1417,7 @@  static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv,
 static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv,
 				    enum port port)
 {
-	if (INTEL_INFO(dev_priv)->gen >= 9)
+	if (INTEL_GEN(dev_priv) >= 9)
 		return skl_aux_ctl_reg(dev_priv, port);
 	else if (HAS_PCH_SPLIT(dev_priv))
 		return ilk_aux_ctl_reg(dev_priv, port);
@@ -1428,7 +1428,7 @@  static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv,
 static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv,
 				     enum port port, int index)
 {
-	if (INTEL_INFO(dev_priv)->gen >= 9)
+	if (INTEL_GEN(dev_priv) >= 9)
 		return skl_aux_data_reg(dev_priv, port, index);
 	else if (HAS_PCH_SPLIT(dev_priv))
 		return ilk_aux_data_reg(dev_priv, port, index);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index aa8097bf8a2c..01b480bdd996 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -141,6 +141,10 @@ 
 #define KHz(x) (1000 * (x))
 #define MHz(x) KHz(1000 * (x))
 
+#define KBps(x) (1000 * (x))
+#define MBps(x) KBps(1000 * (x))
+#define GBps(x) ((u64)1000 * MBps((x)))
+
 /*
  * Display related stuff
  */
@@ -395,7 +399,7 @@  struct intel_atomic_state {
 	bool skip_intermediate_wm;
 
 	/* Gen9+ only */
-	struct skl_wm_values wm_results;
+	struct skl_ddb_values wm_results;
 
 	struct i915_sw_fence commit_ready;
 
@@ -439,6 +443,26 @@  struct intel_plane_state {
 	 */
 	int scaler_id;
 
+	/*
+	 * linked_plane:
+	 *
+	 * ICL planar formats require 2 planes that are updated as pairs.
+	 * This member is used to make sure the other plane is also updated
+	 * when required, and for update_slave() to find the correct
+	 * plane_state to pass as argument.
+	 */
+	struct intel_plane *linked_plane;
+
+	/*
+	 * slave:
+	 * If set, don't update this plane directly; the linked plane's state
+	 * is used to update it during the atomic commit, via update_slave().
+	 *
+	 * It's also used by the watermark code to ignore wm calculations on
+	 * this plane. They're calculated by the linked plane's wm code.
+	 */
+	u32 slave;
+
 	struct drm_intel_sprite_colorkey ckey;
 };
 
@@ -507,7 +531,9 @@  struct intel_pipe_wm {
 
 struct skl_plane_wm {
 	struct skl_wm_level wm[8];
+	struct skl_wm_level uv_wm[8];
 	struct skl_wm_level trans_wm;
+	bool is_planar;
 };
 
 struct skl_pipe_wm {
@@ -572,6 +598,8 @@  struct intel_crtc_wm_state {
 			/* gen9+ only needs 1-step wm programming */
 			struct skl_pipe_wm optimal;
 			struct skl_ddb_entry ddb;
+			struct skl_ddb_entry plane_ddb_y[I915_MAX_PLANES];
+			struct skl_ddb_entry plane_ddb_uv[I915_MAX_PLANES];
 		} skl;
 
 		struct {
@@ -780,6 +808,10 @@  struct intel_crtc_state {
 
 	/* bitmask of visible planes (enum plane_id) */
 	u8 active_planes;
+	u8 nv12_planes;
+
+	/* bitmask of planes that will be updated during the commit */
+	u8 update_planes;
 
 	/* HDMI scrambling status */
 	bool hdmi_scrambling;
@@ -859,8 +891,11 @@  struct intel_plane {
 	void (*update_plane)(struct intel_plane *plane,
 			     const struct intel_crtc_state *crtc_state,
 			     const struct intel_plane_state *plane_state);
+	void (*update_slave)(struct intel_plane *plane,
+			     const struct intel_crtc_state *crtc_state,
+			     const struct intel_plane_state *plane_state);
 	void (*disable_plane)(struct intel_plane *plane,
-			      struct intel_crtc *crtc);
+			      const struct intel_crtc_state *crtc_state);
 	bool (*get_hw_state)(struct intel_plane *plane);
 	int (*check_plane)(struct intel_plane *plane,
 			   struct intel_crtc_state *crtc_state,
@@ -1188,6 +1223,27 @@  hdmi_to_dig_port(struct intel_hdmi *intel_hdmi)
 	return container_of(intel_hdmi, struct intel_digital_port, hdmi);
 }
 
+static inline struct intel_plane_state *
+intel_atomic_get_plane_state(struct intel_atomic_state *state,
+				 struct intel_plane *plane)
+{
+	struct drm_plane_state *ret =
+		drm_atomic_get_plane_state(&state->base, &plane->base);
+
+	if (IS_ERR(ret))
+		return ERR_CAST(ret);
+
+	return to_intel_plane_state(ret);
+}
+
+static inline struct intel_plane_state *
+intel_atomic_get_old_plane_state(struct intel_atomic_state *state,
+				 struct intel_plane *plane)
+{
+	return to_intel_plane_state(drm_atomic_get_old_plane_state(&state->base,
+								   &plane->base));
+}
+
 static inline struct intel_plane_state *
 intel_atomic_get_new_plane_state(struct intel_atomic_state *state,
 				 struct intel_plane *plane)
@@ -1497,6 +1553,7 @@  u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 		     unsigned int rotation);
 int skl_check_plane_surface(struct intel_plane_state *plane_state);
 int i9xx_check_plane_surface(struct intel_plane_state *plane_state);
+int skl_format_to_fourcc(int format, bool rgb_order, bool alpha);
 
 /* intel_csr.c */
 void intel_csr_ucode_init(struct drm_i915_private *);
@@ -1783,6 +1840,8 @@  bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 					enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    u8 req_slices);
 
 static inline void
 assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
@@ -1879,6 +1938,9 @@  void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
+void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
+			       struct skl_ddb_entry *ddb_y,
+			       struct skl_ddb_entry *ddb_uv);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */);
 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
@@ -1890,10 +1952,13 @@  int intel_enable_sagv(struct drm_i915_private *dev_priv);
 int intel_disable_sagv(struct drm_i915_private *dev_priv);
 bool skl_wm_level_equals(const struct skl_wm_level *l1,
 			 const struct skl_wm_level *l2);
-bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
-				 const struct skl_ddb_entry **entries,
-				 const struct skl_ddb_entry *ddb,
-				 int ignore);
+bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
+				 const struct skl_ddb_entry entries[],
+				 int num_entries, int ignore_idx);
+void skl_write_plane_wm(struct intel_plane *plane,
+			const struct intel_crtc_state *crtc_state);
+void skl_write_cursor_wm(struct intel_plane *plane,
+			 const struct intel_crtc_state *crtc_state);
 bool ilk_disable_lp_wm(struct drm_device *dev);
 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
@@ -1922,9 +1987,19 @@  void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state);
 void skl_update_plane(struct intel_plane *plane,
 		      const struct intel_crtc_state *crtc_state,
 		      const struct intel_plane_state *plane_state);
-void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc);
+void skl_disable_plane(struct intel_plane *plane,
+		       const struct intel_crtc_state *crtc_state);
 bool skl_plane_get_hw_state(struct intel_plane *plane);
 
+static inline bool icl_is_nv12_y_plane(enum plane_id id)
+{
+	/* No gen check needed; these planes are only available on gen11 */
+	if (id == PLANE_SPRITE4 || id == PLANE_SPRITE5)
+		return true;
+
+	return false;
+}
+
 /* intel_tv.c */
 void intel_tv_init(struct drm_i915_private *dev_priv);
 
@@ -1974,17 +2049,6 @@  intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
 		return NULL;
 }
 
-static inline struct intel_plane_state *
-intel_atomic_get_existing_plane_state(struct drm_atomic_state *state,
-				      struct intel_plane *plane)
-{
-	struct drm_plane_state *plane_state;
-
-	plane_state = drm_atomic_get_existing_plane_state(state, &plane->base);
-
-	return to_intel_plane_state(plane_state);
-}
-
 int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 			       struct intel_crtc *intel_crtc,
 			       struct intel_crtc_state *crtc_state);
@@ -1995,6 +2059,10 @@  struct drm_plane_state *intel_plane_duplicate_state(struct drm_plane *plane);
 void intel_plane_destroy_state(struct drm_plane *plane,
 			       struct drm_plane_state *state);
 extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
+void intel_update_planes_on_crtc(struct intel_atomic_state *old_state,
+				 struct intel_crtc *crtc,
+				 struct intel_crtc_state *old_crtc_state,
+				 struct intel_crtc_state *new_crtc_state);
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *crtc_state,
 					const struct intel_plane_state *old_plane_state,
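
As an aside on the new KBps()/MBps()/GBps() helpers added near the top of this header: they are plain decimal unit multipliers, and the u64 cast in GBps() matters because anything past ~2 GB/s expressed in bytes per second overflows a 32-bit int. A quick sketch:

#include <stdio.h>
#include <stdint.h>

#define KBps(x) (1000 * (x))
#define MBps(x) KBps(1000 * (x))
#define GBps(x) ((uint64_t)1000 * MBps(x))	/* cast mirrors the kernel's (u64) */

int main(void)
{
	/* 64 GB/s = 6.4e10 Bps; without the cast this would overflow int */
	printf("64 GBps = %llu Bps\n", (unsigned long long)GBps(64));
	return 0;
}
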
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 49aa392d23e8..9542a5365f48 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -187,7 +187,7 @@  static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
 	/* Convert from 100ms to 100us units */
 	pps->t4 = val * 1000;
 
-	if (INTEL_INFO(dev_priv)->gen <= 4 &&
+	if (INTEL_GEN(dev_priv) <= 4 &&
 	    pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
 		DRM_DEBUG_KMS("Panel power timings uninitialized, "
 			      "setting defaults\n");
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index f4c46b0b8f0a..abb7a8c1e340 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -187,7 +187,7 @@  static bool get_mocs_settings(struct drm_i915_private *dev_priv,
 		table->table = broxton_mocs_table;
 		result = true;
 	} else {
-		WARN_ONCE(INTEL_INFO(dev_priv)->gen >= 9,
+		WARN_ONCE(INTEL_GEN(dev_priv) >= 9,
 			  "Platform that should have a MOCS table does not.\n");
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 76f3aa5af055..7ecd92ffe478 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -497,7 +497,7 @@  static u32 i9xx_get_backlight(struct intel_connector *connector)
 	u32 val;
 
 	val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
-	if (INTEL_INFO(dev_priv)->gen < 4)
+	if (INTEL_GEN(dev_priv) < 4)
 		val >>= 1;
 
 	if (panel->backlight.combination_mode) {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c4e5db551fc2..a4117b3af73f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2852,6 +2852,15 @@  static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 			}
 		}
 
+		/*
+		 * WA Level-0 adjustment for 16GB DIMMs: SKL+
+		 * If we could not read the DIMM info, assume a 16GB DIMM
+		 * and enable this WA to prevent underruns.
+		 */
+		if (dev_priv->dram_info.is_16gb_dimm)
+			wm[0] += 1;
+
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		uint64_t sskpd = I915_READ64(MCH_SSKPD);
 
@@ -2924,8 +2933,8 @@  static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
 		unsigned int latency = wm[level];
 
 		if (latency == 0) {
-			DRM_ERROR("%s WM%d latency not provided\n",
-				  name, level);
+			DRM_DEBUG_KMS("%s WM%d latency not provided\n",
+				      name, level);
 			continue;
 		}
 
@@ -3549,6 +3558,11 @@  bool ilk_disable_lp_wm(struct drm_device *dev)
 	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
 }
 
+static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
+{
+	return 1;
+}
+
 /*
 * FIXME: We still don't have the proper code to detect if we need to apply the WA,
  * so assume we'll always need it in order to avoid underruns.
@@ -3729,18 +3743,54 @@  bool intel_can_enable_sagv(struct drm_atomic_state *state)
 	return true;
 }
 
+static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
+			      const struct intel_crtc_state *cstate,
+			      const u64 total_data_rate,
+			      const int num_active,
+			      struct skl_ddb_allocation *ddb)
+{
+	const struct drm_display_mode *adjusted_mode;
+	u64 total_data_bw;
+	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
+
+	WARN_ON(ddb_size == 0);
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return ddb_size - 4; /* 4 blocks for bypass path allocation */
+
+	adjusted_mode = &cstate->base.adjusted_mode;
+	total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
+
+	/*
+	 * 12GB/s is the maximum BW supported by a single DBuf slice.
+	 */
+	if (num_active > 1 || total_data_bw >= GBps(12)) {
+		ddb->enabled_slices = 2;
+	} else {
+		ddb->enabled_slices = 1;
+		ddb_size /= 2;
+	}
+
+	return ddb_size;
+}
+
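
For reference, the gen11 branch above reduces to a small calculation. A rough standalone sketch in plain userspace C (the 2048-block DDB size and the 8GB/s bandwidth are illustrative assumptions, not values from the patch):

#include <stdio.h>
#include <stdint.h>

#define ICL_DDB_SIZE 2048			/* assumed total DDB blocks */
#define GBps(x) ((uint64_t)(x) * 1000000000ull)

int main(void)
{
	uint64_t total_data_bw = GBps(8);	/* assumed pipe bandwidth */
	int num_active = 1;
	uint16_t ddb_size = ICL_DDB_SIZE;
	int enabled_slices;

	/* Mirrors the gen11 branch: two slices when more than one pipe
	 * is active or the bandwidth reaches 12GB/s, otherwise one
	 * slice and half the DDB. */
	if (num_active > 1 || total_data_bw >= GBps(12)) {
		enabled_slices = 2;
	} else {
		enabled_slices = 1;
		ddb_size /= 2;
	}

	printf("slices=%d ddb_size=%u\n", enabled_slices, ddb_size);
	return 0;
}
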
 static void
-skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
+skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 				   const struct intel_crtc_state *cstate,
+				   const u64 total_data_rate,
+				   struct skl_ddb_allocation *ddb,
 				   struct skl_ddb_entry *alloc, /* out */
 				   int *num_active /* out */)
 {
 	struct drm_atomic_state *state = cstate->base.state;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *for_crtc = cstate->base.crtc;
-	unsigned int pipe_size, ddb_size;
-	int nth_active_pipe;
+	const struct drm_crtc_state *crtc_state;
+	const struct drm_crtc *crtc;
+	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
+	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
+	u16 ddb_size;
+	u32 i;
 
 	if (WARN_ON(!state) || !cstate->base.active) {
 		alloc->start = 0;
@@ -3754,20 +3804,18 @@  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
 	else
 		*num_active = hweight32(dev_priv->active_crtcs);
 
-	ddb_size = INTEL_INFO(dev_priv)->ddb_size;
-	WARN_ON(ddb_size == 0);
-
-	ddb_size -= 4; /* 4 blocks for bypass path allocation */
+	ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
+				      *num_active, ddb);
 
 	/*
-	 * If the state doesn't change the active CRTC's, then there's
-	 * no need to recalculate; the existing pipe allocation limits
-	 * should remain unchanged.  Note that we're safe from racing
-	 * commits since any racing commit that changes the active CRTC
-	 * list would need to grab _all_ crtc locks, including the one
-	 * we currently hold.
+	 * If the state doesn't change the active CRTCs or there is no
+	 * modeset request, then there's no need to recalculate;
+	 * the existing pipe allocation limits should remain unchanged.
+	 * Note that we're safe from racing commits since any racing commit
+	 * that changes the active CRTC list or does a modeset would need
+	 * to grab _all_ crtc locks, including the one we currently hold.
 	 */
-	if (!intel_state->active_pipe_changes) {
+	if (!intel_state->active_pipe_changes && !intel_state->modeset) {
 		/*
 		 * alloc may be cleared by clear_intel_crtc_state,
 		 * copy from old state to be sure
@@ -3776,11 +3824,32 @@  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
 		return;
 	}
 
-	nth_active_pipe = hweight32(intel_state->active_crtcs &
-				    (drm_crtc_mask(for_crtc) - 1));
-	pipe_size = ddb_size / hweight32(intel_state->active_crtcs);
-	alloc->start = nth_active_pipe * ddb_size / *num_active;
-	alloc->end = alloc->start + pipe_size;
+	/*
+	 * The watermark/ddb requirement depends heavily on the width of
+	 * the framebuffer, so instead of allocating DDB equally among
+	 * pipes, distribute it based on the resolution/width of each
+	 * display.
+	 */
+	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+		const struct drm_display_mode *adjusted_mode;
+		int hdisplay, vdisplay;
+		enum pipe pipe;
+
+		if (!crtc_state->enable)
+			continue;
+
+		pipe = to_intel_crtc(crtc)->pipe;
+		adjusted_mode = &crtc_state->adjusted_mode;
+		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
+		total_width += hdisplay;
+
+		if (pipe < for_pipe)
+			width_before_pipe += hdisplay;
+		else if (pipe == for_pipe)
+			pipe_width = hdisplay;
+	}
+
+	alloc->start = ddb_size * width_before_pipe / total_width;
+	alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
 }
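
As a worked example of the width-proportional split above, a standalone sketch (userspace C; the 892-block ddb_size and the pipe widths are assumptions for illustration):

#include <stdio.h>

int main(void)
{
	/* Assumed: pipe A is 1920 px wide, pipe B 3840 px; allocating
	 * for pipe B out of an assumed 892-block DDB. */
	int ddb_size = 892;
	int total_width = 1920 + 3840;
	int width_before_pipe = 1920;
	int pipe_width = 3840;

	int start = ddb_size * width_before_pipe / total_width;
	int end = ddb_size * (width_before_pipe + pipe_width) / total_width;

	printf("pipe B ddb: [%d, %d)\n", start, end);	/* [297, 892) */
	return 0;
}

The wider pipe simply gets a proportionally larger slice of the contiguous DDB range.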
 
 static unsigned int skl_cursor_allocation(int num_active)
@@ -3791,43 +3860,88 @@  static unsigned int skl_cursor_allocation(int num_active)
 	return 8;
 }
 
-static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
+static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
+				       struct skl_ddb_entry *entry, u32 reg)
 {
-	entry->start = reg & 0x3ff;
-	entry->end = (reg >> 16) & 0x3ff;
+	u16 mask;
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		mask = ICL_DDB_ENTRY_MASK;
+	else
+		mask = SKL_DDB_ENTRY_MASK;
+	entry->start = reg & mask;
+	entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
+
 	if (entry->end)
 		entry->end += 1;
 }
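
To illustrate the decode above, a minimal standalone sketch (userspace C; the mask and shift mirror the pre-gen11 path, the register value itself is made up):

#include <stdio.h>
#include <stdint.h>

#define SKL_DDB_ENTRY_MASK	0x3ff	/* 10-bit start/end fields */
#define DDB_ENTRY_END_SHIFT	16

int main(void)
{
	/* Assumed register value: start 160, inclusive end 511. */
	uint32_t reg = (0x1ffu << DDB_ENTRY_END_SHIFT) | 0x0a0u;
	uint16_t start = reg & SKL_DDB_ENTRY_MASK;
	uint16_t end = (reg >> DDB_ENTRY_END_SHIFT) & SKL_DDB_ENTRY_MASK;

	if (end)
		end += 1;	/* hardware end is inclusive; make it exclusive */

	printf("ddb entry: [%u, %u)\n", start, end);	/* [160, 512) */
	return 0;
}
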
 
-void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
-			  struct skl_ddb_allocation *ddb /* out */)
-{
-	struct intel_crtc *crtc;
+static void
+skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
+			   const enum pipe pipe,
+			   const enum plane_id plane_id,
+			   struct skl_ddb_entry *ddb_y,
+			   struct skl_ddb_entry *ddb_uv)
+{
+	u32 val, val2;
+	u32 fourcc = 0;
+
+	/* Cursor doesn't support NV12/planar, so no extra calculation needed */
+	if (plane_id == PLANE_CURSOR) {
+		val = I915_READ(CUR_BUF_CFG(pipe));
+		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
+		return;
+	}
 
-	memset(ddb, 0, sizeof(*ddb));
+	val = I915_READ(PLANE_CTL(pipe, plane_id));
 
-	for_each_intel_crtc(&dev_priv->drm, crtc) {
-		enum intel_display_power_domain power_domain;
-		enum plane_id plane_id;
-		enum pipe pipe = crtc->pipe;
+	/* No DDB allocated for disabled planes */
+	if (val & PLANE_CTL_ENABLE)
+		fourcc = skl_format_to_fourcc(val & PLANE_CTL_FORMAT_MASK,
+					      val & PLANE_CTL_ORDER_RGBX,
+					      val & PLANE_CTL_ALPHA_MASK);
 
-		power_domain = POWER_DOMAIN_PIPE(pipe);
-		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
-			continue;
+	if (INTEL_GEN(dev_priv) >= 11) {
+		val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
+		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
+	} else {
+		val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
+		val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
 
-		for_each_plane_id_on_crtc(crtc, plane_id) {
-			u32 val;
+		if (fourcc == DRM_FORMAT_NV12)
+			swap(val, val2);
 
-			if (plane_id != PLANE_CURSOR)
-				val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
-			else
-				val = I915_READ(CUR_BUF_CFG(pipe));
+		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
+		skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
+	}
+}
 
-			skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val);
-		}
+void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
+			       struct skl_ddb_entry *ddb_y,
+			       struct skl_ddb_entry *ddb_uv)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = crtc->pipe;
+	enum plane_id plane_id;
 
-		intel_display_power_put(dev_priv, power_domain);
-	}
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return;
+
+	for_each_plane_id_on_crtc(crtc, plane_id)
+		skl_ddb_get_hw_plane_state(dev_priv, pipe,
+					   plane_id,
+					   &ddb_y[plane_id],
+					   &ddb_uv[plane_id]);
+
+	intel_display_power_put(dev_priv, power_domain);
+}
+
+void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
+			  struct skl_ddb_allocation *ddb /* out */)
+{
+	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
 }
 
 /*
@@ -3979,28 +4093,29 @@  int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
 	return 0;
 }
 
-static unsigned int
+static u64
 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
-			     const struct drm_plane_state *pstate,
-			     int y)
+			     const struct intel_plane_state *intel_pstate,
+			     const int plane)
 {
-	struct intel_plane *plane = to_intel_plane(pstate->plane);
-	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
+	struct intel_plane *intel_plane =
+		to_intel_plane(intel_pstate->base.plane);
 	uint32_t data_rate;
 	uint32_t width = 0, height = 0;
 	struct drm_framebuffer *fb;
 	u32 format;
 	uint_fixed_16_16_t down_scale_amount;
+	u64 rate;
 
 	if (!intel_pstate->base.visible)
 		return 0;
 
-	fb = pstate->fb;
+	fb = intel_pstate->base.fb;
 	format = fb->format->format;
 
-	if (plane->id == PLANE_CURSOR)
+	if (intel_plane->id == PLANE_CURSOR)
 		return 0;
-	if (y && format != DRM_FORMAT_NV12)
+	if (plane == 1 && format != DRM_FORMAT_NV12)
 		return 0;
 
 	/*
@@ -4011,39 +4126,32 @@  skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-	/* for planar format */
-	if (format == DRM_FORMAT_NV12) {
-		if (y)  /* y-plane data rate */
-			data_rate = width * height *
-				fb->format->cpp[0];
-		else    /* uv-plane data rate */
-			data_rate = (width / 2) * (height / 2) *
-				fb->format->cpp[1];
-	} else {
-		/* for packed formats */
-		data_rate = width * height * fb->format->cpp[0];
+	/* UV plane does 1/2 pixel sub-sampling */
+	if (plane == 1 && format == DRM_FORMAT_NV12) {
+		width /= 2;
+		height /= 2;
 	}
 
+	data_rate = width * height;
+
 	down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
 
-	return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
+	rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
+
+	rate *= fb->format->cpp[plane];
+	return rate;
 }
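
A quick worked example of the rate computed above for an NV12 framebuffer, ignoring the downscale factor (standalone userspace C; the 4096x2160 source size is an assumption):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed 4096x2160 NV12 source, no downscaling. */
	uint32_t width = 4096, height = 2160;
	uint32_t cpp_y = 1, cpp_uv = 2;	/* NV12: 1 byte Y, 2 bytes CbCr */

	uint64_t y_rate = (uint64_t)width * height * cpp_y;
	/* The UV plane is subsampled 2x2, as in the patch. */
	uint64_t uv_rate = (uint64_t)(width / 2) * (height / 2) * cpp_uv;

	printf("y=%llu uv=%llu total=%llu bytes/frame\n",
	       (unsigned long long)y_rate,
	       (unsigned long long)uv_rate,
	       (unsigned long long)(y_rate + uv_rate));
	return 0;
}
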
 
-/*
- * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
- * a 8192x4096@32bpp framebuffer:
- *   3 * 4096 * 8192  * 4 < 2^32
- */
-static unsigned int
+static u64
 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
-				 unsigned *plane_data_rate,
-				 unsigned *plane_y_data_rate)
+				 u64 *plane_data_rate,
+				 u64 *uv_plane_data_rate)
 {
 	struct drm_crtc_state *cstate = &intel_cstate->base;
 	struct drm_atomic_state *state = cstate->state;
 	struct drm_plane *plane;
 	const struct drm_plane_state *pstate;
-	unsigned int total_data_rate = 0;
+	u64 total_data_rate = 0;
 
 	if (WARN_ON(!state))
 		return 0;
@@ -4051,29 +4159,83 @@  skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
 	/* Calculate and cache data rate for each plane */
 	drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
 		enum plane_id plane_id = to_intel_plane(plane)->id;
-		unsigned int rate;
+		u64 rate;
+		const struct intel_plane_state *intel_pstate =
+			to_intel_plane_state(pstate);
 
-		/* packed/uv */
+		/* packed/y */
 		rate = skl_plane_relative_data_rate(intel_cstate,
-						    pstate, 0);
+						    intel_pstate, 0);
 		plane_data_rate[plane_id] = rate;
-
 		total_data_rate += rate;
 
-		/* y-plane */
+		/* uv-plane */
 		rate = skl_plane_relative_data_rate(intel_cstate,
-						    pstate, 1);
-		plane_y_data_rate[plane_id] = rate;
-
+						    intel_pstate, 1);
+		uv_plane_data_rate[plane_id] = rate;
 		total_data_rate += rate;
 	}
 
 	return total_data_rate;
 }
 
+static u64
+icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
+				 u64 *plane_data_rate)
+{
+	struct drm_crtc_state *cstate = &intel_cstate->base;
+	struct drm_atomic_state *state = cstate->state;
+	struct drm_plane *plane;
+	const struct drm_plane_state *pstate;
+	u64 total_data_rate = 0;
+
+	if (WARN_ON(!state))
+		return 0;
+
+	/* Calculate and cache data rate for each plane */
+	drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
+		const struct intel_plane_state *intel_pstate =
+			to_intel_plane_state(pstate);
+		enum plane_id plane_id = to_intel_plane(plane)->id;
+		u64 rate;
+
+		if (!intel_pstate->linked_plane) {
+			rate = skl_plane_relative_data_rate(intel_cstate,
+							    intel_pstate, 0);
+			plane_data_rate[plane_id] = rate;
+			total_data_rate += rate;
+		} else {
+			enum plane_id y_plane_id;
+
+			/*
+			 * The slave plane might not be iterated by
+			 * drm_atomic_crtc_state_for_each_plane_state(),
+			 * and it needs the master plane state, which may
+			 * be NULL if we try get_new_plane_state(), so we
+			 * always calculate from the master.
+			 */
+			if (intel_pstate->slave)
+				continue;
+
+			/* Y plane rate is calculated on the slave */
+			rate = skl_plane_relative_data_rate(intel_cstate,
+							    intel_pstate, 0);
+			y_plane_id = intel_pstate->linked_plane->id;
+			plane_data_rate[y_plane_id] = rate;
+			total_data_rate += rate;
+
+			rate = skl_plane_relative_data_rate(intel_cstate,
+							    intel_pstate, 1);
+			plane_data_rate[plane_id] = rate;
+			total_data_rate += rate;
+		}
+	}
+
+	return total_data_rate;
+}
+
 static uint16_t
-skl_ddb_min_alloc(const struct drm_plane_state *pstate,
-		  const int y)
+skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane)
 {
 	struct drm_framebuffer *fb = pstate->fb;
 	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
@@ -4084,8 +4246,8 @@  skl_ddb_min_alloc(const struct drm_plane_state *pstate,
 	if (WARN_ON(!fb))
 		return 0;
 
-	/* For packed formats, no y-plane, return 0 */
-	if (y && fb->format->format != DRM_FORMAT_NV12)
+	/* For packed formats, and uv-plane, return 0 */
+	if (plane == 1 && fb->format->format != DRM_FORMAT_NV12)
 		return 0;
 
 	/* For Non Y-tile return 8-blocks */
@@ -4104,15 +4266,12 @@  skl_ddb_min_alloc(const struct drm_plane_state *pstate,
 	src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
 
 	/* Halve UV plane width and height for NV12 */
-	if (fb->format->format == DRM_FORMAT_NV12 && !y) {
+	if (plane == 1) {
 		src_w /= 2;
 		src_h /= 2;
 	}
 
-	if (fb->format->format == DRM_FORMAT_NV12 && !y)
-		plane_bpp = fb->format->cpp[1];
-	else
-		plane_bpp = fb->format->cpp[0];
+	plane_bpp = fb->format->cpp[plane];
 
 	if (drm_rotation_90_or_270(pstate->rotation)) {
 		switch (plane_bpp) {
@@ -4140,22 +4299,32 @@  skl_ddb_min_alloc(const struct drm_plane_state *pstate,
 
 static void
 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
-		 uint16_t *minimum, uint16_t *y_minimum)
+		 uint16_t *minimum, uint16_t *uv_minimum)
 {
 	const struct drm_plane_state *pstate;
 	struct drm_plane *plane;
 
 	drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
 		enum plane_id plane_id = to_intel_plane(plane)->id;
+		struct intel_plane_state *plane_state = to_intel_plane_state(pstate);
 
 		if (plane_id == PLANE_CURSOR)
 			continue;
 
-		if (!pstate->visible)
+		/* slave plane must be invisible and calculated from master */
+		if (!pstate->visible || WARN_ON(plane_state->slave))
 			continue;
 
-		minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
-		y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
+		if (!plane_state->linked_plane) {
+			minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
+			uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
+		} else {
+			enum plane_id y_plane_id =
+				plane_state->linked_plane->id;
+
+			minimum[y_plane_id] = skl_ddb_min_alloc(pstate, 0);
+			minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
+		}
 	}
 
 	minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
@@ -4167,23 +4336,22 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 {
 	struct drm_atomic_state *state = cstate->base.state;
 	struct drm_crtc *crtc = cstate->base.crtc;
-	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
 	struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
 	uint16_t alloc_size, start;
 	uint16_t minimum[I915_MAX_PLANES] = {};
-	uint16_t y_minimum[I915_MAX_PLANES] = {};
-	unsigned int total_data_rate;
+	uint16_t uv_minimum[I915_MAX_PLANES] = {};
+	u64 total_data_rate;
 	enum plane_id plane_id;
 	int num_active;
-	unsigned plane_data_rate[I915_MAX_PLANES] = {};
-	unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
+	u64 plane_data_rate[I915_MAX_PLANES] = {};
+	u64 uv_plane_data_rate[I915_MAX_PLANES] = {};
 	uint16_t total_min_blocks = 0;
 
 	/* Clear the partitioning for disabled planes. */
-	memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
-	memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
+	memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y));
+	memset(cstate->wm.skl.plane_ddb_uv, 0, sizeof(cstate->wm.skl.plane_ddb_uv));
 
 	if (WARN_ON(!state))
 		return 0;
@@ -4193,12 +4361,23 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		return 0;
 	}
 
-	skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
+	if (INTEL_GEN(dev_priv) < 11)
+		total_data_rate =
+			skl_get_total_relative_data_rate(cstate,
+							 plane_data_rate,
+							 uv_plane_data_rate);
+	else
+		total_data_rate =
+			icl_get_total_relative_data_rate(cstate,
+							 plane_data_rate);
+
+	skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
+					   ddb, alloc, &num_active);
 	alloc_size = skl_ddb_entry_size(alloc);
 	if (alloc_size == 0)
 		return 0;
 
-	skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
+	skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum);
 
 	/*
 	 * 1. Allocate the minimum required blocks for each active plane
@@ -4208,7 +4387,7 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 
 	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
 		total_min_blocks += minimum[plane_id];
-		total_min_blocks += y_minimum[plane_id];
+		total_min_blocks += uv_minimum[plane_id];
 	}
 
 	if (total_min_blocks > alloc_size) {
@@ -4219,8 +4398,8 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 	}
 
 	alloc_size -= total_min_blocks;
-	ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
-	ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
+	cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
+	cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
 
 	/*
 	 * 2. Distribute the remaining space in proportion to the amount of
@@ -4228,16 +4407,13 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 	 *
 	 * FIXME: we may not allocate every single block here.
 	 */
-	total_data_rate = skl_get_total_relative_data_rate(cstate,
-							   plane_data_rate,
-							   plane_y_data_rate);
 	if (total_data_rate == 0)
 		return 0;
 
 	start = alloc->start;
 	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
-		unsigned int data_rate, y_data_rate;
-		uint16_t plane_blocks, y_plane_blocks = 0;
+		u64 data_rate, uv_data_rate;
+		uint16_t plane_blocks, uv_plane_blocks;
 
 		if (plane_id == PLANE_CURSOR)
 			continue;
@@ -4250,32 +4426,32 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		 * result is < available as data_rate / total_data_rate < 1
 		 */
 		plane_blocks = minimum[plane_id];
-		plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
-					total_data_rate);
+		plane_blocks += div64_u64(alloc_size * data_rate, total_data_rate);
 
 		/* Leave disabled planes at (0,0) */
 		if (data_rate) {
-			ddb->plane[pipe][plane_id].start = start;
-			ddb->plane[pipe][plane_id].end = start + plane_blocks;
+			cstate->wm.skl.plane_ddb_y[plane_id].start = start;
+			cstate->wm.skl.plane_ddb_y[plane_id].end = start + plane_blocks;
 		}
 
 		start += plane_blocks;
 
-		/*
-		 * allocation for y_plane part of planar format:
-		 */
-		y_data_rate = plane_y_data_rate[plane_id];
+		/* Allocate DDB for UV plane for planar format/NV12 */
+		uv_data_rate = uv_plane_data_rate[plane_id];
 
-		y_plane_blocks = y_minimum[plane_id];
-		y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
-					total_data_rate);
+		uv_plane_blocks = uv_minimum[plane_id];
+		uv_plane_blocks += div64_u64(alloc_size * uv_data_rate, total_data_rate);
 
-		if (y_data_rate) {
-			ddb->y_plane[pipe][plane_id].start = start;
-			ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks;
+		/* Gen11+ uses a separate plane for UV watermarks */
+		WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_plane_blocks);
+
+		if (uv_data_rate) {
+			cstate->wm.skl.plane_ddb_uv[plane_id].start = start;
+			cstate->wm.skl.plane_ddb_uv[plane_id].end =
+				start + uv_plane_blocks;
 		}
 
-		start += y_plane_blocks;
+		start += uv_plane_blocks;
 	}
 
 	return 0;
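
The proportional distribution in the loop above can be checked with a small standalone sketch (userspace C; the block counts and data rates are made-up inputs):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed leftover blocks and per-plane data rates. */
	uint16_t alloc_size = 800;
	uint64_t total_data_rate = 30000000;
	uint64_t plane_rate[2] = { 20000000, 10000000 };
	uint16_t minimum[2] = { 8, 8 };

	for (int i = 0; i < 2; i++) {
		/* minimum + share of the remainder, as in the patch */
		uint16_t blocks = minimum[i] +
			(uint16_t)(alloc_size * plane_rate[i] / total_data_rate);
		printf("plane %d: %u blocks\n", i, blocks);	/* 541, 274 */
	}
	return 0;
}
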
@@ -4289,7 +4465,7 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 */
 static uint_fixed_16_16_t
 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
-	       uint8_t cpp, uint32_t latency)
+	       uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
 {
 	uint32_t wm_intermediate_val;
 	uint_fixed_16_16_t ret;
@@ -4298,7 +4474,7 @@  skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
 		return FP_16_16_MAX;
 
 	wm_intermediate_val = latency * pixel_rate * cpp;
-	ret = div_fixed16(wm_intermediate_val, 1000 * 512);
+	ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
 
 	if (INTEL_GEN(dev_priv) >= 10)
 		ret = add_fixed16_u32(ret, 1);
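
With the new dbuf_block_size parameter, method1 is essentially latency * pixel_rate * cpp / (1000 * block_size), with latency in microseconds and the pixel rate in kHz. A standalone sketch with assumed inputs (not driver code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed: 10us latency, 300MHz pixel clock (expressed in kHz),
	 * 4 bytes per pixel, 512-byte DBuf blocks. */
	uint64_t latency_us = 10;
	uint64_t pixel_rate_khz = 300000;
	uint64_t cpp = 4;
	uint64_t block_size = 512;

	double blocks = (double)(latency_us * pixel_rate_khz * cpp) /
			(1000.0 * block_size);

	printf("method1 ~= %.2f blocks\n", blocks);	/* ~23.44 */
	return 0;
}

On gen11 the 256-byte block size for 64bpp Yf-tiled surfaces roughly doubles the resulting block count for the same latency and rate.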
@@ -4325,7 +4501,7 @@  static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
 }
 
 static uint_fixed_16_16_t
-intel_get_linetime_us(struct intel_crtc_state *cstate)
+intel_get_linetime_us(const struct intel_crtc_state *cstate)
 {
 	uint32_t pixel_rate;
 	uint32_t crtc_htotal;
@@ -4368,12 +4544,12 @@  skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
 }
 
 static int
-skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
-			    struct intel_crtc_state *cstate,
+skl_compute_plane_wm_params(const struct intel_crtc_state *cstate,
 			    const struct intel_plane_state *intel_pstate,
-			    struct skl_wm_params *wp)
+			    struct skl_wm_params *wp, int plane_id)
 {
 	struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	const struct drm_plane_state *pstate = &intel_pstate->base;
 	const struct drm_framebuffer *fb = pstate->fb;
 	uint32_t interm_pbpl;
@@ -4381,8 +4557,11 @@  skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 		to_intel_atomic_state(cstate->base.state);
 	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
 
-	if (!intel_wm_plane_visible(cstate, intel_pstate))
-		return 0;
+	/* only the NV12 format has two planes */
+	if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) {
+		DRM_DEBUG_KMS("Non-NV12 formats have a single plane\n");
+		return -EINVAL;
+	}
 
 	wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
 		      fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
@@ -4391,6 +4570,7 @@  skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 	wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
 	wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 			 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
+	wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
 
 	if (plane->id == PLANE_CURSOR) {
 		wp->width = intel_pstate->base.crtc_w;
@@ -4403,11 +4583,19 @@  skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 		wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	}
 
-	wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
-							    fb->format->cpp[0];
+	if (plane_id == 1 && wp->is_planar)
+		wp->width /= 2;
+
+	wp->cpp = fb->format->cpp[plane_id];
 	wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
 							     intel_pstate);
 
+	if (INTEL_GEN(dev_priv) >= 11 &&
+	    fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
+		wp->dbuf_block_size = 256;
+	else
+		wp->dbuf_block_size = 512;
+
 	if (drm_rotation_90_or_270(pstate->rotation)) {
 
 		switch (wp->cpp) {
@@ -4434,7 +4622,8 @@  skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 	wp->plane_bytes_per_line = wp->width * wp->cpp;
 	if (wp->y_tiled) {
 		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
-					   wp->y_min_scanlines, 512);
+					   wp->y_min_scanlines,
+					   wp->dbuf_block_size);
 
 		if (INTEL_GEN(dev_priv) >= 10)
 			interm_pbpl++;
@@ -4442,10 +4631,12 @@  skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 		wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
 							wp->y_min_scanlines);
 	} else if (wp->x_tiled && IS_GEN9(dev_priv)) {
-		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
+					   wp->dbuf_block_size);
 		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	} else {
-		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
+					   wp->dbuf_block_size) + 1;
 		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	}
 
@@ -4457,16 +4648,16 @@  skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 	return 0;
 }
 
-static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
-				struct intel_crtc_state *cstate,
+static int skl_compute_plane_wm(const struct intel_crtc_state *cstate,
 				const struct intel_plane_state *intel_pstate,
 				uint16_t ddb_allocation,
 				int level,
 				const struct skl_wm_params *wp,
-				uint16_t *out_blocks, /* out */
-				uint8_t *out_lines, /* out */
-				bool *enabled /* out */)
+				const struct skl_wm_level *result_prev,
+				struct skl_wm_level *result /* out */)
 {
+	struct drm_i915_private *dev_priv =
+		to_i915(intel_pstate->base.plane->dev);
 	const struct drm_plane_state *pstate = &intel_pstate->base;
 	uint32_t latency = dev_priv->wm.skl_latency[level];
 	uint_fixed_16_16_t method1, method2;
@@ -4475,12 +4666,10 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	struct intel_atomic_state *state =
 		to_intel_atomic_state(cstate->base.state);
 	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+	uint32_t min_disp_buf_needed;
 
-	if (latency == 0 ||
-	    !intel_wm_plane_visible(cstate, intel_pstate)) {
-		*enabled = false;
-		return 0;
-	}
+	if (latency == 0)
+		return level == 0 ? -EINVAL : 0;
 
 	/* Display WA #1141: kbl,cfl */
 	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
@@ -4492,7 +4681,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		latency += 15;
 
 	method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
-				 wp->cpp, latency);
+				 wp->cpp, latency, wp->dbuf_block_size);
 	method2 = skl_wm_method2(wp->plane_pixel_rate,
 				 cstate->base.adjusted_mode.crtc_htotal,
 				 latency,
@@ -4502,15 +4691,25 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		selected_result = max_fixed16(method2, wp->y_tile_minimum);
 	} else {
 		if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
-		     512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
+		     wp->dbuf_block_size < 1) &&
+		     (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) {
 			selected_result = method2;
-		else if (ddb_allocation >=
-			 fixed16_to_u32_round_up(wp->plane_blocks_per_line))
-			selected_result = min_fixed16(method1, method2);
-		else if (latency >= wp->linetime_us)
-			selected_result = min_fixed16(method1, method2);
-		else
+		} else if (ddb_allocation >=
+			 fixed16_to_u32_round_up(wp->plane_blocks_per_line)) {
+			if (INTEL_GEN(dev_priv) == 9 &&
+			    !IS_GEMINILAKE(dev_priv))
+				selected_result = min_fixed16(method1, method2);
+			else
+				selected_result = method2;
+		} else if (latency >= wp->linetime_us) {
+			if (INTEL_GEN(dev_priv) == 9 &&
+			    !IS_GEMINILAKE(dev_priv))
+				selected_result = min_fixed16(method1, method2);
+			else
+				selected_result = method2;
+		} else {
 			selected_result = method1;
+		}
 	}
 
 	res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
@@ -4530,11 +4729,43 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		} else {
 			res_blocks++;
 		}
+
+		/*
+		 * Make sure the result blocks for higher latency levels are
+		 * at least as high as in the level below the current one.
+		 * This is an assumption in the DDB algorithm optimization
+		 * for special cases. Also covers Display WA #1125 for RC.
+		 */
+		if (result_prev->plane_res_b > res_blocks)
+			res_blocks = result_prev->plane_res_b;
 	}
 
-	if (res_blocks >= ddb_allocation || res_lines > 31) {
-		*enabled = false;
+	if (INTEL_GEN(dev_priv) >= 11) {
+		if (wp->y_tiled) {
+			uint32_t extra_lines;
+			uint_fixed_16_16_t fp_min_disp_buf_needed;
 
+			if (res_lines % wp->y_min_scanlines == 0)
+				extra_lines = wp->y_min_scanlines;
+			else
+				extra_lines = wp->y_min_scanlines * 2 -
+					      res_lines % wp->y_min_scanlines;
+
+			fp_min_disp_buf_needed = mul_u32_fixed16(res_lines +
+						extra_lines,
+						wp->plane_blocks_per_line);
+			min_disp_buf_needed = fixed16_to_u32_round_up(
+						fp_min_disp_buf_needed);
+		} else {
+			min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10);
+		}
+	} else {
+		min_disp_buf_needed = res_blocks;
+	}
+
+	if ((level > 0 && res_lines > 31) ||
+	    res_blocks >= ddb_allocation ||
+	    min_disp_buf_needed >= ddb_allocation) {
 		/*
 		 * If there are no valid level 0 watermarks, then we can't
 		 * support this display configuration.
@@ -4552,55 +4783,59 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		}
 	}
 
-	*out_blocks = res_blocks;
-	*out_lines = res_lines;
-	*enabled = true;
+	/*
+	 * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A)
+	 * disable wm levels 1-7 on NV12 planes
+	 */
+	if (wp->is_planar && level >= 1 &&
+	    (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
+	     IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) {
+		result->plane_en = false;
+		return 0;
+	}
+
+	/* The number of lines is ignored for the level 0 watermark. */
+	result->plane_res_b = res_blocks;
+	result->plane_res_l = res_lines;
+	result->plane_en = true;
 
 	return 0;
 }
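
The result_prev plumbing added above keeps the per-level block counts monotonic. A standalone sketch of that clamp (userspace C; the raw per-level values are assumptions, and in the patch the clamp applies within levels 1-7):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed raw per-level results before the clamp. */
	uint16_t res_b[8] = { 30, 28, 35, 33, 40, 38, 45, 44 };
	uint16_t prev_b = res_b[0];

	for (int level = 1; level < 8; level++) {
		if (prev_b > res_b[level])
			res_b[level] = prev_b;	/* never below the level under it */
		prev_b = res_b[level];
	}

	for (int level = 0; level < 8; level++)
		printf("WM%d: %u blocks\n", level, res_b[level]);
	return 0;
}
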
 
 static int
-skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
-		      struct skl_ddb_allocation *ddb,
-		      struct intel_crtc_state *cstate,
+skl_compute_wm_levels(const struct intel_crtc_state *cstate,
 		      const struct intel_plane_state *intel_pstate,
+		      uint16_t ddb_blocks,
 		      const struct skl_wm_params *wm_params,
-		      struct skl_plane_wm *wm)
+		      struct skl_wm_level *levels)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
-	struct drm_plane *plane = intel_pstate->base.plane;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	uint16_t ddb_blocks;
-	enum pipe pipe = intel_crtc->pipe;
+	struct drm_i915_private *dev_priv =
+		to_i915(intel_pstate->base.plane->dev);
 	int level, max_level = ilk_wm_max_level(dev_priv);
+	struct skl_wm_level *result_prev = &levels[0];
 	int ret;
 
-	if (WARN_ON(!intel_pstate->base.fb))
-		return -EINVAL;
-
-	ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
-
 	for (level = 0; level <= max_level; level++) {
-		struct skl_wm_level *result = &wm->wm[level];
+		struct skl_wm_level *result = &levels[level];
 
-		ret = skl_compute_plane_wm(dev_priv,
-					   cstate,
+		ret = skl_compute_plane_wm(cstate,
 					   intel_pstate,
 					   ddb_blocks,
 					   level,
 					   wm_params,
-					   &result->plane_res_b,
-					   &result->plane_res_l,
-					   &result->plane_en);
+					   result_prev,
+					   result);
 		if (ret)
 			return ret;
+
+		result_prev = result;
 	}
 
 	return 0;
 }
 
 static uint32_t
-skl_compute_linetime_wm(struct intel_crtc_state *cstate)
+skl_compute_linetime_wm(const struct intel_crtc_state *cstate)
 {
 	struct drm_atomic_state *state = cstate->base.state;
 	struct drm_i915_private *dev_priv = to_i915(state->dev);
@@ -4622,41 +4857,50 @@  skl_compute_linetime_wm(struct intel_crtc_state *cstate)
 	return linetime_wm;
 }
 
-static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
-				      struct skl_wm_params *wp,
-				      struct skl_wm_level *wm_l0,
-				      uint16_t ddb_allocation,
-				      struct skl_wm_level *trans_wm /* out */)
+static void skl_compute_transition_wm(const struct intel_crtc_state *cstate,
+				      const struct skl_wm_params *wp,
+				      struct skl_plane_wm *wm,
+				      uint16_t ddb_allocation)
 {
 	struct drm_device *dev = cstate->base.crtc->dev;
 	const struct drm_i915_private *dev_priv = to_i915(dev);
 	uint16_t trans_min, trans_y_tile_min;
 	const uint16_t trans_amount = 10; /* This is a configurable amount */
-	uint16_t trans_offset_b, res_blocks;
-
-	if (!cstate->base.active)
-		goto exit;
+	uint16_t wm0_sel_res_b, trans_offset_b, res_blocks;
 
 	/* Transition WM are not recommended by HW team for GEN9 */
 	if (INTEL_GEN(dev_priv) <= 9)
-		goto exit;
+		return;
 
 	/* Transition WM don't make any sense if ipc is disabled */
 	if (!dev_priv->ipc_enabled)
-		goto exit;
+		return;
 
-	if (INTEL_GEN(dev_priv) >= 10)
+	trans_min = 14;
+	if (INTEL_GEN(dev_priv) >= 11)
 		trans_min = 4;
 
 	trans_offset_b = trans_min + trans_amount;
 
+	/*
+	 * The spec asks for Selected Result Blocks for wm0 (the real value),
+	 * not Result Blocks (the integer value). Pay attention to the capital
+	 * letters. The value wm_l0->plane_res_b is actually Result Blocks, but
+	 * since Result Blocks is the ceiling of Selected Result Blocks plus 1,
+	 * and since we later will have to get the ceiling of the sum in the
+	 * transition watermarks calculation, we can just pretend Selected
+	 * Result Blocks is Result Blocks minus 1 and it should work for the
+	 * current platforms.
+	 */
+	wm0_sel_res_b = wm->wm[0].plane_res_b - 1;
+
 	if (wp->y_tiled) {
 		trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
 							wp->y_tile_minimum);
-		res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
+		res_blocks = max(wm0_sel_res_b, trans_y_tile_min) +
 				trans_offset_b;
 	} else {
-		res_blocks = wm_l0->plane_res_b + trans_offset_b;
+		res_blocks = wm0_sel_res_b + trans_offset_b;
 
 		/* WA BUG:1938466 add one block for non y-tile planes */
 		if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
@@ -4667,25 +4911,131 @@  static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
 	res_blocks += 1;
 
 	if (res_blocks < ddb_allocation) {
-		trans_wm->plane_res_b = res_blocks;
-		trans_wm->plane_en = true;
-		return;
+		wm->trans_wm.plane_res_b = res_blocks;
+		wm->trans_wm.plane_en = true;
+	}
+}
+
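A worked example of the transition watermark above, including the -1 adjustment back to Selected Result Blocks described in the comment (standalone userspace C; the wm0 result and the y-tile minimum are assumptions, trans_min follows the gen11 value from the patch):

#include <stdio.h>
#include <stdint.h>

static uint16_t max_u16(uint16_t a, uint16_t b)
{
	return a > b ? a : b;
}

int main(void)
{
	uint16_t trans_min = 4;			/* gen11 value */
	uint16_t trans_amount = 10;		/* the configurable amount */
	uint16_t trans_offset_b = trans_min + trans_amount;
	uint16_t wm0_res_b = 24;		/* assumed level-0 Result Blocks */
	uint16_t wm0_sel_res_b = wm0_res_b - 1;	/* back to Selected Result Blocks */
	uint16_t trans_y_tile_min = 2 * 16;	/* assumed 2 * y_tile_minimum */

	/* y-tiled case from the patch */
	uint16_t res_blocks = max_u16(wm0_sel_res_b, trans_y_tile_min) +
			      trans_offset_b;

	res_blocks += 1;	/* WM is transitioned when >= the level */
	printf("trans wm: %u blocks\n", res_blocks);	/* 47 */
	return 0;
}
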
+static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
+				     const struct intel_plane_state *plane_state,
+				     enum plane_id plane_id, int color_plane)
+{
+	struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
+	u16 ddb_blocks = skl_ddb_entry_size(&crtc_state->wm.skl.plane_ddb_y[plane_id]);
+	struct skl_wm_params wm_params;
+	int ret;
+
+	ret = skl_compute_plane_wm_params(crtc_state, plane_state,
+					  &wm_params, color_plane);
+	if (ret)
+		return ret;
+
+	ret = skl_compute_wm_levels(crtc_state, plane_state,
+				    ddb_blocks, &wm_params, wm->wm);
+	if (ret)
+		return ret;
+
+	skl_compute_transition_wm(crtc_state, &wm_params, wm, ddb_blocks);
+
+	return 0;
+}
+
+static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
+				 const struct intel_plane_state *plane_state,
+				 enum plane_id plane_id)
+{
+	struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
+	u16 ddb_blocks = skl_ddb_entry_size(&crtc_state->wm.skl.plane_ddb_uv[plane_id]);
+	struct skl_wm_params wm_params;
+	int ret;
+
+	wm->is_planar = true;
+
+	/* uv plane watermarks must also be validated for NV12/Planar */
+	ret = skl_compute_plane_wm_params(crtc_state, plane_state,
+					  &wm_params, 1);
+	if (ret)
+		return ret;
+
+	ret = skl_compute_wm_levels(crtc_state, plane_state,
+				    ddb_blocks, &wm_params, wm->uv_wm);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int skl_build_plane_wm(struct skl_pipe_wm *pipe_wm,
+			      struct intel_crtc_state *crtc_state,
+			      const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	enum plane_id plane_id = plane->id;
+	int ret;
+
+	if (!intel_wm_plane_visible(crtc_state, plane_state))
+		return 0;
+
+	ret = skl_build_plane_wm_single(crtc_state, plane_state,
+					plane_id, 0);
+	if (ret)
+		return ret;
+
+	if (fb->format->format == DRM_FORMAT_NV12) {
+		ret = skl_build_plane_wm_uv(crtc_state, plane_state,
+					    plane_id);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int icl_build_plane_wm(struct skl_pipe_wm *pipe_wm,
+			      struct intel_crtc_state *crtc_state,
+			      const struct intel_plane_state *plane_state)
+{
+	enum plane_id plane_id = to_intel_plane(plane_state->base.plane)->id;
+	int ret;
+
+	/* Watermarks calculated in master */
+	if (plane_state->slave)
+		return 0;
+
+	if (plane_state->linked_plane) {
+		const struct drm_framebuffer *fb = plane_state->base.fb;
+		enum plane_id y_plane_id = plane_state->linked_plane->id;
+
+		WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state));
+		WARN_ON(fb->format->format != DRM_FORMAT_NV12);
+
+		ret = skl_build_plane_wm_single(crtc_state, plane_state,
+						y_plane_id, 0);
+		if (ret)
+			return ret;
+
+		ret = skl_build_plane_wm_single(crtc_state, plane_state,
+						plane_id, 1);
+		if (ret)
+			return ret;
+	} else if (intel_wm_plane_visible(crtc_state, plane_state)) {
+		ret = skl_build_plane_wm_single(crtc_state, plane_state,
+						plane_id, 0);
+		if (ret)
+			return ret;
 	}
 
-exit:
-	trans_wm->plane_en = false;
+	return 0;
 }
 
 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
-			     struct skl_ddb_allocation *ddb,
 			     struct skl_pipe_wm *pipe_wm)
 {
-	struct drm_device *dev = cstate->base.crtc->dev;
+	struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
 	struct drm_crtc_state *crtc_state = &cstate->base;
-	const struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_plane *plane;
 	const struct drm_plane_state *pstate;
-	struct skl_plane_wm *wm;
 	int ret;
 
 	/*
@@ -4697,27 +5047,17 @@  static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
 	drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
 		const struct intel_plane_state *intel_pstate =
 						to_intel_plane_state(pstate);
-		enum plane_id plane_id = to_intel_plane(plane)->id;
-		struct skl_wm_params wm_params;
-		enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
-		uint16_t ddb_blocks;
-
-		wm = &pipe_wm->planes[plane_id];
-		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
-		memset(&wm_params, 0, sizeof(struct skl_wm_params));
 
-		ret = skl_compute_plane_wm_params(dev_priv, cstate,
-						  intel_pstate, &wm_params);
-		if (ret)
-			return ret;
-
-		ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
-					    intel_pstate, &wm_params, wm);
+		if (INTEL_GEN(dev_priv) >= 11)
+			ret = icl_build_plane_wm(pipe_wm,
+						 cstate, intel_pstate);
+		else
+			ret = skl_build_plane_wm(pipe_wm,
+						 cstate, intel_pstate);
 		if (ret)
 			return ret;
-		skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
-					  ddb_blocks, &wm->trans_wm);
 	}
+
 	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
 
 	return 0;
@@ -4728,9 +5068,9 @@  static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
 				const struct skl_ddb_entry *entry)
 {
 	if (entry->end)
-		I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
+		I915_WRITE_FW(reg, (entry->end - 1) << 16 | entry->start);
 	else
-		I915_WRITE(reg, 0);
+		I915_WRITE_FW(reg, 0);
 }
 
 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
@@ -4745,19 +5085,22 @@  static void skl_write_wm_level(struct drm_i915_private *dev_priv,
 		val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
 	}
 
-	I915_WRITE(reg, val);
+	I915_WRITE_FW(reg, val);
 }
 
-static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
-			       const struct skl_plane_wm *wm,
-			       const struct skl_ddb_allocation *ddb,
-			       enum plane_id plane_id)
+void skl_write_plane_wm(struct intel_plane *plane,
+			const struct intel_crtc_state *crtc_state)
 {
-	struct drm_crtc *crtc = &intel_crtc->base;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	int level, max_level = ilk_wm_max_level(dev_priv);
-	enum pipe pipe = intel_crtc->pipe;
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	const struct skl_plane_wm *wm =
+		&crtc_state->wm.skl.optimal.planes[plane_id];
+	const struct skl_ddb_entry *ddb_y =
+		&crtc_state->wm.skl.plane_ddb_y[plane_id];
+	const struct skl_ddb_entry *ddb_uv =
+		&crtc_state->wm.skl.plane_ddb_uv[plane_id];
 
 	for (level = 0; level <= max_level; level++) {
 		skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
@@ -4766,21 +5109,32 @@  static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
 	skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
 			   &wm->trans_wm);
 
-	skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
-			    &ddb->plane[pipe][plane_id]);
-	skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id),
-			    &ddb->y_plane[pipe][plane_id]);
+	if (INTEL_GEN(dev_priv) >= 11) {
+		skl_ddb_entry_write(dev_priv,
+				    PLANE_BUF_CFG(pipe, plane_id), ddb_y);
+		return;
+	}
+
+	if (wm->is_planar)
+		swap(ddb_y, ddb_uv);
+
+	skl_ddb_entry_write(dev_priv,
+			    PLANE_BUF_CFG(pipe, plane_id), ddb_y);
+	skl_ddb_entry_write(dev_priv,
+			    PLANE_NV12_BUF_CFG(pipe, plane_id), ddb_uv);
 }
 
-static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
-				const struct skl_plane_wm *wm,
-				const struct skl_ddb_allocation *ddb)
+void skl_write_cursor_wm(struct intel_plane *plane,
+			 const struct intel_crtc_state *crtc_state)
 {
-	struct drm_crtc *crtc = &intel_crtc->base;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	int level, max_level = ilk_wm_max_level(dev_priv);
-	enum pipe pipe = intel_crtc->pipe;
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	const struct skl_plane_wm *wm =
+		&crtc_state->wm.skl.optimal.planes[plane_id];
+	const struct skl_ddb_entry *ddb =
+		&crtc_state->wm.skl.plane_ddb_y[plane_id];
 
 	for (level = 0; level <= max_level; level++) {
 		skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
@@ -4788,22 +5142,30 @@  static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
 	}
 	skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
 
-	skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
-			    &ddb->plane[pipe][PLANE_CURSOR]);
+	skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), ddb);
 }
 
 bool skl_wm_level_equals(const struct skl_wm_level *l1,
 			 const struct skl_wm_level *l2)
 {
-	if (l1->plane_en != l2->plane_en)
-		return false;
+	return l1->plane_en == l2->plane_en &&
+		l1->plane_res_l == l2->plane_res_l &&
+		l1->plane_res_b == l2->plane_res_b;
+}
 
-	/* If both planes aren't enabled, the rest shouldn't matter */
-	if (!l1->plane_en)
-		return true;
+static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
+				const struct skl_plane_wm *wm1,
+				const struct skl_plane_wm *wm2)
+{
+	int level, max_level = ilk_wm_max_level(dev_priv);
 
-	return (l1->plane_res_l == l2->plane_res_l &&
-		l1->plane_res_b == l2->plane_res_b);
+	for (level = 0; level <= max_level; level++) {
+		if (!skl_wm_level_equals(&wm1->wm[level], &wm2->wm[level]) ||
+		    !skl_wm_level_equals(&wm1->uv_wm[level], &wm2->uv_wm[level]))
+			return false;
+	}
+
+	return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
 }
 
 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
@@ -4812,16 +5174,15 @@  static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
 	return a->start < b->end && b->start < a->end;
 }
 
-bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
-				 const struct skl_ddb_entry **entries,
-				 const struct skl_ddb_entry *ddb,
-				 int ignore)
+bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb,
+				 const struct skl_ddb_entry entries[],
+				 int num_entries, int ignore_idx)
 {
-	enum pipe pipe;
+	int i;
 
-	for_each_pipe(dev_priv, pipe) {
-		if (pipe != ignore && entries[pipe] &&
-		    skl_ddb_entries_overlap(ddb, entries[pipe]))
+	for (i = 0; i < num_entries; i++) {
+		if (i != ignore_idx &&
+		    skl_ddb_entries_overlap(ddb, &entries[i]))
 			return true;
 	}
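
The new array-based overlap check treats DDB entries as half-open intervals. A minimal standalone sketch (userspace C; the entry values are made up):

#include <stdio.h>
#include <stdbool.h>

struct ddb_entry { int start, end; };	/* end is exclusive */

static bool entries_overlap(const struct ddb_entry *a,
			    const struct ddb_entry *b)
{
	/* Half-open intervals overlap iff each starts before the other ends. */
	return a->start < b->end && b->start < a->end;
}

int main(void)
{
	struct ddb_entry entries[3] = { {0, 200}, {200, 500}, {450, 800} };
	struct ddb_entry mine = { 180, 260 };
	int ignore_idx = 0;	/* skip our own old allocation */

	for (int i = 0; i < 3; i++)
		if (i != ignore_idx && entries_overlap(&mine, &entries[i]))
			printf("overlaps entry %d\n", i);	/* entry 1 */
	return 0;
}
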
 
@@ -4831,13 +5192,12 @@  bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
 static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
 			      const struct skl_pipe_wm *old_pipe_wm,
 			      struct skl_pipe_wm *pipe_wm, /* out */
-			      struct skl_ddb_allocation *ddb, /* out */
 			      bool *changed /* out */)
 {
 	struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
 	int ret;
 
-	ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
+	ret = skl_build_pipe_wm(intel_cstate, pipe_wm);
 	if (ret)
 		return ret;
 
@@ -4863,34 +5223,29 @@  pipes_modified(struct drm_atomic_state *state)
 }
 
 static int
-skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
+skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state,
+			    struct intel_crtc_state *new_crtc_state)
 {
-	struct drm_atomic_state *state = cstate->base.state;
-	struct drm_device *dev = state->dev;
-	struct drm_crtc *crtc = cstate->base.crtc;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
-	struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
-	struct drm_plane_state *plane_state;
-	struct drm_plane *plane;
-	enum pipe pipe = intel_crtc->pipe;
-
-	WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
+	struct intel_atomic_state *state = to_intel_atomic_state(new_crtc_state->base.state);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_plane *plane;
 
-	drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
-		enum plane_id plane_id = to_intel_plane(plane)->id;
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+		struct intel_plane_state *plane_state;
+		enum plane_id plane_id = plane->id;
 
-		if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
-					&new_ddb->plane[pipe][plane_id]) &&
-		    skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
-					&new_ddb->y_plane[pipe][plane_id]))
+		if (skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_y[plane_id],
+					&new_crtc_state->wm.skl.plane_ddb_y[plane_id]) &&
+		    skl_ddb_entry_equal(&old_crtc_state->wm.skl.plane_ddb_uv[plane_id],
+					&new_crtc_state->wm.skl.plane_ddb_uv[plane_id]))
 			continue;
 
-		plane_state = drm_atomic_get_plane_state(state, plane);
+		plane_state = intel_atomic_get_plane_state(state, plane);
 		if (IS_ERR(plane_state))
 			return PTR_ERR(plane_state);
+
+		new_crtc_state->update_planes |= BIT(plane_id);
 	}
 
 	return 0;
@@ -4899,13 +5254,93 @@  skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
 static int
 skl_compute_ddb(struct drm_atomic_state *state)
 {
-	struct drm_device *dev = state->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	const struct drm_i915_private *dev_priv = to_i915(state->dev);
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct intel_crtc *intel_crtc;
 	struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
+	struct intel_crtc_state *old_crtc_state;
+	struct intel_crtc_state *new_crtc_state;
+	struct intel_crtc *crtc;
+	int ret, i;
+
+	memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
+
+	for_each_oldnew_intel_crtc_in_state(intel_state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		ret = skl_allocate_pipe_ddb(new_crtc_state, ddb);
+		if (ret)
+			return ret;
+
+		ret = skl_ddb_add_affected_planes(old_crtc_state,
+						  new_crtc_state);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+skl_print_wm_changes(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	const struct intel_crtc_state *old_crtc_state;
+	const struct intel_crtc_state *new_crtc_state;
+	struct intel_plane *plane;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+			enum plane_id plane_id = plane->id;
+			const struct skl_ddb_entry *old, *new;
+
+			old = &old_crtc_state->wm.skl.plane_ddb_y[plane_id];
+			new = &new_crtc_state->wm.skl.plane_ddb_y[plane_id];
+
+			if (skl_ddb_entry_equal(old, new))
+				continue;
+
+			DRM_DEBUG_KMS("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
+				      plane->base.base.id, plane->base.name,
+				      old->start, old->end,
+				      new->start, new->end);
+		}
+	}
+}
+
+static int
+skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed)
+{
+	struct drm_device *dev = state->dev;
+	const struct drm_i915_private *dev_priv = to_i915(dev);
+	const struct drm_crtc *crtc;
+	const struct drm_crtc_state *cstate;
+	struct intel_crtc *intel_crtc;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	uint32_t realloc_pipes = pipes_modified(state);
-	int ret;
+	int ret, i;
+
+	/*
+	 * When we distrust bios wm we always need to recompute to set the
+	 * expected DDB allocations for each CRTC.
+	 */
+	if (dev_priv->wm.distrust_bios_wm)
+		(*changed) = true;
+
+	/*
+	 * If this transaction isn't actually touching any CRTC's, don't
+	 * bother with watermark calculation.  Note that if we pass this
+	 * test, we're guaranteed to hold at least one CRTC state mutex,
+	 * which means we can safely use values like dev_priv->active_crtcs
+	 * since any racing commits that want to update them would need to
+	 * hold _all_ CRTC state mutexes.
+	 */
+	for_each_new_crtc_in_state(state, crtc, cstate, i)
+		(*changed) = true;
+
+	if (!*changed)
+		return 0;
 
 	/*
 	 * If this is our first atomic update following hardware readout,
@@ -4944,7 +5379,7 @@  skl_compute_ddb(struct drm_atomic_state *state)
 	 * any other display updates race with this transaction, so we need
 	 * to grab the lock on *all* CRTC's.
 	 */
-	if (intel_state->active_pipe_changes) {
+	if (intel_state->active_pipe_changes || intel_state->modeset) {
 		realloc_pipes = ~0;
 		intel_state->wm_results.dirty_pipes = ~0;
 	}
@@ -4953,73 +5388,75 @@  skl_compute_ddb(struct drm_atomic_state *state)
 	 * We're not recomputing for the pipes not included in the commit, so
 	 * make sure we start with the current state.
 	 */
-	memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
-
 	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
 		struct intel_crtc_state *cstate;
 
 		cstate = intel_atomic_get_crtc_state(state, intel_crtc);
 		if (IS_ERR(cstate))
 			return PTR_ERR(cstate);
-
-		ret = skl_allocate_pipe_ddb(cstate, ddb);
-		if (ret)
-			return ret;
-
-		ret = skl_ddb_add_affected_planes(cstate);
-		if (ret)
-			return ret;
 	}
 
 	return 0;
 }
 
-static void
-skl_copy_wm_for_pipe(struct skl_wm_values *dst,
-		     struct skl_wm_values *src,
-		     enum pipe pipe)
-{
-	memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
-	       sizeof(dst->ddb.y_plane[pipe]));
-	memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
-	       sizeof(dst->ddb.plane[pipe]));
-}
-
-static void
-skl_print_wm_changes(const struct drm_atomic_state *state)
+/*
+ * To make sure the cursor watermark registers are always consistent
+ * with our computed state the following scenario needs special
+ * treatment:
+ *
+ * 1. enable cursor
+ * 2. move cursor entirely offscreen
+ * 3. disable cursor
+ *
+ * Step 2. does call .disable_plane() but does not zero the watermarks
+ * (since we consider an offscreen cursor still active for the purposes
+ * of watermarks). Step 3. would not normally call .disable_plane()
+ * because the actual plane visibility isn't changing, and we don't
+ * deallocate the cursor ddb until the pipe gets disabled. So we must
+ * force step 3. to call .disable_plane() to update the watermark
+ * registers properly.
+ *
+ * Other planes do not suffer from this issue as their watermarks are
+ * calculated based on the actual plane visibility. The only time this
+ * can trigger for the other planes is during the initial readout, as the
+ * default value of the watermark registers is not zero.
+ */
+static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
+				      struct intel_crtc *crtc)
 {
-	const struct drm_device *dev = state->dev;
-	const struct drm_i915_private *dev_priv = to_i915(dev);
-	const struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(state);
-	const struct drm_crtc *crtc;
-	const struct drm_crtc_state *cstate;
-	const struct intel_plane *intel_plane;
-	const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
-	const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
-	int i;
-
-	for_each_new_crtc_in_state(state, crtc, cstate, i) {
-		const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-		enum pipe pipe = intel_crtc->pipe;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+	struct intel_plane *plane;
 
-		for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
-			enum plane_id plane_id = intel_plane->id;
-			const struct skl_ddb_entry *old, *new;
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+		struct intel_plane_state *plane_state;
+		enum plane_id plane_id = plane->id;
 
-			old = &old_ddb->plane[pipe][plane_id];
-			new = &new_ddb->plane[pipe][plane_id];
+		/*
+		 * Force a full wm update for every plane on modeset.
+		 * Required because the reset value of the wm registers
+		 * is non-zero, whereas we want all disabled planes to
+		 * have zero watermarks. So if we turn off the relevant
+		 * power well, the hardware state will go out of sync
+		 * with the software state.
+		 */
+		if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->base) &&
+		    skl_plane_wm_equals(dev_priv,
+					&old_crtc_state->wm.skl.optimal.planes[plane_id],
+					&new_crtc_state->wm.skl.optimal.planes[plane_id]))
+			continue;
 
-			if (skl_ddb_entry_equal(old, new))
-				continue;
+		plane_state = intel_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
 
-			DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
-					 intel_plane->base.base.id,
-					 intel_plane->base.name,
-					 old->start, old->end,
-					 new->start, new->end);
-		}
+		new_crtc_state->update_planes |= BIT(plane_id);
 	}
+
+	return 0;
 }
 
 static int
@@ -5028,36 +5465,18 @@  skl_compute_wm(struct drm_atomic_state *state)
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *cstate;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct skl_wm_values *results = &intel_state->wm_results;
-	struct drm_device *dev = state->dev;
+	struct skl_ddb_values *results = &intel_state->wm_results;
 	struct skl_pipe_wm *pipe_wm;
 	bool changed = false;
 	int ret, i;
 
-	/*
-	 * When we distrust bios wm we always need to recompute to set the
-	 * expected DDB allocations for each CRTC.
-	 */
-	if (to_i915(dev)->wm.distrust_bios_wm)
-		changed = true;
-
-	/*
-	 * If this transaction isn't actually touching any CRTC's, don't
-	 * bother with watermark calculation.  Note that if we pass this
-	 * test, we're guaranteed to hold at least one CRTC state mutex,
-	 * which means we can safely use values like dev_priv->active_crtcs
-	 * since any racing commits that want to update them would need to
-	 * hold _all_ CRTC state mutexes.
-	 */
-	for_each_new_crtc_in_state(state, crtc, cstate, i)
-		changed = true;
-
-	if (!changed)
-		return 0;
-
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
+	ret = skl_ddb_add_affected_pipes(state, &changed);
+	if (ret || !changed)
+		return ret;
+
 	ret = skl_compute_ddb(state);
 	if (ret)
 		return ret;
@@ -5079,8 +5498,12 @@  skl_compute_wm(struct drm_atomic_state *state)
 			&to_intel_crtc_state(crtc->state)->wm.skl.optimal;
 
 		pipe_wm = &intel_cstate->wm.skl.optimal;
-		ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
-					 &results->ddb, &changed);
+		ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, &changed);
+		if (ret)
+			return ret;
+
+		ret = skl_wm_add_affected_planes(intel_state,
+						 to_intel_crtc(crtc));
 		if (ret)
 			return ret;
 
@@ -5094,7 +5517,7 @@  skl_compute_wm(struct drm_atomic_state *state)
 		intel_cstate->update_wm_pre = true;
 	}
 
-	skl_print_wm_changes(state);
+	skl_print_wm_changes(intel_state);
 
 	return 0;
 }
@@ -5105,23 +5528,12 @@  static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
 	struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
-	const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
 	enum pipe pipe = crtc->pipe;
-	enum plane_id plane_id;
 
 	if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
 		return;
 
 	I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
-
-	for_each_plane_id_on_crtc(crtc, plane_id) {
-		if (plane_id != PLANE_CURSOR)
-			skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
-					   ddb, plane_id);
-		else
-			skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
-					    ddb);
-	}
 }
 
 static void skl_initial_wm(struct intel_atomic_state *state,
@@ -5130,9 +5542,7 @@  static void skl_initial_wm(struct intel_atomic_state *state,
 	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct skl_wm_values *results = &state->wm_results;
-	struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
-	enum pipe pipe = intel_crtc->pipe;
+	struct skl_ddb_values *results = &state->wm_results;
 
 	if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
 		return;
@@ -5142,8 +5552,6 @@  static void skl_initial_wm(struct intel_atomic_state *state,
 	if (cstate->base.active_changed)
 		skl_atomic_update_crtc_wm(state, cstate);
 
-	skl_copy_wm_for_pipe(hw_vals, results, pipe);
-
 	mutex_unlock(&dev_priv->wm.wm_mutex);
 }
 
@@ -5274,7 +5682,7 @@  void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
 void skl_wm_get_hw_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
+	struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
 	struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
 	struct drm_crtc *crtc;
 	struct intel_crtc *intel_crtc;
@@ -5294,9 +5702,6 @@  void skl_wm_get_hw_state(struct drm_device *dev)
 	if (dev_priv->active_crtcs) {
 		/* Fully recompute DDB on first atomic commit */
 		dev_priv->wm.distrust_bios_wm = true;
-	} else {
-		/* Easy/common case; just sanitize DDB now if everything off */
-		memset(ddb, 0, sizeof(*ddb));
 	}
 }
 
@@ -5839,11 +6244,17 @@  void intel_enable_ipc(struct drm_i915_private *dev_priv)
 {
 	u32 val;
 
+	if (!HAS_IPC(dev_priv))
+		return;
+
 	/* Display WA #0477 WaDisableIPC: skl */
-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv))
+		dev_priv->ipc_enabled = false;
+
+	/* Display WA #1141: SKL:all KBL:all CFL */
+	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
+	    !dev_priv->dram_info.symmetric_memory)
 		dev_priv->ipc_enabled = false;
-		return;
-	}
 
 	val = I915_READ(DISP_ARB_CTL2);
 
@@ -5857,7 +6268,6 @@  void intel_enable_ipc(struct drm_i915_private *dev_priv)
 
 void intel_init_ipc(struct drm_i915_private *dev_priv)
 {
-	dev_priv->ipc_enabled = false;
 	if (!HAS_IPC(dev_priv))
 		return;
 
@@ -6892,7 +7302,7 @@  static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 			 * No floor required for ring frequency on SKL.
 			 */
 			ring_freq = gpu_freq;
-		} else if (INTEL_INFO(dev_priv)->gen >= 8) {
+		} else if (INTEL_GEN(dev_priv) >= 8) {
 			/* max(2 * GT, DDR). NB: GT is 50MHz units */
 			ring_freq = max(min_ring_freq, gpu_freq);
 		} else if (IS_HASWELL(dev_priv)) {
@@ -7505,7 +7915,7 @@  unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
 	unsigned long val;
 
-	if (INTEL_INFO(dev_priv)->gen != 5)
+	if (INTEL_GEN(dev_priv) != 5)
 		return 0;
 
 	spin_lock_irq(&mchdev_lock);
@@ -7589,7 +7999,7 @@  static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
-	if (INTEL_INFO(dev_priv)->gen != 5)
+	if (INTEL_GEN(dev_priv) != 5)
 		return;
 
 	spin_lock_irq(&mchdev_lock);
@@ -7640,7 +8050,7 @@  unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 {
 	unsigned long val;
 
-	if (INTEL_INFO(dev_priv)->gen != 5)
+	if (INTEL_GEN(dev_priv) != 5)
 		return 0;
 
 	spin_lock_irq(&mchdev_lock);
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 55ea5eb3b7df..cd7f1f4a84b4 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -134,7 +134,7 @@  static void vlv_psr_enable_sink(struct intel_dp *intel_dp)
 static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv,
 				       enum port port)
 {
-	if (INTEL_INFO(dev_priv)->gen >= 9)
+	if (INTEL_GEN(dev_priv) >= 9)
 		return DP_AUX_CH_CTL(port);
 	else
 		return EDP_PSR_AUX_CTL;
@@ -143,7 +143,7 @@  static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv,
 static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv,
 					enum port port, int index)
 {
-	if (INTEL_INFO(dev_priv)->gen >= 9)
+	if (INTEL_GEN(dev_priv) >= 9)
 		return DP_AUX_CH_DATA(port, index);
 	else
 		return EDP_PSR_AUX_DATA(index);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8da1bde442dd..6da9fd74a463 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -700,7 +700,7 @@  static int init_render_ring(struct intel_engine_cs *engine)
 	if (IS_GEN(dev_priv, 6, 7))
 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	if (INTEL_INFO(dev_priv)->gen >= 6)
+	if (INTEL_GEN(dev_priv) >= 6)
 		I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
 
 	return init_workarounds_ring(engine);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 7ddc400a2061..2471062f1a9a 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2572,26 +2572,47 @@  static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv)
 	mutex_unlock(&power_domains->lock);
 }
 
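+/*
+ * Flip the power request bit for a single DBuf slice and verify, after
+ * a short delay, that the power state bit followed the request.
+ */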
-static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
+static inline
+bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv,
+			  i915_reg_t reg, bool enable)
 {
-	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL);
+	u32 val, status;
 
+	val = I915_READ(reg);
+	val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST);
+	I915_WRITE(reg, val);
+	POSTING_READ(reg);
 	udelay(10);
 
-	if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
-		DRM_ERROR("DBuf power enable timeout\n");
+	status = I915_READ(reg) & DBUF_POWER_STATE;
+	if ((enable && !status) || (!enable && status)) {
+		DRM_ERROR("DBuf power %s timeout!\n",
+			  enable ? "enable" : "disable");
+		return false;
+	}
+	return true;
+}
+
+static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
+{
+	intel_dbuf_slice_set(dev_priv, DBUF_CTL, true);
 }
 
 static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL);
+	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
+}
 
-	udelay(10);
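+/* ICL introduces a second DBuf slice; all earlier platforms have one. */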
+static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) < 11)
+		return 1;
+	return 2;
+}
 
-	if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
-		DRM_ERROR("DBuf power disable timeout!\n");
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    u8 req_slices)
+{
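+	/*
+	 * Left as a no-op in this backport: the kernels it targets have
+	 * no ICL support, and ICL is the only platform with more than
+	 * one DBuf slice.
+	 */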
 }
 
 static void skl_display_core_init(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index e49d4f272626..07429974fa9d 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -303,6 +303,8 @@  skl_update_plane(struct intel_plane *plane,
 		I915_WRITE_FW(PLANE_POS(pipe, plane_id), (crtc_y << 16) | crtc_x);
 	}
 
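+	/*
+	 * Write the watermark registers here so they latch together
+	 * with the rest of the plane state on the PLANE_SURF write.
+	 */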
+	skl_write_plane_wm(plane, crtc_state);
+
 	I915_WRITE_FW(PLANE_CTL(pipe, plane_id), plane_ctl);
 	I915_WRITE_FW(PLANE_SURF(pipe, plane_id),
 		      intel_plane_ggtt_offset(plane_state) + surf_addr);
@@ -312,7 +314,8 @@  skl_update_plane(struct intel_plane *plane,
 }
 
 void
-skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
+skl_disable_plane(struct intel_plane *plane,
+		  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum plane_id plane_id = plane->id;
@@ -321,6 +324,8 @@  skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
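+	/*
+	 * Keep the watermark registers in sync even while disabling;
+	 * see the comment above skl_wm_add_affected_planes() for the
+	 * cursor corner case this handles.
+	 */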
+	skl_write_plane_wm(plane, crtc_state);
+
 	I915_WRITE_FW(PLANE_CTL(pipe, plane_id), 0);
 
 	I915_WRITE_FW(PLANE_SURF(pipe, plane_id), 0);
@@ -554,7 +559,8 @@  vlv_update_plane(struct intel_plane *plane,
 }
 
 static void
-vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
+vlv_disable_plane(struct intel_plane *plane,
+		  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
@@ -712,7 +718,8 @@  ivb_update_plane(struct intel_plane *plane,
 }
 
 static void
-ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
+ivb_disable_plane(struct intel_plane *plane,
+		  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
@@ -863,7 +870,8 @@  g4x_update_plane(struct intel_plane *plane,
 }
 
 static void
-g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
+g4x_disable_plane(struct intel_plane *plane,
+		  const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 8c2ce81f01c2..b372c774894b 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1740,9 +1740,9 @@  static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
 	if (!i915_modparams.reset)
 		return NULL;
 
-	if (INTEL_INFO(dev_priv)->gen >= 8)
+	if (INTEL_GEN(dev_priv) >= 8)
 		return gen8_reset_engines;
-	else if (INTEL_INFO(dev_priv)->gen >= 6)
+	else if (INTEL_GEN(dev_priv) >= 6)
 		return gen6_reset_engines;
 	else if (IS_GEN5(dev_priv))
 		return ironlake_do_reset;
@@ -1750,7 +1750,7 @@  static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
 		return g4x_do_reset;
 	else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
 		return g33_do_reset;
-	else if (INTEL_INFO(dev_priv)->gen >= 3)
+	else if (INTEL_GEN(dev_priv) >= 3)
 		return i915_do_reset;
 	else
 		return NULL;