diff mbox

[v5,2/4] Documentation: arm64/arm: dt bindings for numa.

Message ID 1439570374-4079-3-git-send-email-gkulkarni@caviumnetworks.com
State Not Applicable, archived
Headers show

Commit Message

Ganapatrao Kulkarni Aug. 14, 2015, 4:39 p.m. UTC
DT bindings for numa map for memory, cores and IOs using
arm,associativity device node property.

Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
---
 Documentation/devicetree/bindings/arm/numa.txt | 212 +++++++++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/numa.txt

Comments

Robert Richter Aug. 22, 2015, 3:06 p.m. UTC | #1
On 14.08.15 22:09:32, Ganapatrao Kulkarni wrote:
> DT bindings for numa map for memory, cores and IOs using
> arm,associativity device node property.

Arnd, Rob,

as the change below suggests the same topology syntax as already
implemented for PPC, could you take a look at this one for arm64?
Please ack the devicetree changes, assuming you are fine with it.

All other review comments are addressed so far and there are no open
issues with the patches. This would help us to further drive this
series upstream.

Many thanks,

-Robert


> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
> ---
>  Documentation/devicetree/bindings/arm/numa.txt | 212 +++++++++++++++++++++++++
>  1 file changed, 212 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/arm/numa.txt
> 
> diff --git a/Documentation/devicetree/bindings/arm/numa.txt b/Documentation/devicetree/bindings/arm/numa.txt
> new file mode 100644
> index 0000000..dc3ef86
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/numa.txt
> @@ -0,0 +1,212 @@
> +==============================================================================
> +NUMA binding description.
> +==============================================================================
> +
> +==============================================================================
> +1 - Introduction
> +==============================================================================
> +
> +Systems employing a Non Uniform Memory Access (NUMA) architecture contain
> +collections of hardware resources including processors, memory, and I/O buses,
> +that comprise what is commonly known as a NUMA node.
> +Processor accesses to memory within the local NUMA node is generally faster
> +than processor accesses to memory outside of the local NUMA node.
> +DT defines interfaces that allow the platform to convey NUMA node
> +topology information to OS.
> +
> +==============================================================================
> +2 - arm,associativity
> +==============================================================================
> +The mapping is done using arm,associativity device property.
> +This property needs to be present in every device node which needs to be
> +mapped to numa nodes.
> +
> +arm,associativity property is set of 32-bit integers which defines level of
> +topology and boundary in the system at which a significant difference in
> +performance can be measured between cross-device accesses within
> +a single location and those spanning multiple locations.
> +The first cell always contains the broadest subdivision within the system,
> +while the last cell enumerates the individual devices, such as an SMT thread
> +of a CPU, or a bus bridge within an SoC".
> +
> +ex:
> +	/* board 0, socket 0, cluster 0, core 0  thread 0 */
> +	arm,associativity = <0 0 0 0 0>;
> +
> +==============================================================================
> +3 - arm,associativity-reference-points
> +==============================================================================
> +This property is a set of 32-bit integers, each representing an index into
> +the arm,associativity nodes. The first integer is the most significant
> +NUMA boundary and the following are progressively less significant boundaries.
> +There can be more than one level of NUMA.
> +
> +Ex:
> +	arm,associativity-reference-points = <0 1>;
> +	The board Id(index 0) used first to calculate the associativity (node
> +	distance), then follows the  socket id(index 1).
> +
> +	arm,associativity-reference-points = <1 0>;
> +	The socket Id(index 1) used first to calculate the associativity,
> +	then follows the board id(index 0).
> +
> +	arm,associativity-reference-points = <0>;
> +	Only the board Id(index 0) used to calculate the associativity.
> +
> +	arm,associativity-reference-points = <1>;
> +	Only socket Id(index 1) used to calculate the associativity.
> +
> +==============================================================================
> +4 - Example dts
> +==============================================================================
> +
> +Example: 2 Node system consists of 2 boards and each board having one socket
> +and 8 cores in each socket.
> +
> +	arm,associativity-reference-points = <0>;
> +
> +	memory@00c00000 {
> +		device_type = "memory";
> +		reg = <0x0 0x00c00000 0x0 0x80000000>;
> +		/* board 0, socket 0, no specific core */
> +		arm,associativity = <0 0 0xffff>;
> +	};
> +
> +	memory@10000000000 {
> +		device_type = "memory";
> +		reg = <0x100 0x00000000 0x0 0x80000000>;
> +		/* board 1, socket 0, no specific core */
> +		arm,associativity = <1 0 0xffff>;
> +	};
> +
> +	cpus {
> +		#address-cells = <2>;
> +		#size-cells = <0>;
> +
> +		cpu@000 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x000>;
> +			enable-method = "psci";
> +			/* board 0, socket 0, core 0*/
> +			arm,associativity = <0 0 0>;
> +		};
> +		cpu@001 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x001>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 1>;
> +		};
> +		cpu@002 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x002>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 2>;
> +		};
> +		cpu@003 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x003>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 3>;
> +		};
> +		cpu@004 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x004>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 4>;
> +		};
> +		cpu@005 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x005>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 5>;
> +		};
> +		cpu@006 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x006>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 6>;
> +		};
> +		cpu@007 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x007>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 7>;
> +		};
> +		cpu@008 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x008>;
> +			enable-method = "psci";
> +			/* board 1, socket 0, core 0*/
> +			arm,associativity = <1 0 0>;
> +		};
> +		cpu@009 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x009>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 1>;
> +		};
> +		cpu@00a {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00a>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 2>;
> +		};
> +		cpu@00b {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00b>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 3>;
> +		};
> +		cpu@00c {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00c>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 4>;
> +		};
> +		cpu@00d {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00d>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 5>;
> +		};
> +		cpu@00e {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00e>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 6>;
> +		};
> +		cpu@00f {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00f>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 7>;
> +		};
> +	};
> +
> +	pcie0: pcie0@0x8480,00000000 {
> +		compatible = "arm,armv8";
> +		device_type = "pci";
> +		bus-range = <0 255>;
> +		#size-cells = <2>;
> +		#address-cells = <3>;
> +		reg = <0x8480 0x00000000 0 0x10000000>;  /* Configuration space */
> +		ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>; /* mem ranges */
> +		/* board 0, socket 0, pci bus 0*/
> +		arm,associativity = <0 0 0>;
> +        };
> -- 
> 1.8.1.4
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rob Herring Aug. 23, 2015, 9:49 p.m. UTC | #2
On Fri, Aug 14, 2015 at 11:39 AM, Ganapatrao Kulkarni
<gkulkarni@caviumnetworks.com> wrote:
> DT bindings for numa map for memory, cores and IOs using
> arm,associativity device node property.
>
> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>

Given this matches PPC, looks fine to me.

Acked-by: Rob Herring <robh@kernel.org>

> ---
>  Documentation/devicetree/bindings/arm/numa.txt | 212 +++++++++++++++++++++++++
>  1 file changed, 212 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/arm/numa.txt
>
> diff --git a/Documentation/devicetree/bindings/arm/numa.txt b/Documentation/devicetree/bindings/arm/numa.txt
> new file mode 100644
> index 0000000..dc3ef86
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/numa.txt
> @@ -0,0 +1,212 @@
> +==============================================================================
> +NUMA binding description.
> +==============================================================================
> +
> +==============================================================================
> +1 - Introduction
> +==============================================================================
> +
> +Systems employing a Non Uniform Memory Access (NUMA) architecture contain
> +collections of hardware resources including processors, memory, and I/O buses,
> +that comprise what is commonly known as a NUMA node.
> +Processor accesses to memory within the local NUMA node is generally faster
> +than processor accesses to memory outside of the local NUMA node.
> +DT defines interfaces that allow the platform to convey NUMA node
> +topology information to OS.
> +
> +==============================================================================
> +2 - arm,associativity
> +==============================================================================
> +The mapping is done using arm,associativity device property.
> +This property needs to be present in every device node which needs to be
> +mapped to numa nodes.
> +
> +arm,associativity property is set of 32-bit integers which defines level of
> +topology and boundary in the system at which a significant difference in
> +performance can be measured between cross-device accesses within
> +a single location and those spanning multiple locations.
> +The first cell always contains the broadest subdivision within the system,
> +while the last cell enumerates the individual devices, such as an SMT thread
> +of a CPU, or a bus bridge within an SoC".
> +
> +ex:
> +       /* board 0, socket 0, cluster 0, core 0  thread 0 */
> +       arm,associativity = <0 0 0 0 0>;
> +
> +==============================================================================
> +3 - arm,associativity-reference-points
> +==============================================================================
> +This property is a set of 32-bit integers, each representing an index into
> +the arm,associativity nodes. The first integer is the most significant
> +NUMA boundary and the following are progressively less significant boundaries.
> +There can be more than one level of NUMA.
> +
> +Ex:
> +       arm,associativity-reference-points = <0 1>;
> +       The board Id(index 0) used first to calculate the associativity (node
> +       distance), then follows the  socket id(index 1).
> +
> +       arm,associativity-reference-points = <1 0>;
> +       The socket Id(index 1) used first to calculate the associativity,
> +       then follows the board id(index 0).
> +
> +       arm,associativity-reference-points = <0>;
> +       Only the board Id(index 0) used to calculate the associativity.
> +
> +       arm,associativity-reference-points = <1>;
> +       Only socket Id(index 1) used to calculate the associativity.
> +
> +==============================================================================
> +4 - Example dts
> +==============================================================================
> +
> +Example: 2 Node system consists of 2 boards and each board having one socket
> +and 8 core in each socket.
> +
> +       arm,associativity-reference-points = <0>;
> +
> +       memory@00c00000 {
> +               device_type = "memory";
> +               reg = <0x0 0x00c00000 0x0 0x80000000>;
> +               /* board 0, socket 0, no specific core */
> +               arm,associativity = <0 0 0xffff>;
> +       };
> +
> +       memory@10000000000 {
> +               device_type = "memory";
> +               reg = <0x100 0x00000000 0x0 0x80000000>;
> +               /* board 1, socket 0, no specific core */
> +               arm,associativity = <1 0 0xffff>;
> +       };
> +
> +       cpus {
> +               #address-cells = <2>;
> +               #size-cells = <0>;
> +
> +               cpu@000 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x000>;
> +                       enable-method = "psci";
> +                       /* board 0, socket 0, core 0*/
> +                       arm,associativity = <0 0 0>;
> +               };
> +               cpu@001 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x001>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 1>;
> +               };
> +               cpu@002 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x002>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 2>;
> +               };
> +               cpu@003 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x003>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 3>;
> +               };
> +               cpu@004 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x004>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 4>;
> +               };
> +               cpu@005 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x005>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 5>;
> +               };
> +               cpu@006 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x006>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 6>;
> +               };
> +               cpu@007 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x007>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 7>;
> +               };
> +               cpu@008 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x008>;
> +                       enable-method = "psci";
> +                       /* board 1, socket 0, core 0*/
> +                       arm,associativity = <1 0 0>;
> +               };
> +               cpu@009 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x009>;
> +                       enable-method = "psci";
> +                       arm,associativity = <1 0 1>;
> +               };
> +               cpu@00a {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x00a>;
> +                       enable-method = "psci";
> +                       arm,associativity = <0 0 2>;
> +               };
> +               cpu@00b {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x00b>;
> +                       enable-method = "psci";
> +                       arm,associativity = <1 0 3>;
> +               };
> +               cpu@00c {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x00c>;
> +                       enable-method = "psci";
> +                       arm,associativity = <1 0 4>;
> +               };
> +               cpu@00d {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x00d>;
> +                       enable-method = "psci";
> +                       arm,associativity = <1 0 5>;
> +               };
> +               cpu@00e {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x00e>;
> +                       enable-method = "psci";
> +                       arm,associativity = <1 0 6>;
> +               };
> +               cpu@00f {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x00f>;
> +                       enable-method = "psci";
> +                       arm,associativity = <1 0 7>;
> +               };
> +       };
> +
> +       pcie0: pcie0@0x8480,00000000 {
> +               compatible = "arm,armv8";
> +               device_type = "pci";
> +               bus-range = <0 255>;
> +               #size-cells = <2>;
> +               #address-cells = <3>;
> +               reg = <0x8480 0x00000000 0 0x10000000>;  /* Configuration space */
> +               ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>; /* mem ranges */
> +               /* board 0, socket 0, pci bus 0*/
> +               arm,associativity = <0 0 0>;
> +        };
> --
> 1.8.1.4
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matthias Brugger Aug. 28, 2015, 11:32 a.m. UTC | #3
On 14/08/15 18:39, Ganapatrao Kulkarni wrote:
> DT bindings for numa map for memory, cores and IOs using
> arm,associativity device node property.
>
> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
> ---
>   Documentation/devicetree/bindings/arm/numa.txt | 212 +++++++++++++++++++++++++
>   1 file changed, 212 insertions(+)
>   create mode 100644 Documentation/devicetree/bindings/arm/numa.txt
>
> diff --git a/Documentation/devicetree/bindings/arm/numa.txt b/Documentation/devicetree/bindings/arm/numa.txt
> new file mode 100644
> index 0000000..dc3ef86
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/numa.txt
> @@ -0,0 +1,212 @@
> +==============================================================================
> +NUMA binding description.
> +==============================================================================
> +
> +==============================================================================
> +1 - Introduction
> +==============================================================================
> +
> +Systems employing a Non Uniform Memory Access (NUMA) architecture contain
> +collections of hardware resources including processors, memory, and I/O buses,
> +that comprise what is commonly known as a NUMA node.
> +Processor accesses to memory within the local NUMA node is generally faster
> +than processor accesses to memory outside of the local NUMA node.
> +DT defines interfaces that allow the platform to convey NUMA node
> +topology information to OS.
> +
> +==============================================================================
> +2 - arm,associativity
> +==============================================================================
> +The mapping is done using arm,associativity device property.
> +This property needs to be present in every device node which needs to be
> +mapped to numa nodes.
> +
> +arm,associativity property is set of 32-bit integers which defines level of
> +topology and boundary in the system at which a significant difference in
> +performance can be measured between cross-device accesses within
> +a single location and those spanning multiple locations.
> +The first cell always contains the broadest subdivision within the system,
> +while the last cell enumerates the individual devices, such as an SMT thread
> +of a CPU, or a bus bridge within an SoC".
> +
> +ex:
> +	/* board 0, socket 0, cluster 0, core 0  thread 0 */
> +	arm,associativity = <0 0 0 0 0>;
> +
> +==============================================================================
> +3 - arm,associativity-reference-points
> +==============================================================================
> +This property is a set of 32-bit integers, each representing an index into
> +the arm,associativity nodes. The first integer is the most significant
> +NUMA boundary and the following are progressively less significant boundaries.
> +There can be more than one level of NUMA.
> +
> +Ex:
> +	arm,associativity-reference-points = <0 1>;
> +	The board Id(index 0) used first to calculate the associativity (node
> +	distance), then follows the  socket id(index 1).
> +
> +	arm,associativity-reference-points = <1 0>;
> +	The socket Id(index 1) used first to calculate the associativity,
> +	then follows the board id(index 0).
> +
> +	arm,associativity-reference-points = <0>;
> +	Only the board Id(index 0) used to calculate the associativity.
> +
> +	arm,associativity-reference-points = <1>;
> +	Only socket Id(index 1) used to calculate the associativity.
> +
> +==============================================================================
> +4 - Example dts
> +==============================================================================
> +
> +Example: 2 Node system consists of 2 boards and each board having one socket
> +and 8 core in each socket.
> +
> +	arm,associativity-reference-points = <0>;
> +
> +	memory@00c00000 {
> +		device_type = "memory";
> +		reg = <0x0 0x00c00000 0x0 0x80000000>;
> +		/* board 0, socket 0, no specific core */
> +		arm,associativity = <0 0 0xffff>;
> +	};
> +
> +	memory@10000000000 {
> +		device_type = "memory";
> +		reg = <0x100 0x00000000 0x0 0x80000000>;
> +		/* board 1, socket 0, no specific core */
> +		arm,associativity = <1 0 0xffff>;
> +	};
> +
> +	cpus {
> +		#address-cells = <2>;
> +		#size-cells = <0>;
> +
> +		cpu@000 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x000>;
> +			enable-method = "psci";
> +			/* board 0, socket 0, core 0*/
> +			arm,associativity = <0 0 0>;
> +		};
> +		cpu@001 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x001>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 1>;
> +		};
> +		cpu@002 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x002>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 2>;
> +		};
> +		cpu@003 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x003>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 3>;
> +		};
> +		cpu@004 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x004>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 4>;
> +		};
> +		cpu@005 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x005>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 5>;
> +		};
> +		cpu@006 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x006>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 6>;
> +		};
> +		cpu@007 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x007>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 7>;
> +		};
> +		cpu@008 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x008>;
> +			enable-method = "psci";
> +			/* board 1, socket 0, core 0*/
> +			arm,associativity = <1 0 0>;
> +		};
> +		cpu@009 {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x009>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 1>;
> +		};
> +		cpu@00a {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00a>;
> +			enable-method = "psci";
> +			arm,associativity = <0 0 2>;

Nit: this should be
arm,associativity = <1 0 2>;

> +		};
> +		cpu@00b {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00b>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 3>;
> +		};
> +		cpu@00c {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00c>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 4>;
> +		};
> +		cpu@00d {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00d>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 5>;
> +		};
> +		cpu@00e {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00e>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 6>;
> +		};
> +		cpu@00f {
> +			device_type = "cpu";
> +			compatible =  "arm,armv8";
> +			reg = <0x0 0x00f>;
> +			enable-method = "psci";
> +			arm,associativity = <1 0 7>;
> +		};
> +	};
> +
> +	pcie0: pcie0@0x8480,00000000 {
> +		compatible = "arm,armv8";
> +		device_type = "pci";
> +		bus-range = <0 255>;
> +		#size-cells = <2>;
> +		#address-cells = <3>;
> +		reg = <0x8480 0x00000000 0 0x10000000>;  /* Configuration space */
> +		ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>; /* mem ranges */
> +		/* board 0, socket 0, pci bus 0*/
> +		arm,associativity = <0 0 0>;
> +        };
>

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Rutland Aug. 28, 2015, 12:32 p.m. UTC | #4
Hi,

On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
> DT bindings for numa map for memory, cores and IOs using
> arm,associativity device node property.

Given this is just a copy of ibm,associativity, I'm not sure I see much
point in renaming the properties.

However, (somewhat counter to that) I'm also concerned that this isn't
sufficient for systems we're beginning to see today (more on that
below), so I don't think a simple copy of ibm,associativity is good
enough.

> 
> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
> ---
>  Documentation/devicetree/bindings/arm/numa.txt | 212 +++++++++++++++++++++++++
>  1 file changed, 212 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/arm/numa.txt
> 
> diff --git a/Documentation/devicetree/bindings/arm/numa.txt b/Documentation/devicetree/bindings/arm/numa.txt
> new file mode 100644
> index 0000000..dc3ef86
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/numa.txt
> @@ -0,0 +1,212 @@
> +==============================================================================
> +NUMA binding description.
> +==============================================================================
> +
> +==============================================================================
> +1 - Introduction
> +==============================================================================
> +
> +Systems employing a Non Uniform Memory Access (NUMA) architecture contain
> +collections of hardware resources including processors, memory, and I/O buses,
> +that comprise what is commonly known as a NUMA node.
> +Processor accesses to memory within the local NUMA node is generally faster
> +than processor accesses to memory outside of the local NUMA node.
> +DT defines interfaces that allow the platform to convey NUMA node
> +topology information to OS.
> +
> +==============================================================================
> +2 - arm,associativity
> +==============================================================================
> +The mapping is done using arm,associativity device property.
> +This property needs to be present in every device node which needs to be
> +mapped to numa nodes.

Can't there be some inheritance? e.g. all devices on a bus with an
arm,associativity property being assumed to share that value?

> +
> +arm,associativity property is set of 32-bit integers which defines level of

s/set/list/ -- the order is important.

> +topology and boundary in the system at which a significant difference in
> +performance can be measured between cross-device accesses within
> +a single location and those spanning multiple locations.
> +The first cell always contains the broadest subdivision within the system,
> +while the last cell enumerates the individual devices, such as an SMT thread
> +of a CPU, or a bus bridge within an SoC".

While this gives us some hierarchy, this doesn't seem to encode relative
distances at all. That seems like an oversight.

Additionally, I'm somewhat unclear on what you'd be expected to
provide for this property in cases like ring or mesh interconnects,
where there isn't a strict hierarchy (see systems with ARM's own CCN, or
Tilera's TILE-Mx), but there is some measure of closeness.

Must all of these have the same length? If so, why not have a
#(whatever)-cells property in the root to describe the expected length?
If not, how are they to be interpreted relative to each other?

> +
> +ex:

s/ex/Example:/, please. There's no need to contract that.

> +       /* board 0, socket 0, cluster 0, core 0  thread 0 */
> +       arm,associativity = <0 0 0 0 0>;
> +
> +==============================================================================
> +3 - arm,associativity-reference-points
> +==============================================================================
> +This property is a set of 32-bit integers, each representing an index into

Likewise, s/set/list/

> +the arm,associativity nodes. The first integer is the most significant
> +NUMA boundary and the following are progressively less significant boundaries.
> +There can be more than one level of NUMA.

I'm not clear on why this is necessary; the arm,associativity property
is already ordered from most significant to least significant per its
description.

What does this property achieve?

The description also doesn't describe where this property is expected to
live. The example isn't sufficient to disambiguate that, especially as
it seems like a trivial case.

Is this only expected at the root of the tree? Can it be re-defined in
sub-nodes?

> +
> +Ex:

s/Ex/Example:/, please

> +       arm,associativity-reference-points = <0 1>;
> +       The board Id(index 0) used first to calculate the associativity (node
> +       distance), then follows the  socket id(index 1).
> +
> +       arm,associativity-reference-points = <1 0>;
> +       The socket Id(index 1) used first to calculate the associativity,
> +       then follows the board id(index 0).
> +
> +       arm,associativity-reference-points = <0>;
> +       Only the board Id(index 0) used to calculate the associativity.
> +
> +       arm,associativity-reference-points = <1>;
> +       Only socket Id(index 1) used to calculate the associativity.
> +
> +==============================================================================
> +4 - Example dts
> +==============================================================================
> +
> +Example: 2 Node system consists of 2 boards and each board having one socket
> +and 8 core in each socket.
> +
> +       arm,associativity-reference-points = <0>;
> +
> +       memory@00c00000 {
> +               device_type = "memory";
> +               reg = <0x0 0x00c00000 0x0 0x80000000>;
> +               /* board 0, socket 0, no specific core */
> +               arm,associativity = <0 0 0xffff>;
> +       };
> +
> +       memory@10000000000 {
> +               device_type = "memory";
> +               reg = <0x100 0x00000000 0x0 0x80000000>;
> +               /* board 1, socket 0, no specific core */
> +               arm,associativity = <1 0 0xffff>;
> +       };
> +
> +       cpus {
> +               #address-cells = <2>;
> +               #size-cells = <0>;
> +
> +               cpu@000 {
> +                       device_type = "cpu";
> +                       compatible =  "arm,armv8";
> +                       reg = <0x0 0x000>;
> +                       enable-method = "psci";
> +                       /* board 0, socket 0, core 0*/
> +                       arm,associativity = <0 0 0>;

We should specify w.r.t. memory and CPUs how the property is expected to
be used (e.g. in the CPU nodes rather than the cpu-map, with separate
memory nodes, etc). The generic description of arm,associativity isn't
sufficient to limit confusion there.

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rob Herring Aug. 28, 2015, 2:02 p.m. UTC | #5
+benh

On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi,
>
> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>> DT bindings for numa map for memory, cores and IOs using
>> arm,associativity device node property.
>
> Given this is just a copy of ibm,associativity, I'm not sure I see much
> point in renaming the properties.

So just keep the ibm? I'm okay with that. That would help move to
common code. Alternatively, we could drop the vendor prefix and have
common code just check for both.

>
> However, (somewhat counter to that) I'm also concerned that this isn't
> sufficient for systems we're beginning to see today (more on that
> below), so I don't think a simple copy of ibm,associativity is good
> enough.
>
>>
>> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
>> ---
>>  Documentation/devicetree/bindings/arm/numa.txt | 212 +++++++++++++++++++++++++
>>  1 file changed, 212 insertions(+)
>>  create mode 100644 Documentation/devicetree/bindings/arm/numa.txt
>>
>> diff --git a/Documentation/devicetree/bindings/arm/numa.txt b/Documentation/devicetree/bindings/arm/numa.txt
>> new file mode 100644
>> index 0000000..dc3ef86
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/arm/numa.txt
>> @@ -0,0 +1,212 @@
>> +==============================================================================
>> +NUMA binding description.
>> +==============================================================================
>> +
>> +==============================================================================
>> +1 - Introduction
>> +==============================================================================
>> +
>> +Systems employing a Non Uniform Memory Access (NUMA) architecture contain
>> +collections of hardware resources including processors, memory, and I/O buses,
>> +that comprise what is commonly known as a NUMA node.
>> +Processor accesses to memory within the local NUMA node is generally faster
>> +than processor accesses to memory outside of the local NUMA node.
>> +DT defines interfaces that allow the platform to convey NUMA node
>> +topology information to OS.
>> +
>> +==============================================================================
>> +2 - arm,associativity
>> +==============================================================================
>> +The mapping is done using arm,associativity device property.
>> +this property needs to be present in every device node which needs to to be
>> +mapped to numa nodes.
>
> Can't there be some inheritance? e.g. all devices on a bus with an
> arm,associativity property being assumed to share that value?

There is actually already based on kernel code. So the documentation
just needs to be explicit.

>
>> +
>> +arm,associativity property is set of 32-bit integers which defines level of
>
> s/set/list/ -- the order is important.
>
>> +topology and boundary in the system at which a significant difference in
>> +performance can be measured between cross-device accesses within
>> +a single location and those spanning multiple locations.
>> +The first cell always contains the broadest subdivision within the system,
>> +while the last cell enumerates the individual devices, such as an SMT thread
>> +of a CPU, or a bus bridge within an SoC".
>
> While this gives us some hierarchy, this doesn't seem to encode relative
> distances at all. That seems like an oversight.
>
> Additionally, I'm somewhat unclear on how what you'd be expected to
> provide for this property in cases like ring or mesh interconnects,
> where there isn't a strict hierarchy (see systems with ARM's own CCN, or
> Tilera's TILE-Mx), but there is some measure of closeness.
>
> Must all of these have the same length? If so, why not have a
> #(whatever)-cells property in the root to describe the expected length?
> If not, how are they to be interpreted relative to each other?

All points that could be asked of the IBM binding. Perhaps Arnd or Ben
can provide some insight or know who can?

Rob
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Aug. 28, 2015, 9:37 p.m. UTC | #6
On Fri, 2015-08-28 at 09:02 -0500, Rob Herring wrote:

> So just keep the ibm? I'm okay with that. That would help move to
> common code. Alternatively, we could drop the vendor prefix and have
> common code just check for both.

That wouldn't be the first time we go down that path and it makes sense
imho.

> All points that could be asked of the IBM binding. Perhaps Arnd or 
> Ben can provide some insight or know who can?

They are part of the PAPR specification which we've been trying to get
published for a while now but that hasn't happened yet. Beware that
there are variants of the format based on some other property. There's
also 
"ibm,associativity-reference-points" which is used to calculate
distances. I'll see if I can get you an excerpt of the PAPR chapter, or
reword it, in the next few days (please poke me if I drop the ball next
week).

Cheers,
Ben.

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leizhen (ThunderTown) Aug. 29, 2015, 9:46 a.m. UTC | #7
On 2015/8/28 22:02, Rob Herring wrote:
> +benh
> 
> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>> Hi,
>>
>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>> DT bindings for numa map for memory, cores and IOs using
>>> arm,associativity device node property.
>>
>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>> point in renaming the properties.
> 
> So just keep the ibm? I'm okay with that. That would help move to
> common code. Alternatively, we could drop the vendor prefix and have
> common code just check for both.
> 

Hi all,

Why not copy the method of ACPI NUMA? Only three elements need to be configured:
1) a cpu belong to which node
2) a memory block belong to which node
3) the distance of each two nodes

The devicetree nodes of numa can be like below:
/ {
	...

	numa-nodes-info {
		node-name: node-description {
			mem-ranges = <...>;
			cpus-list = <...>;
		};

		nodes-distance {
			distance-list = <...>;
		};
	};

	...
};

Sorry, I don't think xxx,associativity is a good method: it's hard to configure, and it
seems hardware-dependent. In particular, when we want to support memory hot-add, it's too hard,
because xxx,associativity carries no obvious information about it. PowerPC, for example, uses another
property: "/ibm,dynamic-reconfiguration-memory".

I spent almost a whole month implementing of_numa (configured by dt-nodes), based upon the opinion
mentioned above. If somebody is interested in it, I can send my patchset to show it.

Regards,
Thunder.

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Aug. 29, 2015, 10:37 a.m. UTC | #8
On Sat, 2015-08-29 at 17:46 +0800, Leizhen (ThunderTown) wrote:
> Why not copy the method of ACPI numa? There only three elements
> should be configured:
> 1) a cpu belong to which node
> 2) a memory block belong to which node
> 3) the distance of each two nodes

This means your are bolting into the DT representation the concept of
"Node" which isn't necessarily very meaningful.

Your system is really a hierarchy of objects. You can have cores on a
chip, already possibly sharing some level of cache or not, you can have
chips on a module, modules linked at various distances, etc...

What is "a node" ?

For example, I have a P8 chip with 2 chips on a module (fast X-bus) and
2 modules (slightly slower A-bus). All 4 chips have 2 memory
controllers each.

Is a "node" a chip or a module ?

The Linux concept of node is too restrictive. The associativity
properties avoid this by allowing you to define as many "levels" of
associativity as you wish. Also since it's right justified, a given
component doesn't need to have all levels (a MC can stop at chip while
cores can go down one more level for example).

The reference points property gives a hint as "interesting" levels can
typically be used as a hint for chosing what Linux will use as a "node"
at least until Linux gets smarter. It can also be used to calculate
distances.

Cheers,
Ben.

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Aug. 29, 2015, 2:56 p.m. UTC | #9
Hi Thunder,

On Sat, Aug 29, 2015 at 3:16 PM, Leizhen (ThunderTown)
<thunder.leizhen@huawei.com> wrote:
>
>
> On 2015/8/28 22:02, Rob Herring wrote:
>> +benh
>>
>> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>>> Hi,
>>>
>>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>>> DT bindings for numa map for memory, cores and IOs using
>>>> arm,associativity device node property.
>>>
>>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>>> point in renaming the properties.
>>
>> So just keep the ibm? I'm okay with that. That would help move to
>> common code. Alternatively, we could drop the vendor prefix and have
>> common code just check for both.
>>
>
> Hi all,
>
> Why not copy the method of ACPI numa? There only three elements should be configured:
> 1) a cpu belong to which node
> 2) a memory block belong to which node
> 3) the distance of each two nodes
>
> The devicetree nodes of numa can be like below:
> / {
>         ...
>
>         numa-nodes-info {
>                 node-name: node-description {
>                         mem-ranges = <...>;
>                         cpus-list = <...>;
>                 };
>
>                 nodes-distance {
>                         distance-list = <...>;
>                 };
>         };
>
>         ...
> };
>
Something similar to what you are proposing is already implemented in
my v2 patchset.
https://lwn.net/Articles/623920/
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-November/305164.html
We went with the associativity-property-based implementation to keep it
more generic.
I have both the ACPI (using Linaro/Hanjun's patches) and the associativity-based
implementations on our internal tree,
and they have been tested on the ThunderX platform.
I do see an issue in creating the NUMA mapping using ACPI for I/Os (for
example, I am not able to create a NUMA mapping for the ITS, which is present on each
node, using ACPI tables), since the ACPI spec (the SRAT and SLIT tables)
talks only about processors and memory.
The associativity property, however, is generic, and you can apply it to any dt node.
> Sorry, I don't think xxx,associativity is a good method, it's hard to config, and it
> seems hardware-dependent. Especially, when we want to support memory hot-add, it's too hard.
> Because xxx,associativity have no obvious information about it. Like powerpc, it use another
> property: "/ibm,dynamic-reconfiguration-memory".
>
> I spend almost a whole month to implement of_numa(configured by dt-nodes), base upon my opinion
> mentioned above. If somebody are interested in it, I can send my patchset to show it.
>
> Regards,
> Thunder.
>
thanks
ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leizhen (ThunderTown) Aug. 31, 2015, 1:46 a.m. UTC | #10
On 2015/8/29 18:37, Benjamin Herrenschmidt wrote:
> On Sat, 2015-08-29 at 17:46 +0800, Leizhen (ThunderTown) wrote:
>> Why not copy the method of ACPI numa? There only three elements
>> should be configured:
>> 1) a cpu belong to which node
>> 2) a memory block belong to which node
>> 3) the distance of each two nodes

Sorry, I forgot to write something:
4) a device(maybe a bus device) belongs to which node

For example:
device-name {
        ...
        numa-node = <&node0>;
};

To simplify the discussion, I will not mention devices again. Treat both
cpus and devices as masters, and memories as slaves.

A bus is not a master; we allow binding a numa node to a bus because we may
want all devices on the bus to inherit its numa node-id without being explicitly configured one by one.

> 
> This means your are bolting into the DT representation the concept of
> "Node" which isn't necessarily very meaningful.
> 
> Your system is really a hierarchy of objects. You can have cores on a
> chip, already possibly sharing some level of cache or not, you can have
> chips on a module, modules linked at various distances, etc...
> 
> What is "a node" ?
> 
> For example, I have a P8 chip with 2 chips on a module (fast X-bus) and
> 2 modules (slightly slower A-bus). All 4 chips have 2 memory
> controllers each.
> 
> Is a "node" a chip or a module ?

A numa node is an abstract concept; it needn't be related to a real hardware level.
A numa node normally contains both cpus and memory, but may contain only cpus or only memory,
or maybe nothing (quite rare). We put cpus and memory into a node because we want to use
the node distance to implement nearest-memory access and nearest-process scheduling.

In your example:
On fast X-bus, have a module contains 2 chips.
On slightly slower A-bus, have 2 modules(treat them as 2 chips).
Each chip contains 2 memory controllers.

Suppose each chip access its local bus memory faster than another.

Case1:
Each chip access its 2 local memory controllers faster than others. Then we can define numa nodes:
node-xbus-0: a chip and 2 local memory.
node-xbus-1: a chip and 2 local memory.
node-abus-0: a chip(module) and 2 local memory.
node-abus-1: a chip(module) and 2 local memory.

Case2:
Each chip access any memory controllers on its local bus are the same. Then we can define numa nodes:
node-xbus: 2 chips and 4 local memory.
node-abus: 2 chips(modules) and 4 local memory.


> 
> The Linux concept of node is too restrictive. The associativity
> properties avoid this by allowing you to define as many "levels" of
> associativity as you wish. Also since it's right justified, a given
> component doesn't need to have all levels (a MC can stop at chip while
> cores can go down one more level for example).
> 
> The reference points property gives a hint as "interesting" levels can
> typically be used as a hint for chosing what Linux will use as a "node"
> at least until Linux gets smarter. It can also be used to calculate
> distances.
> 
> Cheers,
> Ben.
> 
> 
> .
> 

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leizhen (ThunderTown) Aug. 31, 2015, 2:53 a.m. UTC | #11
On 2015/8/29 22:56, Ganapatrao Kulkarni wrote:
> Hi Thunder,
> 
> On Sat, Aug 29, 2015 at 3:16 PM, Leizhen (ThunderTown)
> <thunder.leizhen@huawei.com> wrote:
>>
>>
>> On 2015/8/28 22:02, Rob Herring wrote:
>>> +benh
>>>
>>> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>>>> Hi,
>>>>
>>>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>>>> DT bindings for numa map for memory, cores and IOs using
>>>>> arm,associativity device node property.
>>>>
>>>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>>>> point in renaming the properties.
>>>
>>> So just keep the ibm? I'm okay with that. That would help move to
>>> common code. Alternatively, we could drop the vendor prefix and have
>>> common code just check for both.
>>>
>>
>> Hi all,
>>
>> Why not copy the method of ACPI numa? There only three elements should be configured:
>> 1) a cpu belong to which node
>> 2) a memory block belong to which node
>> 3) the distance of each two nodes
>>
>> The devicetree nodes of numa can be like below:
>> / {
>>         ...
>>
>>         numa-nodes-info {
>>                 node-name: node-description {
>>                         mem-ranges = <...>;
>>                         cpus-list = <...>;
>>                 };
>>
>>                 nodes-distance {
>>                         distance-list = <...>;
>>                 };
>>         };
>>
>>         ...
>> };
>>
> some what similar to what your are proposing is already implemented in
> my v2 patchset.
> https://lwn.net/Articles/623920/
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-November/305164.html

Sorry, I have not read your old version patchsets before.

The basic ideas are consistent, but the details are different. I think your v2 patchset may run into some problems:

-------------------------
+- cpu-map:	This property defines the association of range of processors
+		(range of cpu ids) and the proximity domain to which
+		the processor belongs.

+		cpu-map = <0 7 0>,
+			  <8 15 1>;
-------------------------

1.
I am not sure whether the cpu ids are the logical cpu-ids in Linux or the sequence numbers of the CPU dt-nodes in the dts.
In the former case: the logical cpu-id is allocated by Linux, and we cannot ensure that cpu0 is the first CPU dt-node.
In the latter case: it depends on Linux strictly parsing the CPU dt-nodes in the sequence they appear in the dts.

2. You should put most codes into /drivers/of/, because it can be shared with other ARCHs which base upon devicetree.

Here is my detailed example:
Examples:
/ {
	#address-cells = <2>;
	#size-cells = <2>;

	memory@0 {
		device_type = "memory";
		reg = <0x0 0x00000000 0x0 0x40000000>,
		      <0x1 0x00000000 0x1 0x00000000>,
		      <0x2 0x00000000 0x0 0x40000000>,
		      <0x2 0x80000000 0x0 0x40000000>;
	};

	CPU0: cpu@10000 {
		device_type = "cpu";
		reg = <0x10000>;
		...
	};

	numa-nodes-info {
		node0: cluster0 {
			mem-ranges = <0x0 0x00000000 0x1 0x00000000>;
			cpus-list = <&CPU0 &CPU1>;
		};

		node1: cluster1 {
			mem-ranges = <0x1 0x00000000 0x1 0x00000000>;
			cpus-list = <&CPU2>;
		};

		node2: cluster2 {
			mem-ranges = <0x2 0x00000000 0x1 0x00000000>;
			cpus-list = <&CPU3 &CPU4 &CPU5>;
		};

		nodes-distance {
			distance-list = <&node0 &node1 15>, <&node1 &node2 18>;
		};
	};

> we have went to associativity property based implementation to keep it
> more generic.
> i do have both acpi(using linaro/hanjun's patches) and associativity
> based implementations on our internal tree
> and tested on thunderx platform.
> i do see issue in creating numa mapping using ACPI for IOs(for
> example, i am not able to create numa mapping for ITS which is on each
> node, using ACPI tables),  since ACPI spec (tables SRAT and SLIT)
> talks only about processor and memory.
> however associativity is generic and you can apply on any dt node.
>> Sorry, I don't think xxx,associativity is a good method, it's hard to config, and it
>> seems hardware-dependent. Especially, when we want to support memory hot-add, it's too hard.
>> Because xxx,associativity have no obvious information about it. Like powerpc, it use another
>> property: "/ibm,dynamic-reconfiguration-memory".
>>
>> I spend almost a whole month to implement of_numa(configured by dt-nodes), base upon my opinion
>> mentioned above. If somebody are interested in it, I can send my patchset to show it.
>>
>> Regards,
>> Thunder.
>>
> thanks
> ganapat
> 
> .
> 

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Sept. 2, 2015, 5:11 p.m. UTC | #12
Hi Ben,

On Sat, Aug 29, 2015 at 3:07 AM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Fri, 2015-08-28 at 09:02 -0500, Rob Herring wrote:
>
>> So just keep the ibm? I'm okay with that. That would help move to
>> common code. Alternatively, we could drop the vendor prefix and have
>> common code just check for both.
>
> That wouldn't be the first time we go down that path and it makes sense
> imho.
>
>> All points that could be asked of the IBM binding. Perhaps Arnd or
>> Ben can provide some insight or know who can?
>
> They are part of the PAPR specification which we've been trying to get
> published for a while now but that hasn't happened yet. Beware that
> there are variants of the format based on some other property. There's
> also
> "ibm,associativity-reference-points" which is used to calculate
> distances. I'll see if I can get you an excerpt of the PAPR chapter, or
> reword it, in the next few days (please poke me if I drop the ball next
> week).
did you get a chance to write an excerpt of the PAPR chapter?
please share the details.
>
> Cheers,
> Ben.
>
thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hanjun Guo Sept. 8, 2015, 1:27 p.m. UTC | #13
Hi Ganapatrao,

On 08/29/2015 10:56 PM, Ganapatrao Kulkarni wrote:
> Hi Thunder,
>
> On Sat, Aug 29, 2015 at 3:16 PM, Leizhen (ThunderTown)
> <thunder.leizhen@huawei.com> wrote:
>>
>>
>> On 2015/8/28 22:02, Rob Herring wrote:
>>> +benh
>>>
>>> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com> wrote:
>>>> Hi,
>>>>
>>>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>>>> DT bindings for numa map for memory, cores and IOs using
>>>>> arm,associativity device node property.
>>>>
>>>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>>>> point in renaming the properties.
>>>
>>> So just keep the ibm? I'm okay with that. That would help move to
>>> common code. Alternatively, we could drop the vendor prefix and have
>>> common code just check for both.
>>>
>>
>> Hi all,
>>
>> Why not copy the method of ACPI numa? There only three elements should be configured:
>> 1) a cpu belong to which node
>> 2) a memory block belong to which node
>> 3) the distance of each two nodes
>>
>> The devicetree nodes of numa can be like below:
>> / {
>>          ...
>>
>>          numa-nodes-info {
>>                  node-name: node-description {
>>                          mem-ranges = <...>;
>>                          cpus-list = <...>;
>>                  };
>>
>>                  nodes-distance {
>>                          distance-list = <...>;
>>                  };
>>          };
>>
>>          ...
>> };
>>
> some what similar to what your are proposing is already implemented in
> my v2 patchset.
> https://lwn.net/Articles/623920/
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-November/305164.html
> we have went to associativity property based implementation to keep it
> more generic.
> i do have both acpi(using linaro/hanjun's patches) and associativity
> based implementations on our internal tree
> and tested on thunderx platform.

Great thanks!

> i do see issue in creating numa mapping using ACPI for IOs(for
> example, i am not able to create numa mapping for ITS which is on each
> node, using ACPI tables),  since ACPI spec (tables SRAT and SLIT)
> talks only about processor and memory.

I'm not sure why the ITS needs to know the NUMA domain, for my
understanding, the interrupt will route to the correct NUMA domain
using setting the affinity, ITS will configured to route it to
the right GICR(cpu), so I think the ITS don't need to know which
NUMA node belonging to, correct me if I missed something.

Thanks
Hanjun
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Sept. 8, 2015, 4:27 p.m. UTC | #14
Hi Hanjun,

On Tue, Sep 8, 2015 at 6:57 PM, Hanjun Guo <hanjun.guo@linaro.org> wrote:
> Hi Ganapatrao,
>
>
> On 08/29/2015 10:56 PM, Ganapatrao Kulkarni wrote:
>>
>> Hi Thunder,
>>
>> On Sat, Aug 29, 2015 at 3:16 PM, Leizhen (ThunderTown)
>> <thunder.leizhen@huawei.com> wrote:
>>>
>>>
>>>
>>> On 2015/8/28 22:02, Rob Herring wrote:
>>>>
>>>> +benh
>>>>
>>>> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com>
>>>> wrote:
>>>>>
>>>>> Hi,
>>>>>
>>>>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>>>>>
>>>>>> DT bindings for numa map for memory, cores and IOs using
>>>>>> arm,associativity device node property.
>>>>>
>>>>>
>>>>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>>>>> point in renaming the properties.
>>>>
>>>>
>>>> So just keep the ibm? I'm okay with that. That would help move to
>>>> common code. Alternatively, we could drop the vendor prefix and have
>>>> common code just check for both.
>>>>
>>>
>>> Hi all,
>>>
>>> Why not copy the method of ACPI numa? There only three elements should be
>>> configured:
>>> 1) a cpu belong to which node
>>> 2) a memory block belong to which node
>>> 3) the distance of each two nodes
>>>
>>> The devicetree nodes of numa can be like below:
>>> / {
>>>          ...
>>>
>>>          numa-nodes-info {
>>>                  node-name: node-description {
>>>                          mem-ranges = <...>;
>>>                          cpus-list = <...>;
>>>                  };
>>>
>>>                  nodes-distance {
>>>                          distance-list = <...>;
>>>                  };
>>>          };
>>>
>>>          ...
>>> };
>>>
>> some what similar to what your are proposing is already implemented in
>> my v2 patchset.
>> https://lwn.net/Articles/623920/
>>
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-November/305164.html
>> we have went to associativity property based implementation to keep it
>> more generic.
>> i do have both acpi(using linaro/hanjun's patches) and associativity
>> based implementations on our internal tree
>> and tested on thunderx platform.
>
>
> Great thanks!
>
>> i do see issue in creating numa mapping using ACPI for IOs(for
>> example, i am not able to create numa mapping for ITS which is on each
>> node, using ACPI tables),  since ACPI spec (tables SRAT and SLIT)
>> talks only about processor and memory.
>
>
> I'm not sure why the ITS needs to know the NUMA domain, for my
> understanding, the interrupt will route to the correct NUMA domain
> using setting the affinity, ITS will configured to route it to
> the right GICR(cpu), so I think the ITS don't need to know which
> NUMA node belonging to, correct me if I missed something.
IIUC, a GICR/collection is per cpu and can be mapped to a numa node using
the cpu-to-node mapping.
However, there are multiple ITSes in a multi-socket platform (at least
one ITS per socket);
knowing the ITS-to-numa-node mapping will help in optimally routing
interrupts to any one of the GICRs/collections of that node.
Hence, we need to find which ITS belongs to which socket/node using the DT.
The same applies to the PCI bus too.
>
> Thanks
> Hanjun

thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Sept. 11, 2015, 3:53 a.m. UTC | #15
Hi Thunder,


On Tue, Sep 8, 2015 at 9:57 PM, Ganapatrao Kulkarni
<gpkulkarni@gmail.com> wrote:
> Hi Hanjun,
>
> On Tue, Sep 8, 2015 at 6:57 PM, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>> Hi Ganapatrao,
>>
>>
>> On 08/29/2015 10:56 PM, Ganapatrao Kulkarni wrote:
>>>
>>> Hi Thunder,
>>>
>>> On Sat, Aug 29, 2015 at 3:16 PM, Leizhen (ThunderTown)
>>> <thunder.leizhen@huawei.com> wrote:
>>>>
>>>>
>>>>
>>>> On 2015/8/28 22:02, Rob Herring wrote:
>>>>>
>>>>> +benh
>>>>>
>>>>> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com>
>>>>> wrote:
>>>>>>
>>>>>> Hi,
>>>>>>
>>>>>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>>>>>>
>>>>>>> DT bindings for numa map for memory, cores and IOs using
>>>>>>> arm,associativity device node property.
>>>>>>
>>>>>>
>>>>>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>>>>>> point in renaming the properties.
>>>>>
>>>>>
>>>>> So just keep the ibm? I'm okay with that. That would help move to
>>>>> common code. Alternatively, we could drop the vendor prefix and have
>>>>> common code just check for both.
>>>>>
>>>>
>>>> Hi all,
>>>>
>>>> Why not copy the method of ACPI numa? There only three elements should be
>>>> configured:
>>>> 1) a cpu belong to which node
>>>> 2) a memory block belong to which node
>>>> 3) the distance of each two nodes
I too thought ACPI only defines the mapping of cpus and memory to numa
nodes, with no specification for I/Os.
However, after going through the x86 implementation, I can see there is
provision for mapping I/Os to a numa node in ACPI.
In the x86 code, the function pci_acpi_scan_root calls acpi_get_node to get
the associated node for a pci bus using the _PXM object.
This implies there is an entry in the ACPI tables to map a pci bus to a numa
node (proximity domain).
So in DT also, we should have a binding to define the cpu, memory and I/O
to node mapping.
>>>>
>>>> The devicetree nodes of numa can be like below:
>>>> / {
>>>>          ...
>>>>
>>>>          numa-nodes-info {
>>>>                  node-name: node-description {
>>>>                          mem-ranges = <...>;
>>>>                          cpus-list = <...>;
>>>>                  };
>>>>
>>>>                  nodes-distance {
>>>>                          distance-list = <...>;
>>>>                  };
>>>>          };
>>>>
>>>>          ...
>>>> };
>>>>
>>> some what similar to what your are proposing is already implemented in
>>> my v2 patchset.
>>> https://lwn.net/Articles/623920/
>>>
>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-November/305164.html
>>> we have went to associativity property based implementation to keep it
>>> more generic.
>>> i do have both acpi(using linaro/hanjun's patches) and associativity
>>> based implementations on our internal tree
>>> and tested on thunderx platform.
>>
>>
>> Great thanks!
>>
>>> i do see issue in creating numa mapping using ACPI for IOs(for
>>> example, i am not able to create numa mapping for ITS which is on each
>>> node, using ACPI tables),  since ACPI spec (tables SRAT and SLIT)
>>> talks only about processor and memory.
>>
>>
>> I'm not sure why the ITS needs to know the NUMA domain, for my
>> understanding, the interrupt will route to the correct NUMA domain
>> using setting the affinity, ITS will configured to route it to
>> the right GICR(cpu), so I think the ITS don't need to know which
>> NUMA node belonging to, correct me if I missed something.
> IIUC, GICR/collection is per cpu and can be mapped to numa node using
> cpu to node mapping.
> However there are multiple its in multi-socket platform(at-least one
> its per socket),
> knowing its to numa node mapping will help in routing(optimal) the
> interrupts to  any one of GICR/collections of that node
> Hence, we need to find which its belongs to which socket/node using dt.
> same applies to pci bus too.
>>
>> Thanks
>> Hanjun
>
> thanks
> Ganapat
thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leizhen (ThunderTown) Sept. 11, 2015, 6:43 a.m. UTC | #16
On 2015/9/11 11:53, Ganapatrao Kulkarni wrote:
> Hi Thunder,
> 
> 
> On Tue, Sep 8, 2015 at 9:57 PM, Ganapatrao Kulkarni
> <gpkulkarni@gmail.com> wrote:
>> Hi Hanjun,
>>
>> On Tue, Sep 8, 2015 at 6:57 PM, Hanjun Guo <hanjun.guo@linaro.org> wrote:
>>> Hi Ganapatrao,
>>>
>>>
>>> On 08/29/2015 10:56 PM, Ganapatrao Kulkarni wrote:
>>>>
>>>> Hi Thunder,
>>>>
>>>> On Sat, Aug 29, 2015 at 3:16 PM, Leizhen (ThunderTown)
>>>> <thunder.leizhen@huawei.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 2015/8/28 22:02, Rob Herring wrote:
>>>>>>
>>>>>> +benh
>>>>>>
>>>>>> On Fri, Aug 28, 2015 at 7:32 AM, Mark Rutland <mark.rutland@arm.com>
>>>>>> wrote:
>>>>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>>>>>>>>
>>>>>>>> DT bindings for numa map for memory, cores and IOs using
>>>>>>>> arm,associativity device node property.
>>>>>>>
>>>>>>>
>>>>>>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>>>>>>> point in renaming the properties.
>>>>>>
>>>>>>
>>>>>> So just keep the ibm? I'm okay with that. That would help move to
>>>>>> common code. Alternatively, we could drop the vendor prefix and have
>>>>>> common code just check for both.
>>>>>>
>>>>>
>>>>> Hi all,
>>>>>
>>>>> Why not copy the method of ACPI numa? There only three elements should be
>>>>> configured:
>>>>> 1) a cpu belong to which node
>>>>> 2) a memory block belong to which node
>>>>> 3) the distance of each two nodes
> I too thought acpi only defines mapping for cpu and memory to numa
> nodes and no specification to define for IOs.
> however after going through the x86 implementation, i can see there is
> provision for mapping IOs to numa node in acpi.
> in x86 code, function pci_acpi_scan_root calls acpi_get_node to get
> associated node for pci bus using _PXM object.
> it imply there is entry in acpi tables to map pci bus for numa
> node(proximity domain).
> so in dt also, we should  have binding to define cpu, memory and IOs
> to node mapping.

Yes, we should implement of_node_to_nid, to support device drivers using dev_to_node.

I have added a description about it in the reply to Benjamin Herrenschmidt, on 2015/8/31 9:46.


>>>>>
>>>>> The devicetree nodes of numa can be like below:
>>>>> / {
>>>>>          ...
>>>>>
>>>>>          numa-nodes-info {
>>>>>                  node-name: node-description {
>>>>>                          mem-ranges = <...>;
>>>>>                          cpus-list = <...>;
>>>>>                  };
>>>>>
>>>>>                  nodes-distance {
>>>>>                          distance-list = <...>;
>>>>>                  };
>>>>>          };
>>>>>
>>>>>          ...
>>>>> };
>>>>>
>>>> some what similar to what your are proposing is already implemented in
>>>> my v2 patchset.
>>>> https://lwn.net/Articles/623920/
>>>>
>>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-November/305164.html
>>>> we have went to associativity property based implementation to keep it
>>>> more generic.
>>>> i do have both acpi(using linaro/hanjun's patches) and associativity
>>>> based implementations on our internal tree
>>>> and tested on thunderx platform.
>>>
>>>
>>> Great thanks!
>>>
>>>> i do see issue in creating numa mapping using ACPI for IOs(for
>>>> example, i am not able to create numa mapping for ITS which is on each
>>>> node, using ACPI tables),  since ACPI spec (tables SRAT and SLIT)
>>>> talks only about processor and memory.
>>>
>>>
>>> I'm not sure why the ITS needs to know the NUMA domain, for my
>>> understanding, the interrupt will route to the correct NUMA domain
>>> using setting the affinity, ITS will configured to route it to
>>> the right GICR(cpu), so I think the ITS don't need to know which
>>> NUMA node belonging to, correct me if I missed something.
>> IIUC, GICR/collection is per cpu and can be mapped to numa node using
>> cpu to node mapping.
>> However there are multiple its in multi-socket platform(at-least one
>> its per socket),
>> knowing its to numa node mapping will help in routing(optimal) the
>> interrupts to  any one of GICR/collections of that node
>> Hence, we need to find which its belongs to which socket/node using dt.
>> same applies to pci bus too.
>>>
>>> Thanks
>>> Hanjun
>>
>> thanks
>> Ganapat
> thanks
> Ganapat
> 
> .
> 

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Sept. 29, 2015, 8:38 a.m. UTC | #17
(sending again, by mistake it was set to html mode)

On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
<gpkulkarni@gmail.com> wrote:
> Hi Mark,
>
> I have tried to answer your comments, in the meantime we are waiting for Ben
> to share the details.
>
> On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>>
>> Hi,
>>
>> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>> > DT bindings for numa map for memory, cores and IOs using
>> > arm,associativity device node property.
>>
>> Given this is just a copy of ibm,associativity, I'm not sure I see much
>> point in renaming the properties.
>>
>> However, (somewhat counter to that) I'm also concerned that this isn't
>> sufficient for systems we're beginning to see today (more on that
>> below), so I don't think a simple copy of ibm,associativity is good
>> enough.
>
> it is just copy right now, however it can evolve when we come across more
> arm64 numa platforms
>>
>>
>> >
>> > Signed-off-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
>> > ---
>> >  Documentation/devicetree/bindings/arm/numa.txt | 212
>> > +++++++++++++++++++++++++
>> >  1 file changed, 212 insertions(+)
>> >  create mode 100644 Documentation/devicetree/bindings/arm/numa.txt
>> >
>> > diff --git a/Documentation/devicetree/bindings/arm/numa.txt
>> > b/Documentation/devicetree/bindings/arm/numa.txt
>> > new file mode 100644
>> > index 0000000..dc3ef86
>> > --- /dev/null
>> > +++ b/Documentation/devicetree/bindings/arm/numa.txt
>> > @@ -0,0 +1,212 @@
>> >
>> > +==============================================================================
>> > +NUMA binding description.
>> >
>> > +==============================================================================
>> > +
>> >
>> > +==============================================================================
>> > +1 - Introduction
>> >
>> > +==============================================================================
>> > +
>> > +Systems employing a Non Uniform Memory Access (NUMA) architecture
>> > contain
>> > +collections of hardware resources including processors, memory, and I/O
>> > buses,
>> > +that comprise what is commonly known as a NUMA node.
>> > +Processor accesses to memory within the local NUMA node is generally
>> > faster
>> > +than processor accesses to memory outside of the local NUMA node.
>> > +DT defines interfaces that allow the platform to convey NUMA node
>> > +topology information to OS.
>> > +
>> >
>> > +==============================================================================
>> > +2 - arm,associativity
>> >
>> > +==============================================================================
>> > +The mapping is done using arm,associativity device property.
>> > +this property needs to be present in every device node which needs to
>> > to be
>> > +mapped to numa nodes.
>>
>> Can't there be some inheritance? e.g. all devices on a bus with an
>> arm,associativity property being assumed to share that value?
>
> yes there is inheritance and respective bus drivers should take care of it,
> like pci driver does at present.
>>
>>
>> > +
>> > +arm,associativity property is set of 32-bit integers which defines
>> > level of
>>
>> s/set/list/ -- the order is important.
>
> ok
>>
>>
>> > +topology and boundary in the system at which a significant difference
>> > in
>> > +performance can be measured between cross-device accesses within
>> > +a single location and those spanning multiple locations.
>> > +The first cell always contains the broadest subdivision within the
>> > system,
>> > +while the last cell enumerates the individual devices, such as an SMT
>> > thread
>> > +of a CPU, or a bus bridge within an SoC".
>>
>> While this gives us some hierarchy, this doesn't seem to encode relative
>> distances at all. That seems like an oversight.
>
>
> distance is computed, will add the details to document.
> local nodes will have distance as 10(LOCAL_DISTANCE) and every level, the
> distance multiplies by 2.
> for example, for level 1 numa topology, distance from local node to remote
> node will be 20.
>
>>
>>
>> Additionally, I'm somewhat unclear on how what you'd be expected to
>> provide for this property in cases like ring or mesh interconnects,
>> where there isn't a strict hierarchy (see systems with ARM's own CCN, or
>> Tilera's TILE-Mx), but there is some measure of closeness.
>
>
> IIUC, as per ARMs CCN architecture, all core/clusters are at equal distance
> of DDR, i dont see any NUMA topology.
> however, if there are 2 SoC connected thorough the CCN, then it is very much
> similar to cavium topology.
>
>> Must all of these have the same length? If so, why not have a
>> #(whatever)-cells property in the root to describe the expected length?
>> If not, how are they to be interpreted relative to each other?
>
>
> yes, all are of default size.
> IMHO, there is no need to add cells property.
>>
>>
>> > +
>> > +ex:
>>
>> s/ex/Example:/, please. There's no need to contract that.
>>
>> > +       /* board 0, socket 0, cluster 0, core 0  thread 0 */
>> > +       arm,associativity = <0 0 0 0 0>;
>> > +
>> >
>> > +==============================================================================
>> > +3 - arm,associativity-reference-points
>> >
>> > +==============================================================================
>> > +This property is a set of 32-bit integers, each representing an index
>> > into
>>
>> Likeise, s/set/list/
>
> ok
>>
>>
>> > +the arm,associativity nodes. The first integer is the most significant
>> > +NUMA boundary and the following are progressively less significant
>> > boundaries.
>> > +There can be more than one level of NUMA.
>>
>> I'm not clear on why this is necessary; the arm,associativity property
>> is already ordered from most significant to least significant per its
>> description.
>
>
> first entry in arm,associativity-reference-points is used to find which
> entry in associativity defines node id.
> also entries in arm,associativity-reference-points defines,
> how many entries(depth) in associativity can be used to calculate node
> distance
> in both level 1 and  multi level(hierarchical) numa topology.
>
>>
>>
>> What does this property achieve?
>>
>> The description also doesn't describe where this property is expected to
>> live. The example isn't sufficient to disambiguate that, especially as
>> it seems like a trivial case.
>
> sure, will add one more example to describe the
> arm,associativity-reference-points
>>
>>
>> Is this only expected at the root of the tree? Can it be re-defined in
>> sub-nodes?
>
> yes it is defined only at the root.
>>
>>
>> > +
>> > +Ex:
>>
>> s/Ex/Example:/, please
>
> sure.
>>
>>
>> > +       arm,associativity-reference-points = <0 1>;
>> > +       The board Id(index 0) used first to calculate the associativity
>> > (node
>> > +       distance), then follows the  socket id(index 1).
>> > +
>> > +       arm,associativity-reference-points = <1 0>;
>> > +       The socket Id(index 1) used first to calculate the
>> > associativity,
>> > +       then follows the board id(index 0).
>> > +
>> > +       arm,associativity-reference-points = <0>;
>> > +       Only the board Id(index 0) used to calculate the associativity.
>> > +
>> > +       arm,associativity-reference-points = <1>;
>> > +       Only socket Id(index 1) used to calculate the associativity.
>> > +
>> >
>> > +==============================================================================
>> > +4 - Example dts
>> >
>> > +==============================================================================
>> > +
>> > +Example: 2 Node system consists of 2 boards and each board having one
>> > socket
>> > +and 8 core in each socket.
>> > +
>> > +       arm,associativity-reference-points = <0>;
>> > +
>> > +       memory@00c00000 {
>> > +               device_type = "memory";
>> > +               reg = <0x0 0x00c00000 0x0 0x80000000>;
>> > +               /* board 0, socket 0, no specific core */
>> > +               arm,associativity = <0 0 0xffff>;
>> > +       };
>> > +
>> > +       memory@10000000000 {
>> > +               device_type = "memory";
>> > +               reg = <0x100 0x00000000 0x0 0x80000000>;
>> > +               /* board 1, socket 0, no specific core */
>> > +               arm,associativity = <1 0 0xffff>;
>> > +       };
>> > +
>> > +       cpus {
>> > +               #address-cells = <2>;
>> > +               #size-cells = <0>;
>> > +
>> > +               cpu@000 {
>> > +                       device_type = "cpu";
>> > +                       compatible =  "arm,armv8";
>> > +                       reg = <0x0 0x000>;
>> > +                       enable-method = "psci";
>> > +                       /* board 0, socket 0, core 0*/
>> > +                       arm,associativity = <0 0 0>;
>>
>> We should specify w.r.t. memory and CPUs how the property is expected to
>> be used (e.g. in the CPU nodes rather than the cpu-map, with separate
>> memory nodes, etc). The generic description of arm,associativity isn't
>> sufficient to limit confusion there.
>
> ok, will add the details like which nodes can use this property.
>
>>
>>
>> Thanks,
>> Mark.
>
>
> thanks
> Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Sept. 29, 2015, 9:42 a.m. UTC | #18
On Tue, 2015-09-29 at 14:08 +0530, Ganapatrao Kulkarni wrote:
> (sending again, by mistake it was set to html mode)

I'm sorry, I was trying to get OpenPower to move faster & release PAPR
publicly but it looks like it's going to take a bit longer, so I'll try
to write a summary in the next couple of days.

Ben.

> On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
> <gpkulkarni@gmail.com> wrote:
> > Hi Mark,
> > 
> > I have tried to answer your comments, in the meantime we are
> > waiting for Ben
> > to share the details.
> > 
> > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <mark.rutland@arm.com
> > > wrote:
> > > 
> > > Hi,
> > > 
> > > On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni
> > > wrote:
> > > > DT bindings for numa map for memory, cores and IOs using
> > > > arm,associativity device node property.
> > > 
> > > Given this is just a copy of ibm,associativity, I'm not sure I
> > > see much
> > > point in renaming the properties.
> > > 
> > > However, (somewhat counter to that) I'm also concerned that this
> > > isn't
> > > sufficient for systems we're beginning to see today (more on that
> > > below), so I don't think a simple copy of ibm,associativity is
> > > good
> > > enough.
> > 
> > it is just copy right now, however it can evolve when we come
> > across more
> > arm64 numa platforms
> > > 
> > > 
> > > > 
> > > > Signed-off-by: Ganapatrao Kulkarni <
> > > > gkulkarni@caviumnetworks.com>
> > > > ---
> > > >  Documentation/devicetree/bindings/arm/numa.txt | 212
> > > > +++++++++++++++++++++++++
> > > >  1 file changed, 212 insertions(+)
> > > >  create mode 100644
> > > > Documentation/devicetree/bindings/arm/numa.txt
> > > > 
> > > > diff --git a/Documentation/devicetree/bindings/arm/numa.txt
> > > > b/Documentation/devicetree/bindings/arm/numa.txt
> > > > new file mode 100644
> > > > index 0000000..dc3ef86
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/arm/numa.txt
> > > > @@ -0,0 +1,212 @@
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +NUMA binding description.
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +1 - Introduction
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +
> > > > +Systems employing a Non Uniform Memory Access (NUMA)
> > > > architecture
> > > > contain
> > > > +collections of hardware resources including processors,
> > > > memory, and I/O
> > > > buses,
> > > > +that comprise what is commonly known as a NUMA node.
> > > > +Processor accesses to memory within the local NUMA node is
> > > > generally
> > > > faster
> > > > +than processor accesses to memory outside of the local NUMA
> > > > node.
> > > > +DT defines interfaces that allow the platform to convey NUMA
> > > > node
> > > > +topology information to OS.
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +2 - arm,associativity
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +The mapping is done using arm,associativity device property.
> > > > +this property needs to be present in every device node which
> > > > needs to
> > > > to be
> > > > +mapped to numa nodes.
> > > 
> > > Can't there be some inheritance? e.g. all devices on a bus with
> > > an
> > > arm,associativity property being assumed to share that value?
> > 
> > yes there is inheritance and respective bus drivers should take
> > care of it,
> > like pci driver does at present.
> > > 
> > > 
> > > > +
> > > > +arm,associativity property is set of 32-bit integers which
> > > > defines
> > > > level of
> > > 
> > > s/set/list/ -- the order is important.
> > 
> > ok
> > > 
> > > 
> > > > +topology and boundary in the system at which a significant
> > > > difference
> > > > in
> > > > +performance can be measured between cross-device accesses
> > > > within
> > > > +a single location and those spanning multiple locations.
> > > > +The first cell always contains the broadest subdivision within
> > > > the
> > > > system,
> > > > +while the last cell enumerates the individual devices, such as
> > > > an SMT
> > > > thread
> > > > +of a CPU, or a bus bridge within an SoC".
> > > 
> > > While this gives us some hierarchy, this doesn't seem to encode
> > > relative
> > > distances at all. That seems like an oversight.
> > 
> > 
> > distance is computed, will add the details to document.
> > local nodes will have distance as 10(LOCAL_DISTANCE) and every
> > level, the
> > distance multiplies by 2.
> > for example, for level 1 numa topology, distance from local node to
> > remote
> > node will be 20.
> > 
> > > 
> > > 
> > > Additionally, I'm somewhat unclear on how what you'd be expected
> > > to
> > > provide for this property in cases like ring or mesh
> > > interconnects,
> > > where there isn't a strict hierarchy (see systems with ARM's own
> > > CCN, or
> > > Tilera's TILE-Mx), but there is some measure of closeness.
> > 
> > 
> > IIUC, as per ARMs CCN architecture, all core/clusters are at equal
> > distance
> > of DDR, i dont see any NUMA topology.
> > however, if there are 2 SoC connected thorough the CCN, then it is
> > very much
> > similar to cavium topology.
> > 
> > > Must all of these have the same length? If so, why not have a
> > > #(whatever)-cells property in the root to describe the expected
> > > length?
> > > If not, how are they to be interpreted relative to each other?
> > 
> > 
> > yes, all are of default size.
> > IMHO, there is no need to add cells property.
> > > 
> > > 
> > > > +
> > > > +ex:
> > > 
> > > s/ex/Example:/, please. There's no need to contract that.
> > > 
> > > > +       /* board 0, socket 0, cluster 0, core 0  thread 0 */
> > > > +       arm,associativity = <0 0 0 0 0>;
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +3 - arm,associativity-reference-points
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +This property is a set of 32-bit integers, each representing
> > > > an index
> > > > into
> > > 
> > > Likeise, s/set/list/
> > 
> > ok
> > > 
> > > 
> > > > +the arm,associativity nodes. The first integer is the most
> > > > significant
> > > > +NUMA boundary and the following are progressively less
> > > > significant
> > > > boundaries.
> > > > +There can be more than one level of NUMA.
> > > 
> > > I'm not clear on why this is necessary; the arm,associativity
> > > property
> > > is already ordered from most significant to least significant per
> > > its
> > > description.
> > 
> > 
> > first entry in arm,associativity-reference-points is used to find
> > which
> > entry in associativity defines node id.
> > also entries in arm,associativity-reference-points defines,
> > how many entries(depth) in associativity can be used to calculate
> > node
> > distance
> > in both level 1 and  multi level(hierarchical) numa topology.
> > 
> > > 
> > > 
> > > What does this property achieve?
> > > 
> > > The description also doesn't describe where this property is
> > > expected to
> > > live. The example isn't sufficient to disambiguate that,
> > > especially as
> > > it seems like a trivial case.
> > 
> > sure, will add one more example to describe the
> > arm,associativity-reference-points
> > > 
> > > 
> > > Is this only expected at the root of the tree? Can it be re
> > > -defined in
> > > sub-nodes?
> > 
> > yes it is defined only at the root.
> > > 
> > > 
> > > > +
> > > > +Ex:
> > > 
> > > s/Ex/Example:/, please
> > 
> > sure.
> > > 
> > > 
> > > > +       arm,associativity-reference-points = <0 1>;
> > > > +       The board Id(index 0) used first to calculate the
> > > > associativity
> > > > (node
> > > > +       distance), then follows the  socket id(index 1).
> > > > +
> > > > +       arm,associativity-reference-points = <1 0>;
> > > > +       The socket Id(index 1) used first to calculate the
> > > > associativity,
> > > > +       then follows the board id(index 0).
> > > > +
> > > > +       arm,associativity-reference-points = <0>;
> > > > +       Only the board Id(index 0) used to calculate the
> > > > associativity.
> > > > +
> > > > +       arm,associativity-reference-points = <1>;
> > > > +       Only socket Id(index 1) used to calculate the
> > > > associativity.
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +4 - Example dts
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +
> > > > +Example: 2 Node system consists of 2 boards and each board
> > > > having one
> > > > socket
> > > > +and 8 core in each socket.
> > > > +
> > > > +       arm,associativity-reference-points = <0>;
> > > > +
> > > > +       memory@00c00000 {
> > > > +               device_type = "memory";
> > > > +               reg = <0x0 0x00c00000 0x0 0x80000000>;
> > > > +               /* board 0, socket 0, no specific core */
> > > > +               arm,associativity = <0 0 0xffff>;
> > > > +       };
> > > > +
> > > > +       memory@10000000000 {
> > > > +               device_type = "memory";
> > > > +               reg = <0x100 0x00000000 0x0 0x80000000>;
> > > > +               /* board 1, socket 0, no specific core */
> > > > +               arm,associativity = <1 0 0xffff>;
> > > > +       };
> > > > +
> > > > +       cpus {
> > > > +               #address-cells = <2>;
> > > > +               #size-cells = <0>;
> > > > +
> > > > +               cpu@000 {
> > > > +                       device_type = "cpu";
> > > > +                       compatible =  "arm,armv8";
> > > > +                       reg = <0x0 0x000>;
> > > > +                       enable-method = "psci";
> > > > +                       /* board 0, socket 0, core 0*/
> > > > +                       arm,associativity = <0 0 0>;
> > > 
> > > We should specify w.r.t. memory and CPUs how the property is
> > > expected to
> > > be used (e.g. in the CPU nodes rather than the cpu-map, with
> > > separate
> > > memory nodes, etc). The generic description of arm,associativity
> > > isn't
> > > sufficient to limit confusion there.
> > 
> > ok, will add the details like which nodes can use this property.
> > 
> > > 
> > > 
> > > Thanks,
> > > Mark.
> > 
> > 
> > thanks
> > Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Sept. 30, 2015, 12:28 a.m. UTC | #19
On Tue, 2015-09-29 at 14:08 +0530, Ganapatrao Kulkarni wrote:
> (sending again, by mistake it was set to html mode)

The representation consists of a hierarchy of domains, the idea being
that resources are grouped in domains of similar average performance
relative to each other.

The platform decides which "levels" of that hierarchy are significant. 

The "ibm,associativity" property allows one to determine the associativity
between two resources (i.e. nodes) at a given level.

Unfortunately that property went through changes, so another property
in the DT (ibm,architecture-vec-5) contains, among a bunch of other
things, a bit indicating which form of the ibm,associativity property
is used. I'm going to stick to the new "form 1" in this description.

The ibm,associativity contains one or more lists of numbers (32-bit
cells), which represent the domains:

	< C1 , L1_1, L1_2, ... , C2, L2_1, L2_2, ... >

Where C1 (count 1) is the number of items for list 1, and L1_1,
L1_2, ... L1_C1 are the items for list 1, and same for C2/L2.

The entries in those lists are domain numbers from the highest level of
grouping to the lowest (successive numbers are sub divisions)
for example drawer#, socket#, chip#, core#... with the lowest level
being the actual resource itself. So within a domain that last number
is generally unique.

Different resources can have different numbers of levels; for example, if
we have a grouping of node, socket, chip, core, a CPU core node would have
a list with all 4, but a memory controller on a chip might have only the
first 3.

This is an important statement in the spec:

<<
The user of this information is cautioned not to imply
any specific physical/logical significance of the various intermediate
levels.
>>

We can have multiple lists because a given resource can be connected
via multiple paths in the same platform.

That means that to properly calculate the distance to another resource,
all the paths need to be looked at (assuming the HW will pick the
shortest).

Additionally, to help the OS, another property, "ibm,associativity
-reference-points", indicates which levels (which indices in
the above lists) are of biggest significance to the platform. This can
typically be used by an OS to decide what to consider a "NUMA node"
if the OS cannot operate on distances alone. This is a list of 1-based
numbers representing indices in the associativity list. They should
be in order of significance of the boundary.

Finally, the ibm,max-associativity-domains (in the /rtas node on
pseries) is an array of cells < C, M1, M2, ... MC > (first is
count) containing for each domain/level the max number supported
by the platform.

Ben.

> On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
> <gpkulkarni@gmail.com> wrote:
> > Hi Mark,
> > 
> > I have tried to answer your comments, in the meantime we are
> > waiting for Ben
> > to share the details.
> > 
> > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <mark.rutland@arm.com
> > > wrote:
> > > 
> > > Hi,
> > > 
> > > On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni
> > > wrote:
> > > > DT bindings for numa map for memory, cores and IOs using
> > > > arm,associativity device node property.
> > > 
> > > Given this is just a copy of ibm,associativity, I'm not sure I
> > > see much
> > > point in renaming the properties.
> > > 
> > > However, (somewhat counter to that) I'm also concerned that this
> > > isn't
> > > sufficient for systems we're beginning to see today (more on that
> > > below), so I don't think a simple copy of ibm,associativity is
> > > good
> > > enough.
> > 
> > it is just copy right now, however it can evolve when we come
> > across more
> > arm64 numa platforms
> > > 
> > > 
> > > > 
> > > > Signed-off-by: Ganapatrao Kulkarni <
> > > > gkulkarni@caviumnetworks.com>
> > > > ---
> > > >  Documentation/devicetree/bindings/arm/numa.txt | 212
> > > > +++++++++++++++++++++++++
> > > >  1 file changed, 212 insertions(+)
> > > >  create mode 100644
> > > > Documentation/devicetree/bindings/arm/numa.txt
> > > > 
> > > > diff --git a/Documentation/devicetree/bindings/arm/numa.txt
> > > > b/Documentation/devicetree/bindings/arm/numa.txt
> > > > new file mode 100644
> > > > index 0000000..dc3ef86
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/arm/numa.txt
> > > > @@ -0,0 +1,212 @@
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +NUMA binding description.
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +1 - Introduction
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +
> > > > +Systems employing a Non Uniform Memory Access (NUMA)
> > > > architecture
> > > > contain
> > > > +collections of hardware resources including processors,
> > > > memory, and I/O
> > > > buses,
> > > > +that comprise what is commonly known as a NUMA node.
> > > > +Processor accesses to memory within the local NUMA node is
> > > > generally
> > > > faster
> > > > +than processor accesses to memory outside of the local NUMA
> > > > node.
> > > > +DT defines interfaces that allow the platform to convey NUMA
> > > > node
> > > > +topology information to OS.
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +2 - arm,associativity
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +The mapping is done using arm,associativity device property.
> > > > +this property needs to be present in every device node which
> > > > needs to
> > > > to be
> > > > +mapped to numa nodes.
> > > 
> > > Can't there be some inheritance? e.g. all devices on a bus with
> > > an
> > > arm,associativity property being assumed to share that value?
> > 
> > yes there is inheritance and respective bus drivers should take
> > care of it,
> > like pci driver does at present.
> > > 
> > > 
> > > > +
> > > > +arm,associativity property is set of 32-bit integers which
> > > > defines
> > > > level of
> > > 
> > > s/set/list/ -- the order is important.
> > 
> > ok
> > > 
> > > 
> > > > +topology and boundary in the system at which a significant
> > > > difference
> > > > in
> > > > +performance can be measured between cross-device accesses
> > > > within
> > > > +a single location and those spanning multiple locations.
> > > > +The first cell always contains the broadest subdivision within
> > > > the
> > > > system,
> > > > +while the last cell enumerates the individual devices, such as
> > > > an SMT
> > > > thread
> > > > +of a CPU, or a bus bridge within an SoC".
> > > 
> > > While this gives us some hierarchy, this doesn't seem to encode
> > > relative
> > > distances at all. That seems like an oversight.
> > 
> > 
> > distance is computed, will add the details to document.
> > local nodes will have distance as 10(LOCAL_DISTANCE) and every
> > level, the
> > distance multiplies by 2.
> > for example, for level 1 numa topology, distance from local node to
> > remote
> > node will be 20.
> > 
> > > 
> > > 
> > > Additionally, I'm somewhat unclear on how what you'd be expected
> > > to
> > > provide for this property in cases like ring or mesh
> > > interconnects,
> > > where there isn't a strict hierarchy (see systems with ARM's own
> > > CCN, or
> > > Tilera's TILE-Mx), but there is some measure of closeness.
> > 
> > 
> > IIUC, as per ARMs CCN architecture, all core/clusters are at equal
> > distance
> > of DDR, i dont see any NUMA topology.
> > however, if there are 2 SoCs connected through the CCN, then it is
> > very much
> > similar to cavium topology.
> > 
> > > Must all of these have the same length? If so, why not have a
> > > #(whatever)-cells property in the root to describe the expected
> > > length?
> > > If not, how are they to be interpreted relative to each other?
> > 
> > 
> > yes, all are of default size.
> > IMHO, there is no need to add cells property.
> > > 
> > > 
> > > > +
> > > > +ex:
> > > 
> > > s/ex/Example:/, please. There's no need to contract that.
> > > 
> > > > +       /* board 0, socket 0, cluster 0, core 0  thread 0 */
> > > > +       arm,associativity = <0 0 0 0 0>;
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +3 - arm,associativity-reference-points
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +This property is a set of 32-bit integers, each representing
> > > > an index
> > > > into
> > > 
> > > Likewise, s/set/list/
> > 
> > ok
> > > 
> > > 
> > > > +the arm,associativity nodes. The first integer is the most
> > > > significant
> > > > +NUMA boundary and the following are progressively less
> > > > significant
> > > > boundaries.
> > > > +There can be more than one level of NUMA.
> > > 
> > > I'm not clear on why this is necessary; the arm,associativity
> > > property
> > > is already ordered from most significant to least significant per
> > > its
> > > description.
> > 
> > 
> > first entry in arm,associativity-reference-points is used to find
> > which
> > entry in associativity defines node id.
> > also entries in arm,associativity-reference-points defines,
> > how many entries(depth) in associativity can be used to calculate
> > node
> > distance
> > in both level 1 and  multi level(hierarchical) numa topology.
> > 
> > > 
> > > 
> > > What does this property achieve?
> > > 
> > > The description also doesn't describe where this property is
> > > expected to
> > > live. The example isn't sufficient to disambiguate that,
> > > especially as
> > > it seems like a trivial case.
> > 
> > sure, will add one more example to describe the
> > arm,associativity-reference-points
> > > 
> > > 
> > > Is this only expected at the root of the tree? Can it be re
> > > -defined in
> > > sub-nodes?
> > 
> > yes it is defined only at the root.
> > > 
> > > 
> > > > +
> > > > +Ex:
> > > 
> > > s/Ex/Example:/, please
> > 
> > sure.
> > > 
> > > 
> > > > +       arm,associativity-reference-points = <0 1>;
> > > > +       The board Id(index 0) used first to calculate the
> > > > associativity
> > > > (node
> > > > +       distance), then follows the  socket id(index 1).
> > > > +
> > > > +       arm,associativity-reference-points = <1 0>;
> > > > +       The socket Id(index 1) used first to calculate the
> > > > associativity,
> > > > +       then follows the board id(index 0).
> > > > +
> > > > +       arm,associativity-reference-points = <0>;
> > > > +       Only the board Id(index 0) used to calculate the
> > > > associativity.
> > > > +
> > > > +       arm,associativity-reference-points = <1>;
> > > > +       Only socket Id(index 1) used to calculate the
> > > > associativity.
> > > > +
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +4 - Example dts
> > > > 
> > > > +==============================================================
> > > > ================
> > > > +
> > > > +Example: 2 Node system consists of 2 boards and each board
> > > > having one
> > > > socket
> > > > +and 8 core in each socket.
> > > > +
> > > > +       arm,associativity-reference-points = <0>;
> > > > +
> > > > +       memory@00c00000 {
> > > > +               device_type = "memory";
> > > > +               reg = <0x0 0x00c00000 0x0 0x80000000>;
> > > > +               /* board 0, socket 0, no specific core */
> > > > +               arm,associativity = <0 0 0xffff>;
> > > > +       };
> > > > +
> > > > +       memory@10000000000 {
> > > > +               device_type = "memory";
> > > > +               reg = <0x100 0x00000000 0x0 0x80000000>;
> > > > +               /* board 1, socket 0, no specific core */
> > > > +               arm,associativity = <1 0 0xffff>;
> > > > +       };
> > > > +
> > > > +       cpus {
> > > > +               #address-cells = <2>;
> > > > +               #size-cells = <0>;
> > > > +
> > > > +               cpu@000 {
> > > > +                       device_type = "cpu";
> > > > +                       compatible =  "arm,armv8";
> > > > +                       reg = <0x0 0x000>;
> > > > +                       enable-method = "psci";
> > > > +                       /* board 0, socket 0, core 0*/
> > > > +                       arm,associativity = <0 0 0>;
> > > 
> > > We should specify w.r.t. memory and CPUs how the property is
> > > expected to
> > > be used (e.g. in the CPU nodes rather than the cpu-map, with
> > > separate
> > > memory nodes, etc). The generic description of arm,associativity
> > > isn't
> > > sufficient to limit confusion there.
> > 
> > ok, will add the details like which nodes can use this property.
> > 
> > > 
> > > 
> > > Thanks,
> > > Mark.
> > 
> > 
> > thanks
> > Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Sept. 30, 2015, 10:19 a.m. UTC | #20
Thanks Ben for the details.

On Wed, Sep 30, 2015 at 5:58 AM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Tue, 2015-09-29 at 14:08 +0530, Ganapatrao Kulkarni wrote:
>> (sending again, by mistake it was set to html mode)
>
> The representation consists of a hierarchy of domains, the idea being
> that resources are grouped in domains of similar average performance
> relative to each other.
>
> The platform decides which "levels" of that hierarchy are significant.
>
> The "ibm,associativity" property allows one to determine the associativity
> between two resources (ie nodes) at a given level.
>
> Unfortunately that property went through changes, so another property
> in the DT (ibm,architecture-vec-5) contains, among a bunch of other
> things, a bit indicating which form of the ibm,associativity property
> is used. I'm going to stick to the new "form 1" in this description.
>
> The ibm,associativity contains one or more lists of numbers (32-bit
> cells), which represent the domains:
>
>         < C1 , L1_1, L1_2, ... , C2, L2_1, L2_2, ... >
>
> Where C1 (count 1) is the number of items for list 1, and L1_1,
> L1_2, ... L1_C1 are the items for list 1, and same for C2/L2.
can you please put some examples for more clarity.
>
> The entries in those lists are domain numbers from the highest level of
> grouping to the lowest (successive numbers are sub divisions)
> for example drawer#, socket#, chip#, core#... with the lowest level
> being the actual resource itself. So within a domain that last number
> is generally unique.
>
> Different resources can have different number of levels, for example if
> we have a grouping of node,socket,chip,core, a CPU core node would have
> a list with all 4 but a memory controller on a chip might have only the
> first 3.
can you please put some examples for more clarity.
>
> This is an important statement in the spec:
>
> <<
> The user of this information is cautioned not to imply
> any specific physical/logical significance of the various intermediate
> levels.
>>>
>
> We can have multiple lists because a given resource can be connected
> via multiple path in the same platform.
>
> That means that to properly calculate the distance to another resource,
> all the path need to be looked at (assuming the HW will pick the
> shortest).
>
> Additionally, to help the OS, another property "ibm,associativity
> -reference-points" property indicates which levels (which indices in
> the above lists) are of biggest significance to the platform. This can
> typically be used by an OS to decide what to consider a "NUMA node"
> if the OS cannot operate on distances alone. This is a list of 1-based
> numbers representing indices in the associativity list. They should
> be in order of significance of the boundary.
some examples please.
>
> Finally, the ibm,max-associativity-domains (in the /rtas node on
> pseries) is an array of cells < C, M1, M2, ... MC > (first is
> count) containing for each domain/level the max number supported
> by the platform.
max number of what/cpu?
how this helps?
please give some examples to understand this!
>
> Ben.
>
>> On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
>> <gpkulkarni@gmail.com> wrote:
>> > Hi Mark,
>> >
>> > I have tried to answer your comments, in the meantime we are
>> > waiting for Ben
>> > to share the details.
>> >
>> > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <mark.rutland@arm.com
>> > > wrote:
>> > >
>> > > Hi,
>> > >
>> > > On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni
>> > > wrote:
>> > > > DT bindings for numa map for memory, cores and IOs using
>> > > > arm,associativity device node property.
>> > >
>> > > Given this is just a copy of ibm,associativity, I'm not sure I
>> > > see much
>> > > point in renaming the properties.
>> > >
>> > > However, (somewhat counter to that) I'm also concerned that this
>> > > isn't
>> > > sufficient for systems we're beginning to see today (more on that
>> > > below), so I don't think a simple copy of ibm,associativity is
>> > > good
>> > > enough.
>> >
>> > it is just copy right now, however it can evolve when we come
>> > across more
>> > arm64 numa platforms
>> > >
>> > >
>> > > >
>> > > > Signed-off-by: Ganapatrao Kulkarni <
>> > > > gkulkarni@caviumnetworks.com>
>> > > > ---
>> > > >  Documentation/devicetree/bindings/arm/numa.txt | 212
>> > > > +++++++++++++++++++++++++
>> > > >  1 file changed, 212 insertions(+)
>> > > >  create mode 100644
>> > > > Documentation/devicetree/bindings/arm/numa.txt
>> > > >
>> > > > diff --git a/Documentation/devicetree/bindings/arm/numa.txt
>> > > > b/Documentation/devicetree/bindings/arm/numa.txt
>> > > > new file mode 100644
>> > > > index 0000000..dc3ef86
>> > > > --- /dev/null
>> > > > +++ b/Documentation/devicetree/bindings/arm/numa.txt
>> > > > @@ -0,0 +1,212 @@
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +NUMA binding description.
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +1 - Introduction
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +
>> > > > +Systems employing a Non Uniform Memory Access (NUMA)
>> > > > architecture
>> > > > contain
>> > > > +collections of hardware resources including processors,
>> > > > memory, and I/O
>> > > > buses,
>> > > > +that comprise what is commonly known as a NUMA node.
>> > > > +Processor accesses to memory within the local NUMA node is
>> > > > generally
>> > > > faster
>> > > > +than processor accesses to memory outside of the local NUMA
>> > > > node.
>> > > > +DT defines interfaces that allow the platform to convey NUMA
>> > > > node
>> > > > +topology information to OS.
>> > > > +
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +2 - arm,associativity
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +The mapping is done using arm,associativity device property.
>> > > > +this property needs to be present in every device node which
>> > > > needs to
>> > > > to be
>> > > > +mapped to numa nodes.
>> > >
>> > > Can't there be some inheritance? e.g. all devices on a bus with
>> > > an
>> > > arm,associativity property being assumed to share that value?
>> >
>> > yes there is inheritance and respective bus drivers should take
>> > care of it,
>> > like pci driver does at present.
>> > >
>> > >
>> > > > +
>> > > > +arm,associativity property is set of 32-bit integers which
>> > > > defines
>> > > > level of
>> > >
>> > > s/set/list/ -- the order is important.
>> >
>> > ok
>> > >
>> > >
>> > > > +topology and boundary in the system at which a significant
>> > > > difference
>> > > > in
>> > > > +performance can be measured between cross-device accesses
>> > > > within
>> > > > +a single location and those spanning multiple locations.
>> > > > +The first cell always contains the broadest subdivision within
>> > > > the
>> > > > system,
>> > > > +while the last cell enumerates the individual devices, such as
>> > > > an SMT
>> > > > thread
>> > > > +of a CPU, or a bus bridge within an SoC".
>> > >
>> > > While this gives us some hierarchy, this doesn't seem to encode
>> > > relative
>> > > distances at all. That seems like an oversight.
>> >
>> >
>> > distance is computed, will add the details to document.
>> > local nodes will have distance as 10(LOCAL_DISTANCE) and every
>> > level, the
>> > distance multiplies by 2.
>> > for example, for level 1 numa topology, distance from local node to
>> > remote
>> > node will be 20.
>> >
>> > >
>> > >
>> > > Additionally, I'm somewhat unclear on how what you'd be expected
>> > > to
>> > > provide for this property in cases like ring or mesh
>> > > interconnects,
>> > > where there isn't a strict hierarchy (see systems with ARM's own
>> > > CCN, or
>> > > Tilera's TILE-Mx), but there is some measure of closeness.
>> >
>> >
>> > IIUC, as per ARMs CCN architecture, all core/clusters are at equal
>> > distance
>> > of DDR, i dont see any NUMA topology.
>> > however, if there are 2 SoCs connected through the CCN, then it is
>> > very much
>> > similar to cavium topology.
>> >
>> > > Must all of these have the same length? If so, why not have a
>> > > #(whatever)-cells property in the root to describe the expected
>> > > length?
>> > > If not, how are they to be interpreted relative to each other?
>> >
>> >
>> > yes, all are of default size.
>> > IMHO, there is no need to add cells property.
>> > >
>> > >
>> > > > +
>> > > > +ex:
>> > >
>> > > s/ex/Example:/, please. There's no need to contract that.
>> > >
>> > > > +       /* board 0, socket 0, cluster 0, core 0  thread 0 */
>> > > > +       arm,associativity = <0 0 0 0 0>;
>> > > > +
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +3 - arm,associativity-reference-points
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +This property is a set of 32-bit integers, each representing
>> > > > an index
>> > > > into
>> > >
>> > > Likewise, s/set/list/
>> >
>> > ok
>> > >
>> > >
>> > > > +the arm,associativity nodes. The first integer is the most
>> > > > significant
>> > > > +NUMA boundary and the following are progressively less
>> > > > significant
>> > > > boundaries.
>> > > > +There can be more than one level of NUMA.
>> > >
>> > > I'm not clear on why this is necessary; the arm,associativity
>> > > property
>> > > is already ordered from most significant to least significant per
>> > > its
>> > > description.
>> >
>> >
>> > first entry in arm,associativity-reference-points is used to find
>> > which
>> > entry in associativity defines node id.
>> > also entries in arm,associativity-reference-points defines,
>> > how many entries(depth) in associativity can be used to calculate
>> > node
>> > distance
>> > in both level 1 and  multi level(hierarchical) numa topology.
>> >
>> > >
>> > >
>> > > What does this property achieve?
>> > >
>> > > The description also doesn't describe where this property is
>> > > expected to
>> > > live. The example isn't sufficient to disambiguate that,
>> > > especially as
>> > > it seems like a trivial case.
>> >
>> > sure, will add one more example to describe the
>> > arm,associativity-reference-points
>> > >
>> > >
>> > > Is this only expected at the root of the tree? Can it be re
>> > > -defined in
>> > > sub-nodes?
>> >
>> > yes it is defined only at the root.
>> > >
>> > >
>> > > > +
>> > > > +Ex:
>> > >
>> > > s/Ex/Example:/, please
>> >
>> > sure.
>> > >
>> > >
>> > > > +       arm,associativity-reference-points = <0 1>;
>> > > > +       The board Id(index 0) used first to calculate the
>> > > > associativity
>> > > > (node
>> > > > +       distance), then follows the  socket id(index 1).
>> > > > +
>> > > > +       arm,associativity-reference-points = <1 0>;
>> > > > +       The socket Id(index 1) used first to calculate the
>> > > > associativity,
>> > > > +       then follows the board id(index 0).
>> > > > +
>> > > > +       arm,associativity-reference-points = <0>;
>> > > > +       Only the board Id(index 0) used to calculate the
>> > > > associativity.
>> > > > +
>> > > > +       arm,associativity-reference-points = <1>;
>> > > > +       Only socket Id(index 1) used to calculate the
>> > > > associativity.
>> > > > +
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +4 - Example dts
>> > > >
>> > > > +==============================================================
>> > > > ================
>> > > > +
>> > > > +Example: 2 Node system consists of 2 boards and each board
>> > > > having one
>> > > > socket
>> > > > +and 8 core in each socket.
>> > > > +
>> > > > +       arm,associativity-reference-points = <0>;
>> > > > +
>> > > > +       memory@00c00000 {
>> > > > +               device_type = "memory";
>> > > > +               reg = <0x0 0x00c00000 0x0 0x80000000>;
>> > > > +               /* board 0, socket 0, no specific core */
>> > > > +               arm,associativity = <0 0 0xffff>;
>> > > > +       };
>> > > > +
>> > > > +       memory@10000000000 {
>> > > > +               device_type = "memory";
>> > > > +               reg = <0x100 0x00000000 0x0 0x80000000>;
>> > > > +               /* board 1, socket 0, no specific core */
>> > > > +               arm,associativity = <1 0 0xffff>;
>> > > > +       };
>> > > > +
>> > > > +       cpus {
>> > > > +               #address-cells = <2>;
>> > > > +               #size-cells = <0>;
>> > > > +
>> > > > +               cpu@000 {
>> > > > +                       device_type = "cpu";
>> > > > +                       compatible =  "arm,armv8";
>> > > > +                       reg = <0x0 0x000>;
>> > > > +                       enable-method = "psci";
>> > > > +                       /* board 0, socket 0, core 0*/
>> > > > +                       arm,associativity = <0 0 0>;
>> > >
>> > > We should specify w.r.t. memory and CPUs how the property is
>> > > expected to
>> > > be used (e.g. in the CPU nodes rather than the cpu-map, with
>> > > separate
>> > > memory nodes, etc). The generic description of arm,associativity
>> > > isn't
>> > > sufficient to limit confusion there.
>> >
>> > ok, will add the details like which nodes can use this property.
>> >
>> > >
>> > >
>> > > Thanks,
>> > > Mark.
>> >
>> >
>> > thanks
>> > Ganapat
thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Rutland Sept. 30, 2015, 10:53 a.m. UTC | #21
On Tue, Sep 29, 2015 at 09:38:04AM +0100, Ganapatrao Kulkarni wrote:
> (sending again, by mistake it was set to html mode)
> 
> On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
> <gpkulkarni@gmail.com> wrote:
> > Hi Mark,
> >
> > I have tried to answer your comments, in the meantime we are waiting for Ben
> > to share the details.
> >
> > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> >>
> >> Hi,
> >>
> >> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
> >> > DT bindings for numa map for memory, cores and IOs using
> >> > arm,associativity device node property.
> >>
> >> Given this is just a copy of ibm,associativity, I'm not sure I see much
> >> point in renaming the properties.
> >>
> >> However, (somewhat counter to that) I'm also concerned that this isn't
> >> sufficient for systems we're beginning to see today (more on that
> >> below), so I don't think a simple copy of ibm,associativity is good
> >> enough.
> >
> > it is just copy right now, however it can evolve when we come across more
> > arm64 numa platforms

Whatever we do I suspect we'll have to evolve it as new platforms
appear. As I mentioned there are contemporary NUMA ARM64 platforms (e.g.
those with CCN) that I don't think we can ignore now given we'll have to
cater for them.

> >> > +==============================================================================
> >> > +2 - arm,associativity
> >> >
> >> > +==============================================================================
> >> > +The mapping is done using arm,associativity device property.
> >> > +this property needs to be present in every device node which needs to
> >> > to be
> >> > +mapped to numa nodes.
> >>
> >> Can't there be some inheritance? e.g. all devices on a bus with an
> >> arm,associativity property being assumed to share that value?
> >
> > yes there is inheritance and respective bus drivers should take care of it,
> > like pci driver does at present.

Ok. 

That seems counter to my initial interpretation of the wording that the
property must be present on device nodes that need to be mapped to NUMA
nodes.

Is there any simple way of describing the set of nodes that need this
property?

> >> > +topology and boundary in the system at which a significant difference
> >> > in
> >> > +performance can be measured between cross-device accesses within
> >> > +a single location and those spanning multiple locations.
> >> > +The first cell always contains the broadest subdivision within the
> >> > system,
> >> > +while the last cell enumerates the individual devices, such as an SMT
> >> > thread
> >> > +of a CPU, or a bus bridge within an SoC".
> >>
> >> While this gives us some hierarchy, this doesn't seem to encode relative
> >> distances at all. That seems like an oversight.
> >
> >
> > distance is computed, will add the details to document.
> > local nodes will have distance as 10(LOCAL_DISTANCE) and every level, the
> > distance multiplies by 2.
> > for example, for level 1 numa topology, distance from local node to remote
> > node will be 20.

This seems arbitrary.

Why not always have this explicitly described?

> >> Additionally, I'm somewhat unclear on how what you'd be expected to
> >> provide for this property in cases like ring or mesh interconnects,
> >> where there isn't a strict hierarchy (see systems with ARM's own CCN, or
> >> Tilera's TILE-Mx), but there is some measure of closeness.
> >
> >
> > IIUC, as per ARMs CCN architecture, all core/clusters are at equal distance
> > of DDR, i dont see any NUMA topology.

The CCN is a ring interconnect, so CPU clusters (henceforth CPUs) can be
connected with differing distances to RAM instances (or devices).

Consider the simplified network below:

  +-------+      +--------+      +-------+
  | CPU 0 |------| DRAM A |------| CPU 1 |
  +-------+      +--------+      +-------+
      |                              |
      |                              |
  +--------+                     +--------+
  | DRAM B |                     | DRAM C |
  +--------+                     +--------+
      |                              |
      |                              |
  +-------+      +--------+      +-------+
  | CPU 2 |------| DRAM D |------| CPU 3 |
  +-------+      +--------+      +-------+

In this case CPUs and DRAMs are spaced evenly on the ring, but the
distance between an arbitrary CPU and DRAM is not uniform.

CPU 0 can access DRAM A or DRAM B with a single hop, but accesses to
DRAM C or DRAM D take three hops.

An access from CPU 0 to DRAM C could contend with accesses from CPU 1 to
DRAM D, as they share hops on the ring.

There is definitely a NUMA topology here, but there's not a strict
hierarchy. I don't see how you would represent this with the proposed
binding.

Likewise for the mesh networks (e.g. that of TILE-Mx)

> > however, if there are 2 SoCs connected through the CCN, then it is very much
> > similar to cavium topology.
> >
> >> Must all of these have the same length? If so, why not have a
> >> #(whatever)-cells property in the root to describe the expected length?
> >> If not, how are they to be interpreted relative to each other?
> >
> >
> > yes, all are of default size.

Where that size is...?

> > IMHO, there is no need to add cells property.

That might be the case, but it's unclear from the documentation. I don't
see how one would parse / verify values currently.

> >> > +the arm,associativity nodes. The first integer is the most significant
> >> > +NUMA boundary and the following are progressively less significant
> >> > boundaries.
> >> > +There can be more than one level of NUMA.
> >>
> >> I'm not clear on why this is necessary; the arm,associativity property
> >> is already ordered from most significant to least significant per its
> >> description.
> >
> >
> > first entry in arm,associativity-reference-points is used to find which
> > entry in associativity defines node id.
> > also entries in arm,associativity-reference-points defines,
> > how many entries(depth) in associativity can be used to calculate node
> > distance
> > in both level 1 and  multi level(hierarchical) numa topology.

I think this needs a more thorough description; I don't follow the
current one.

> >> Is this only expected at the root of the tree? Can it be re-defined in
> >> sub-nodes?
> >
> > yes it is defined only at the root.

This needs to be stated explicitly.

I see that this being the case, *,associativity-reference-points would
be a more powerful property than the #(whatever)-cells property I
mentioned earlier, but a more thorough description is required.

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Sept. 30, 2015, 5:50 p.m. UTC | #22
Hi Ben,

On Wed, Sep 30, 2015 at 4:23 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Tue, Sep 29, 2015 at 09:38:04AM +0100, Ganapatrao Kulkarni wrote:
>> (sending again, by mistake it was set to html mode)
>>
>> On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
>> <gpkulkarni@gmail.com> wrote:
>> > Hi Mark,
>> >
>> > I have tried to answer your comments, in the meantime we are waiting for Ben
>> > to share the details.
>> >
>> > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> >>
>> >> Hi,
>> >>
>> >> On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni wrote:
>> >> > DT bindings for numa map for memory, cores and IOs using
>> >> > arm,associativity device node property.
>> >>
>> >> Given this is just a copy of ibm,associativity, I'm not sure I see much
>> >> point in renaming the properties.
>> >>
>> >> However, (somewhat counter to that) I'm also concerned that this isn't
>> >> sufficient for systems we're beginning to see today (more on that
>> >> below), so I don't think a simple copy of ibm,associativity is good
>> >> enough.
>> >
>> > it is just copy right now, however it can evolve when we come across more
>> > arm64 numa platforms
>
> Whatever we do I suspect we'll have to evolve it as new platforms
> appear. As I mentioned there are contemporary NUMA ARM64 platforms (e.g.
> those with CCN) that I don't think we can ignore now given we'll have to
> cater for them.
>
>> >> > +==============================================================================
>> >> > +2 - arm,associativity
>> >> >
>> >> > +==============================================================================
>> >> > +The mapping is done using arm,associativity device property.
>> >> > +this property needs to be present in every device node which needs to
>> >> > to be
>> >> > +mapped to numa nodes.
>> >>
>> >> Can't there be some inheritance? e.g. all devices on a bus with an
>> >> arm,associativity property being assumed to share that value?
>> >
>> > yes there is inheritance and respective bus drivers should take care of it,
>> > like pci driver does at present.
>
> Ok.
>
> That seems counter to my initial interpretation of the wording that the
> property must be present on device nodes that need to be mapped to NUMA
> nodes.
>
> Is there any simple way of describing the set of nodes that need this
> property?
>
>> >> > +topology and boundary in the system at which a significant difference
>> >> > in
>> >> > +performance can be measured between cross-device accesses within
>> >> > +a single location and those spanning multiple locations.
>> >> > +The first cell always contains the broadest subdivision within the
>> >> > system,
>> >> > +while the last cell enumerates the individual devices, such as an SMT
>> >> > thread
>> >> > +of a CPU, or a bus bridge within an SoC".
>> >>
>> >> While this gives us some hierarchy, this doesn't seem to encode relative
>> >> distances at all. That seems like an oversight.
>> >
>> >
>> > distance is computed, will add the details to document.
>> > local nodes will have distance as 10(LOCAL_DISTANCE) and every level, the
>> > distance multiplies by 2.
>> > for example, for level 1 numa topology, distance from local node to remote
>> > node will be 20.
>
> This seems arbitrary.
>
> Why not always have this explicitly described?
>
>> >> Additionally, I'm somewhat unclear on how what you'd be expected to
>> >> provide for this property in cases like ring or mesh interconnects,
>> >> where there isn't a strict hierarchy (see systems with ARM's own CCN, or
>> >> Tilera's TILE-Mx), but there is some measure of closeness.
>> >
>> >
>> > IIUC, as per ARMs CCN architecture, all core/clusters are at equal distance
>> > of DDR, i dont see any NUMA topology.
>
> The CCN is a ring interconnect, so CPU clusters (henceforth CPUs) can be
> connected with differing distances to RAM instances (or devices).
>
> Consider the simplified network below:
>
>   +-------+      +--------+      +-------+
>   | CPU 0 |------| DRAM A |------| CPU 1 |
>   +-------+      +--------+      +-------+
>       |                              |
>       |                              |
>   +--------+                     +--------+
>   | DRAM B |                     | DRAM C |
>   +--------+                     +--------+
>       |                              |
>       |                              |
>   +-------+      +--------+      +-------+
>   | CPU 2 |------| DRAM D |------| CPU 3 |
>   +-------+      +--------+      +-------+
>
> In this case CPUs and DRAMs are spaced evenly on the ring, but the
> distance between an arbitrary CPU and DRAM is not uniform.
>
> CPU 0 can access DRAM A or DRAM B with a single hop, but accesses to
> DRAM C or DRAM D take three hops.
>
> An access from CPU 0 to DRAM C could contend with accesses from CPU 1 to
> DRAM D, as they share hops on the ring.
>
> There is definitely a NUMA topology here, but there's not a strict
> hierarchy. I don't see how you would represent this with the proposed
> binding.
Can you please explain how the associativity property would represent this
NUMA topology?
>
> Likewise for the mesh networks (e.g. that of TILE-Mx)
>
>> > however, if there are 2 SoC connected thorough the CCN, then it is very much
>> > similar to cavium topology.
>> >
>> >> Must all of these have the same length? If so, why not have a
>> >> #(whatever)-cells property in the root to describe the expected length?
>> >> If not, how are they to be interpreted relative to each other?
>> >
>> >
>> > yes, all are of default size.
>
> Where that size is...?
>
>> > IMHO, there is no need to add cells property.
>
> That might be the case, but it's unclear from the documentation. I don't
> see how one would parse / verify values currently.
>
>> >> > +the arm,associativity nodes. The first integer is the most significant
>> >> > +NUMA boundary and the following are progressively less significant
>> >> > boundaries.
>> >> > +There can be more than one level of NUMA.
>> >>
>> >> I'm not clear on why this is necessary; the arm,associativity property
>> >> is already ordered from most significant to least significant per its
>> >> description.
>> >
>> >
>> > first entry in arm,associativity-reference-points is used to find which
>> > entry in associativity defines node id.
>> > also entries in arm,associativity-reference-points defines,
>> > how many entries(depth) in associativity can be used to calculate node
>> > distance
>> > in both level 1 and  multi level(hierarchical) numa topology.
>
> I think this needs a more thorough description; I don't follow the
> current one.
>
>> >> Is this only expected at the root of the tree? Can it be re-defined in
>> >> sub-nodes?
>> >
>> > yes it is defined only at the root.
>
> This needs to be stated explicitly.
>
> I see that this being the case, *,associativity-reference-points would
> be a more powerful property than the #(whatever)-cells property I
> mentioned earlier, but a more thorough description is required.
>
> Thanks,
> Mark.
thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Oct. 1, 2015, 1:05 a.m. UTC | #23
On Wed, 2015-09-30 at 23:20 +0530, Ganapatrao Kulkarni wrote:
> Hi Ben,

Before I dig in more (short on time right now), PAPR (at least a chunk
of it) was released publicly:

https://members.openpowerfoundation.org/document/dl/469

(You don't need to be a member nor to sign up to get it)

Cheers,
Ben.

> On Wed, Sep 30, 2015 at 4:23 PM, Mark Rutland <mark.rutland@arm.com>
> wrote:
> > On Tue, Sep 29, 2015 at 09:38:04AM +0100, Ganapatrao Kulkarni
> > wrote:
> > > (sending again, by mistake it was set to html mode)
> > > 
> > > On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
> > > <gpkulkarni@gmail.com> wrote:
> > > > Hi Mark,
> > > > 
> > > > I have tried to answer your comments, in the meantime we are
> > > > waiting for Ben
> > > > to share the details.
> > > > 
> > > > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <
> > > > mark.rutland@arm.com> wrote:
> > > > > 
> > > > > Hi,
> > > > > 
> > > > > On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni
> > > > > wrote:
> > > > > > DT bindings for numa map for memory, cores and IOs using
> > > > > > arm,associativity device node property.
> > > > > 
> > > > > Given this is just a copy of ibm,associativity, I'm not sure
> > > > > I see much
> > > > > point in renaming the properties.
> > > > > 
> > > > > However, (somewhat counter to that) I'm also concerned that
> > > > > this isn't
> > > > > sufficient for systems we're beginning to see today (more on
> > > > > that
> > > > > below), so I don't think a simple copy of ibm,associativity
> > > > > is good
> > > > > enough.
> > > > 
> > > > it is just copy right now, however it can evolve when we come
> > > > across more
> > > > arm64 numa platforms
> > 
> > Whatever we do I suspect we'll have to evolve it as new platforms
> > appear. As I mentioned there are contemporary NUMA ARM64 platforms
> > (e.g.
> > those with CCN) that I don't think we can ignore now given we'll
> > have to
> > cater for them.
> > 
> > > > > > +==========================================================
> > > > > > ====================
> > > > > > +2 - arm,associativity
> > > > > > 
> > > > > > +==========================================================
> > > > > > ====================
> > > > > > +The mapping is done using arm,associativity device
> > > > > > property.
> > > > > > +this property needs to be present in every device node
> > > > > > which needs to
> > > > > > to be
> > > > > > +mapped to numa nodes.
> > > > > 
> > > > > Can't there be some inheritance? e.g. all devices on a bus
> > > > > with an
> > > > > arm,associativity property being assumed to share that value?
> > > > 
> > > > yes there is inheritance and respective bus drivers should take
> > > > care of it,
> > > > like pci driver does at present.
> > 
> > Ok.
> > 
> > That seems counter to my initial interpretation of the wording that
> > the
> > property must be present on device nodes that need to be mapped to
> > NUMA
> > nodes.
> > 
> > Is there any simple way of describing the set of nodes that need
> > this
> > property?
> > 
> > > > > > +topology and boundary in the system at which a significant
> > > > > > difference
> > > > > > in
> > > > > > +performance can be measured between cross-device accesses
> > > > > > within
> > > > > > +a single location and those spanning multiple locations.
> > > > > > +The first cell always contains the broadest subdivision
> > > > > > within the
> > > > > > system,
> > > > > > +while the last cell enumerates the individual devices,
> > > > > > such as an SMT
> > > > > > thread
> > > > > > +of a CPU, or a bus bridge within an SoC".
> > > > > 
> > > > > While this gives us some hierarchy, this doesn't seem to
> > > > > encode relative
> > > > > distances at all. That seems like an oversight.
> > > > 
> > > > 
> > > > distance is computed, will add the details to document.
> > > > local nodes will have distance as 10(LOCAL_DISTANCE) and every
> > > > level, the
> > > > distance multiplies by 2.
> > > > for example, for level 1 numa topology, distance from local
> > > > node to remote
> > > > node will be 20.
> > 
> > This seems arbitrary.
> > 
> > Why not always have this explicitly described?
> > 
> > > > > Additionally, I'm somewhat unclear on how what you'd be
> > > > > expected to
> > > > > provide for this property in cases like ring or mesh
> > > > > interconnects,
> > > > > where there isn't a strict hierarchy (see systems with ARM's
> > > > > own CCN, or
> > > > > Tilera's TILE-Mx), but there is some measure of closeness.
> > > > 
> > > > 
> > > > IIUC, as per ARMs CCN architecture, all core/clusters are at
> > > > equal distance
> > > > of DDR, i dont see any NUMA topology.
> > 
> > The CCN is a ring interconnect, so CPU clusters (henceforth CPUs)
> > can be
> > connected with differing distances to RAM instances (or devices).
> > 
> > Consider the simplified network below:
> > 
> >   +-------+      +--------+      +-------+
> >   | CPU 0 |------| DRAM A |------| CPU 1 |
> >   +-------+      +--------+      +-------+
> >       |                              |
> >       |                              |
> >   +--------+                     +--------+
> >   | DRAM B |                     | DRAM C |
> >   +--------+                     +--------+
> >       |                              |
> >       |                              |
> >   +-------+      +--------+      +-------+
> >   | CPU 2 |------| DRAM D |------| CPU 3 |
> >   +-------+      +--------+      +-------+
> > 
> > In this case CPUs and DRAMs are spaced evenly on the ring, but the
> > distance between an arbitrary CPU and DRAM is not uniform.
> > 
> > CPU 0 can access DRAM A or DRAM B with a single hop, but accesses
> > to
> > DRAM C or DRAM D take three hops.
> > 
> > An access from CPU 0 to DRAM C could contend with accesses from CPU
> > 1 to
> > DRAM D, as they share hops on the ring.
> > 
> > There is definitely a NUMA topology here, but there's not a strict
> > hierarchy. I don't see how you would represent this with the
> > proposed
> > binding.
> can you please explain, how associativity property will represent
> this
> numa topology?
> > 
> > Likewise for the mesh networks (e.g. that of TILE-Mx)
> > 
> > > > however, if there are 2 SoC connected thorough the CCN, then it
> > > > is very much
> > > > similar to cavium topology.
> > > > 
> > > > > Must all of these have the same length? If so, why not have a
> > > > > #(whatever)-cells property in the root to describe the
> > > > > expected length?
> > > > > If not, how are they to be interpreted relative to each
> > > > > other?
> > > > 
> > > > 
> > > > yes, all are of default size.
> > 
> > Where that size is...?
> > 
> > > > IMHO, there is no need to add cells property.
> > 
> > That might be the case, but it's unclear from the documentation. I
> > don't
> > see how one would parse / verify values currently.
> > 
> > > > > > +the arm,associativity nodes. The first integer is the most
> > > > > > significant
> > > > > > +NUMA boundary and the following are progressively less
> > > > > > significant
> > > > > > boundaries.
> > > > > > +There can be more than one level of NUMA.
> > > > > 
> > > > > I'm not clear on why this is necessary; the arm,associativity
> > > > > property
> > > > > is already ordered from most significant to least significant
> > > > > per its
> > > > > description.
> > > > 
> > > > 
> > > > first entry in arm,associativity-reference-points is used to
> > > > find which
> > > > entry in associativity defines node id.
> > > > also entries in arm,associativity-reference-points defines,
> > > > how many entries(depth) in associativity can be used to
> > > > calculate node
> > > > distance
> > > > in both level 1 and  multi level(hierarchical) numa topology.
> > 
> > I think this needs a more thorough description; I don't follow the
> > current one.
> > 
> > > > > Is this only expected at the root of the tree? Can it be re
> > > > > -defined in
> > > > > sub-nodes?
> > > > 
> > > > yes it is defined only at the root.
> > 
> > This needs to be stated explicitly.
> > 
> > I see that this being the case, *,associativity-reference-points
> > would
> > be a more powerful property than the #(whatever)-cells property I
> > mentioned earlier, but a more thorough description is required.
> > 
> > Thanks,
> > Mark.
> thanks
> Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Oct. 1, 2015, 5:25 a.m. UTC | #24
(sending again; I don't know why plain-text mode was unchecked.
Apologies for the inconvenience.)

On Thu, Oct 1, 2015 at 10:41 AM, Ganapatrao Kulkarni
<gpkulkarni@gmail.com> wrote:
> Hi Ben,
>
>
> On Thu, Oct 1, 2015 at 6:35 AM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
>>
>> On Wed, 2015-09-30 at 23:20 +0530, Ganapatrao Kulkarni wrote:
>> > Hi Ben,
>>
>> Before I dig in more (short on time right now), PAPR (at least a chunk
>> of it) was released publicly:
>>
>> https://members.openpowerfoundation.org/document/dl/469
>
> Thanks a lot for sharing this document.
> I went through chapter 15 of this doc, which explains an example of a
> hierarchical NUMA topology.
> I still could not represent the ring/mesh NUMA topology using associativity,
> which will be present in other upcoming arm64 platforms.
>
>>
>> (You don't need to be a member nor to sign up to get it)
>>
>> Cheers,
>> Ben.
>>
>> > On Wed, Sep 30, 2015 at 4:23 PM, Mark Rutland <mark.rutland@arm.com>
>> > wrote:
>> > > On Tue, Sep 29, 2015 at 09:38:04AM +0100, Ganapatrao Kulkarni
>> > > wrote:
>> > > > (sending again, by mistake it was set to html mode)
>> > > >
>> > > > On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
>> > > > <gpkulkarni@gmail.com> wrote:
>> > > > > Hi Mark,
>> > > > >
>> > > > > I have tried to answer your comments, in the meantime we are
>> > > > > waiting for Ben
>> > > > > to share the details.
>> > > > >
>> > > > > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <
>> > > > > mark.rutland@arm.com> wrote:
>> > > > > >
>> > > > > > Hi,
>> > > > > >
>> > > > > > On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni
>> > > > > > wrote:
>> > > > > > > DT bindings for numa map for memory, cores and IOs using
>> > > > > > > arm,associativity device node property.
>> > > > > >
>> > > > > > Given this is just a copy of ibm,associativity, I'm not sure
>> > > > > > I see much
>> > > > > > point in renaming the properties.
>> > > > > >
>> > > > > > However, (somewhat counter to that) I'm also concerned that
>> > > > > > this isn't
>> > > > > > sufficient for systems we're beginning to see today (more on
>> > > > > > that
>> > > > > > below), so I don't think a simple copy of ibm,associativity
>> > > > > > is good
>> > > > > > enough.
>> > > > >
>> > > > > it is just copy right now, however it can evolve when we come
>> > > > > across more
>> > > > > arm64 numa platforms
>> > >
>> > > Whatever we do I suspect we'll have to evolve it as new platforms
>> > > appear. As I mentioned there are contemporary NUMA ARM64 platforms
>> > > (e.g.
>> > > those with CCN) that I don't think we can ignore now given we'll
>> > > have to
>> > > cater for them.
>> > >
>> > > > > > > +==========================================================
>> > > > > > > ====================
>> > > > > > > +2 - arm,associativity
>> > > > > > >
>> > > > > > > +==========================================================
>> > > > > > > ====================
>> > > > > > > +The mapping is done using arm,associativity device
>> > > > > > > property.
>> > > > > > > +this property needs to be present in every device node
>> > > > > > > which needs to
>> > > > > > > to be
>> > > > > > > +mapped to numa nodes.
>> > > > > >
>> > > > > > Can't there be some inheritance? e.g. all devices on a bus
>> > > > > > with an
>> > > > > > arm,associativity property being assumed to share that value?
>> > > > >
>> > > > > yes there is inheritance and respective bus drivers should take
>> > > > > care of it,
>> > > > > like pci driver does at present.
>> > >
>> > > Ok.
>> > >
>> > > That seems counter to my initial interpretation of the wording that
>> > > the
>> > > property must be present on device nodes that need to be mapped to
>> > > NUMA
>> > > nodes.
>> > >
>> > > Is there any simple way of describing the set of nodes that need
>> > > this
>> > > property?
>> > >
>> > > > > > > +topology and boundary in the system at which a significant
>> > > > > > > difference
>> > > > > > > in
>> > > > > > > +performance can be measured between cross-device accesses
>> > > > > > > within
>> > > > > > > +a single location and those spanning multiple locations.
>> > > > > > > +The first cell always contains the broadest subdivision
>> > > > > > > within the
>> > > > > > > system,
>> > > > > > > +while the last cell enumerates the individual devices,
>> > > > > > > such as an SMT
>> > > > > > > thread
>> > > > > > > +of a CPU, or a bus bridge within an SoC".
>> > > > > >
>> > > > > > While this gives us some hierarchy, this doesn't seem to
>> > > > > > encode relative
>> > > > > > distances at all. That seems like an oversight.
>> > > > >
>> > > > >
>> > > > > distance is computed, will add the details to document.
>> > > > > local nodes will have distance as 10(LOCAL_DISTANCE) and every
>> > > > > level, the
>> > > > > distance multiplies by 2.
>> > > > > for example, for level 1 numa topology, distance from local
>> > > > > node to remote
>> > > > > node will be 20.
>> > >
>> > > This seems arbitrary.
>> > >
>> > > Why not always have this explicitly described?
>> > >
>> > > > > > Additionally, I'm somewhat unclear on how what you'd be
>> > > > > > expected to
>> > > > > > provide for this property in cases like ring or mesh
>> > > > > > interconnects,
>> > > > > > where there isn't a strict hierarchy (see systems with ARM's
>> > > > > > own CCN, or
>> > > > > > Tilera's TILE-Mx), but there is some measure of closeness.
>> > > > >
>> > > > >
>> > > > > IIUC, as per ARMs CCN architecture, all core/clusters are at
>> > > > > equal distance
>> > > > > of DDR, i dont see any NUMA topology.
>> > >
>> > > The CCN is a ring interconnect, so CPU clusters (henceforth CPUs)
>> > > can be
>> > > connected with differing distances to RAM instances (or devices).
>> > >
>> > > Consider the simplified network below:
>> > >
>> > >   +-------+      +--------+      +-------+
>> > >   | CPU 0 |------| DRAM A |------| CPU 1 |
>> > >   +-------+      +--------+      +-------+
>> > >       |                              |
>> > >       |                              |
>> > >   +--------+                     +--------+
>> > >   | DRAM B |                     | DRAM C |
>> > >   +--------+                     +--------+
>> > >       |                              |
>> > >       |                              |
>> > >   +-------+      +--------+      +-------+
>> > >   | CPU 2 |------| DRAM D |------| CPU 3 |
>> > >   +-------+      +--------+      +-------+
>> > >
>> > > In this case CPUs and DRAMs are spaced evenly on the ring, but the
>> > > distance between an arbitrary CPU and DRAM is not uniform.
>> > >
>> > > CPU 0 can access DRAM A or DRAM B with a single hop, but accesses
>> > > to
>> > > DRAM C or DRAM D take three hops.
>> > >
>> > > An access from CPU 0 to DRAM C could contend with accesses from CPU
>> > > 1 to
>> > > DRAM D, as they share hops on the ring.
>> > >
>> > > There is definitely a NUMA topology here, but there's not a strict
>> > > hierarchy. I don't see how you would represent this with the
>> > > proposed
>> > > binding.
>> > can you please explain, how associativity property will represent
>> > this
>> > numa topology?
>
> Hi Mark,
>
> I am thinking that if we cannot address these topologies (or it becomes
> too complex) using associativity,
> we should think of an alternate binding which suits existing and upcoming
> arm64 platforms.
> Can we think of the NUMA binding below, which is in line with ACPI and will
> address all sorts of topologies?
>
> i am proposing as below,
>
> 1. introduce "proximity" node property. this property will be
> present in dt nodes like memory, cpu, bus and devices(like associativity
> property) and
> will tell which numa node(proximity domain) this dt node belongs to.
>
> examples:
>                cpu@000 {
>                         device_type = "cpu";
>                         compatible = "cavium,thunder", "arm,armv8";
>                         reg = <0x0 0x000>;
>                         enable-method = "psci";
>                         proximity = <0>;
>                 };
>                cpu@001 {
>                         device_type = "cpu";
>                         compatible = "cavium,thunder", "arm,armv8";
>                         reg = <0x0 0x001>;
>                         enable-method = "psci";
>                         proximity = <1>;
>                 };
>
>        memory@00000000 {
>                 device_type = "memory";
>                 reg = <0x0 0x01400000 0x3 0xFEC00000>;
>                 proximity =<0>;
>
>         };
>
>         memory@10000000000 {
>                 device_type = "memory";
>                 reg = <0x100 0x00400000 0x3 0xFFC00000>;
>                 proximity =<1>;
>         };
>
> pcie0@0x8480,00000000 {
>                 compatible = "cavium,thunder-pcie";
>                 device_type = "pci";
>                 msi-parent = <&its>;
>                 bus-range = <0 255>;
>                 #size-cells = <2>;
>                 #address-cells = <3>;
>                 #stream-id-cells = <1>;
>                 reg = <0x8480 0x00000000 0 0x10000000>;  /*Configuration
> space */
>                 ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000
> 0x70 0x00000000>, /* mem ranges */
>                          <0x03000000 0x8300 0x00000000 0x8300 0x00000000
> 0x500 0x00000000>;
>                proximity =<0>;
>         };
>
>
> 2. Introduce new dt node "proximity-map" which will capture the NxN numa
> node distance matrix.
>
> for example, 4 nodes connected in a mesh/ring structure as:
> A(0) <connected to> B(1) <connected to> C(2) <connected to> D(3) <connected
> to> A(0)
>
> relative distance would be,
>       A -> B = 20
>       B -> C  = 20
>       C -> D = 20
>       D -> A = 20
>       A -> C = 40
>       B -> D = 40
>
> and dt presentation for this distance matrix is :
>
>        proximity-map {
>              node-count = <4>;
>              distance-matrix = <0 0  10>,
>                                 <0 1  20>,
>                                 <0 2  40>,
>                                 <0 3  20>,
>                                 <1 0  20>,
>                                 <1 1  10>,
>                                 <1 2  20>,
>                                 <1 3  40>,
>                                 <2 0  40>,
>                                 <2 1  20>,
>                                 <2 2  10>,
>                                 <2 3  20>,
>                                 <3 0  20>,
>                                 <3 1  40>,
>                                 <3 2  20>,
>                                 <3 3  10>;
>           }
>
> The entries like <0 0>, <1 1>, <2 2>, <3 3> can be optional, and the code can
> use the default value (local distance).
> An entry like <1 0> can be optional if <0 1> and <1 0> are of the same
> distance.
>
>
>> > >
>> > > Likewise for the mesh networks (e.g. that of TILE-Mx)
>> > >
>> > > > > however, if there are 2 SoC connected thorough the CCN, then it
>> > > > > is very much
>> > > > > similar to cavium topology.
>> > > > >
>> > > > > > Must all of these have the same length? If so, why not have a
>> > > > > > #(whatever)-cells property in the root to describe the
>> > > > > > expected length?
>> > > > > > If not, how are they to be interpreted relative to each
>> > > > > > other?
>> > > > >
>> > > > >
>> > > > > yes, all are of default size.
>> > >
>> > > Where that size is...?
>> > >
>> > > > > IMHO, there is no need to add cells property.
>> > >
>> > > That might be the case, but it's unclear from the documentation. I
>> > > don't
>> > > see how one would parse / verify values currently.
>> > >
>> > > > > > > +the arm,associativity nodes. The first integer is the most
>> > > > > > > significant
>> > > > > > > +NUMA boundary and the following are progressively less
>> > > > > > > significant
>> > > > > > > boundaries.
>> > > > > > > +There can be more than one level of NUMA.
>> > > > > >
>> > > > > > I'm not clear on why this is necessary; the arm,associativity
>> > > > > > property
>> > > > > > is already ordered from most significant to least significant
>> > > > > > per its
>> > > > > > description.
>> > > > >
>> > > > >
>> > > > > first entry in arm,associativity-reference-points is used to
>> > > > > find which
>> > > > > entry in associativity defines node id.
>> > > > > also entries in arm,associativity-reference-points defines,
>> > > > > how many entries(depth) in associativity can be used to
>> > > > > calculate node
>> > > > > distance
>> > > > > in both level 1 and  multi level(hierarchical) numa topology.
>> > >
>> > > I think this needs a more thorough description; I don't follow the
>> > > current one.
>> > >
>> > > > > > Is this only expected at the root of the tree? Can it be re
>> > > > > > -defined in
>> > > > > > sub-nodes?
>> > > > >
>> > > > > yes it is defined only at the root.
>> > >
>> > > This needs to be stated explicitly.
>> > >
>> > > I see that this being the case, *,associativity-reference-points
>> > > would
>> > > be a more powerful property than the #(whatever)-cells property I
>> > > mentioned earlier, but a more thorough description is required.
>> > >
>> > > Thanks,
>> > > Mark.
>> > thanks
>> > Ganapat
>
>
> thanks
> Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Oct. 1, 2015, 7:17 a.m. UTC | #25
On Thu, 2015-10-01 at 10:41 +0530, Ganapatrao Kulkarni wrote:
> i still could not represent the ring/mesh numa topology using
> associativity, which will be present in other upcoming arm64
> platforms.

Right. It should be possible to represent it using multiple lists as
a multi-path problem, but it's a bit awkward.

It does look like the representation might not work well for that case.
Cheers,
Ben.

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Oct. 1, 2015, 11:36 a.m. UTC | #26
Hi Mark,

On Thu, Oct 1, 2015 at 10:41 AM, Ganapatrao Kulkarni
<gpkulkarni@gmail.com> wrote:
> Hi Ben,
>
>
> On Thu, Oct 1, 2015 at 6:35 AM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
>>
>> On Wed, 2015-09-30 at 23:20 +0530, Ganapatrao Kulkarni wrote:
>> > Hi Ben,
>>
>> Before I dig in more (short on time right now), PAPR (at least a chunk
>> of it) was released publicly:
>>
>> https://members.openpowerfoundation.org/document/dl/469
>
> thanks a lot for sharing this document.
> i went through the chapter 15 of this doc which explains an example on
> hierarchical numa topology.
> i still could not represent the ring/mesh numa topology using associativity,
> which will be present in other upcoming arm64 platforms.
>
>>
>> (You don't need to be a member nor to sign up to get it)
>>
>> Cheers,
>> Ben.
>>
>> > On Wed, Sep 30, 2015 at 4:23 PM, Mark Rutland <mark.rutland@arm.com>
>> > wrote:
>> > > On Tue, Sep 29, 2015 at 09:38:04AM +0100, Ganapatrao Kulkarni
>> > > wrote:
>> > > > (sending again, by mistake it was set to html mode)
>> > > >
>> > > > On Tue, Sep 29, 2015 at 2:05 PM, Ganapatrao Kulkarni
>> > > > <gpkulkarni@gmail.com> wrote:
>> > > > > Hi Mark,
>> > > > >
>> > > > > I have tried to answer your comments, in the meantime we are
>> > > > > waiting for Ben
>> > > > > to share the details.
>> > > > >
>> > > > > On Fri, Aug 28, 2015 at 6:02 PM, Mark Rutland <
>> > > > > mark.rutland@arm.com> wrote:
>> > > > > >
>> > > > > > Hi,
>> > > > > >
>> > > > > > On Fri, Aug 14, 2015 at 05:39:32PM +0100, Ganapatrao Kulkarni
>> > > > > > wrote:
>> > > > > > > DT bindings for numa map for memory, cores and IOs using
>> > > > > > > arm,associativity device node property.
>> > > > > >
>> > > > > > Given this is just a copy of ibm,associativity, I'm not sure
>> > > > > > I see much
>> > > > > > point in renaming the properties.
>> > > > > >
>> > > > > > However, (somewhat counter to that) I'm also concerned that
>> > > > > > this isn't
>> > > > > > sufficient for systems we're beginning to see today (more on
>> > > > > > that
>> > > > > > below), so I don't think a simple copy of ibm,associativity
>> > > > > > is good
>> > > > > > enough.
>> > > > >
>> > > > > it is just copy right now, however it can evolve when we come
>> > > > > across more
>> > > > > arm64 numa platforms
>> > >
>> > > Whatever we do I suspect we'll have to evolve it as new platforms
>> > > appear. As I mentioned there are contemporary NUMA ARM64 platforms
>> > > (e.g.
>> > > those with CCN) that I don't think we can ignore now given we'll
>> > > have to
>> > > cater for them.
>> > >
>> > > > > > > +==========================================================
>> > > > > > > ====================
>> > > > > > > +2 - arm,associativity
>> > > > > > >
>> > > > > > > +==========================================================
>> > > > > > > ====================
>> > > > > > > +The mapping is done using arm,associativity device
>> > > > > > > property.
>> > > > > > > +this property needs to be present in every device node
>> > > > > > > which needs to
>> > > > > > > to be
>> > > > > > > +mapped to numa nodes.
>> > > > > >
>> > > > > > Can't there be some inheritance? e.g. all devices on a bus
>> > > > > > with an
>> > > > > > arm,associativity property being assumed to share that value?
>> > > > >
>> > > > > yes there is inheritance and respective bus drivers should take
>> > > > > care of it,
>> > > > > like pci driver does at present.
>> > >
>> > > Ok.
>> > >
>> > > That seems counter to my initial interpretation of the wording that
>> > > the
>> > > property must be present on device nodes that need to be mapped to
>> > > NUMA
>> > > nodes.
>> > >
>> > > Is there any simple way of describing the set of nodes that need
>> > > this
>> > > property?
>> > >
>> > > > > > > +topology and boundary in the system at which a significant
>> > > > > > > difference
>> > > > > > > in
>> > > > > > > +performance can be measured between cross-device accesses
>> > > > > > > within
>> > > > > > > +a single location and those spanning multiple locations.
>> > > > > > > +The first cell always contains the broadest subdivision
>> > > > > > > within the
>> > > > > > > system,
>> > > > > > > +while the last cell enumerates the individual devices,
>> > > > > > > such as an SMT
>> > > > > > > thread
>> > > > > > > +of a CPU, or a bus bridge within an SoC".
>> > > > > >
>> > > > > > While this gives us some hierarchy, this doesn't seem to
>> > > > > > encode relative
>> > > > > > distances at all. That seems like an oversight.
>> > > > >
>> > > > >
>> > > > > distance is computed, will add the details to document.
>> > > > > local nodes will have distance as 10(LOCAL_DISTANCE) and every
>> > > > > level, the
>> > > > > distance multiplies by 2.
>> > > > > for example, for level 1 numa topology, distance from local
>> > > > > node to remote
>> > > > > node will be 20.
>> > >
>> > > This seems arbitrary.
>> > >
>> > > Why not always have this explicitly described?
>> > >
>> > > > > > Additionally, I'm somewhat unclear on how what you'd be
>> > > > > > expected to
>> > > > > > provide for this property in cases like ring or mesh
>> > > > > > interconnects,
>> > > > > > where there isn't a strict hierarchy (see systems with ARM's
>> > > > > > own CCN, or
>> > > > > > Tilera's TILE-Mx), but there is some measure of closeness.
>> > > > >
>> > > > >
>> > > > > IIUC, as per ARMs CCN architecture, all core/clusters are at
>> > > > > equal distance
>> > > > > of DDR, i dont see any NUMA topology.
>> > >
>> > > The CCN is a ring interconnect, so CPU clusters (henceforth CPUs)
>> > > can be
>> > > connected with differing distances to RAM instances (or devices).
>> > >
>> > > Consider the simplified network below:
>> > >
>> > >   +-------+      +--------+      +-------+
>> > >   | CPU 0 |------| DRAM A |------| CPU 1 |
>> > >   +-------+      +--------+      +-------+
>> > >       |                              |
>> > >       |                              |
>> > >   +--------+                     +--------+
>> > >   | DRAM B |                     | DRAM C |
>> > >   +--------+                     +--------+
>> > >       |                              |
>> > >       |                              |
>> > >   +-------+      +--------+      +-------+
>> > >   | CPU 2 |------| DRAM D |------| CPU 3 |
>> > >   +-------+      +--------+      +-------+
>> > >
>> > > In this case CPUs and DRAMs are spaced evenly on the ring, but the
>> > > distance between an arbitrary CPU and DRAM is not uniform.
>> > >
>> > > CPU 0 can access DRAM A or DRAM B with a single hop, but accesses
>> > > to
>> > > DRAM C or DRAM D take three hops.
>> > >
>> > > An access from CPU 0 to DRAM C could contend with accesses from CPU
>> > > 1 to
>> > > DRAM D, as they share hops on the ring.
>> > >
>> > > There is definitely a NUMA topology here, but there's not a strict
>> > > hierarchy. I don't see how you would represent this with the
>> > > proposed
>> > > binding.
>> > can you please explain, how associativity property will represent
>> > this
>> > numa topology?
>
> Hi Mark,
>
> i am thinking, if we could not address(or becomes complex)  these topologies
> using associativity,
> we should think of an alternate binding which suits existing and upcoming
> arm64 platforms.
> can we think of below numa binding which is inline with ACPI and will
> address all sort of topologies!
>
> i am proposing as below,
>
> 1. introduce "proximity" node property. this property will be
> present in dt nodes like memory, cpu, bus and devices(like associativity
> property) and
> will tell which numa node(proximity domain) this dt node belongs to.
>
> examples:
>                cpu@000 {
>                         device_type = "cpu";
>                         compatible = "cavium,thunder", "arm,armv8";
>                         reg = <0x0 0x000>;
>                         enable-method = "psci";
>                         proximity = <0>;
>                 };
>                cpu@001 {
>                         device_type = "cpu";
>                         compatible = "cavium,thunder", "arm,armv8";
>                         reg = <0x0 0x001>;
>                         enable-method = "psci";
>                         proximity = <1>;
>                 };
>
>        memory@00000000 {
>                 device_type = "memory";
>                 reg = <0x0 0x01400000 0x3 0xFEC00000>;
>                 proximity =<0>;
>
>         };
>
>         memory@10000000000 {
>                 device_type = "memory";
>                 reg = <0x100 0x00400000 0x3 0xFFC00000>;
>                 proximity =<1>;
>         };
>
> pcie0@0x8480,00000000 {
>                 compatible = "cavium,thunder-pcie";
>                 device_type = "pci";
>                 msi-parent = <&its>;
>                 bus-range = <0 255>;
>                 #size-cells = <2>;
>                 #address-cells = <3>;
>                 #stream-id-cells = <1>;
>                 reg = <0x8480 0x00000000 0 0x10000000>;  /*Configuration
> space */
>                 ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000
> 0x70 0x00000000>, /* mem ranges */
>                          <0x03000000 0x8300 0x00000000 0x8300 0x00000000
> 0x500 0x00000000>;
>                proximity =<0>;
>         };
>
>
> 2. Introduce new dt node "proximity-map" which will capture the NxN numa
> node distance matrix.
>
> for example,  4 nodes connected in mesh/ring structure as,
> A(0) <connected to> B(1) <connected to> C(2) <connected to> D(3) <connected
> to> A(1)
>
> relative distance would be,
>       A -> B = 20
>       B -> C  = 20
>       C -> D = 20
>       D -> A = 20
>       A -> C = 40
>       B -> D = 40
>
> and dt presentation for this distance matrix is :
>
>        proximity-map {
>              node-count = <4>;
>              distance-matrix = <0 0  10>,
>                                 <0 1  20>,
>                                 <0 2  40>,
>                                 <0 3  20>,
>                                 <1 0  20>,
>                                 <1 1  10>,
>                                 <1 2  20>,
>                                 <1 3  40>,
>                                 <2 0  40>,
>                                 <2 1  20>,
>                                 <2 2  10>,
>                                 <2 3  20>,
>                                 <3 0  20>,
>                                 <3 1  40>,
>                                 <3 2  20>,
>                                 <3 3  10>;
>           }
>
> the entries like < 0 0 > < 1 1>  < 2 2> < 3 3> can be optional and code can
> put default value(local distance).
> the entries like <1 0> can be optional if <0 1> and <1 0> are of same
> distance.
is this binding looks ok?
i can implement this and submit in next version of patchset.
>
>
>> > >
>> > > Likewise for the mesh networks (e.g. that of TILE-Mx)
>> > >
>> > > > > however, if there are 2 SoC connected thorough the CCN, then it
>> > > > > is very much
>> > > > > similar to cavium topology.
>> > > > >
>> > > > > > Must all of these have the same length? If so, why not have a
>> > > > > > #(whatever)-cells property in the root to describe the
>> > > > > > expected length?
>> > > > > > If not, how are they to be interpreted relative to each
>> > > > > > other?
>> > > > >
>> > > > >
>> > > > > yes, all are of default size.
>> > >
>> > > Where that size is...?
>> > >
>> > > > > IMHO, there is no need to add cells property.
>> > >
>> > > That might be the case, but it's unclear from the documentation. I
>> > > don't
>> > > see how one would parse / verify values currently.
>> > >
>> > > > > > > +the arm,associativity nodes. The first integer is the most
>> > > > > > > significant
>> > > > > > > +NUMA boundary and the following are progressively less
>> > > > > > > significant
>> > > > > > > boundaries.
>> > > > > > > +There can be more than one level of NUMA.
>> > > > > >
>> > > > > > I'm not clear on why this is necessary; the arm,associativity
>> > > > > > property
>> > > > > > is already ordered from most significant to least significant
>> > > > > > per its
>> > > > > > description.
>> > > > >
>> > > > >
>> > > > > first entry in arm,associativity-reference-points is used to
>> > > > > find which
>> > > > > entry in associativity defines node id.
>> > > > > also entries in arm,associativity-reference-points defines,
>> > > > > how many entries(depth) in associativity can be used to
>> > > > > calculate node
>> > > > > distance
>> > > > > in both level 1 and  multi level(hierarchical) numa topology.
>> > >
>> > > I think this needs a more thorough description; I don't follow the
>> > > current one.
>> > >
>> > > > > > Is this only expected at the root of the tree? Can it be re
>> > > > > > -defined in
>> > > > > > sub-nodes?
>> > > > >
>> > > > > yes it is defined only at the root.
>> > >
>> > > This needs to be stated explicitly.
>> > >
>> > > I see that this being the case, *,associativity-reference-points
>> > > would
>> > > be a more powerful property than the #(whatever)-cells property I
>> > > mentioned earlier, but a more thorough description is required.
>> > >
>> > > Thanks,
>> > > Mark.
>> > thanks
>> > Ganapat
>
>
> thanks
> Ganapat
thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Rutland Oct. 13, 2015, 4:47 p.m. UTC | #27
> > Hi Mark,
> >
> > i am thinking, if we could not address(or becomes complex)  these topologies
> > using associativity,
> > we should think of an alternate binding which suits existing and upcoming
> > arm64 platforms.
> > can we think of below numa binding which is inline with ACPI and will
> > address all sort of topologies!
> >
> > i am proposing as below,
> >
> > 1. introduce "proximity" node property. this property will be
> > present in dt nodes like memory, cpu, bus and devices(like associativity
> > property) and
> > will tell which numa node(proximity domain) this dt node belongs to.
> >
> > examples:
> >                cpu@000 {
> >                         device_type = "cpu";
> >                         compatible = "cavium,thunder", "arm,armv8";
> >                         reg = <0x0 0x000>;
> >                         enable-method = "psci";
> >                         proximity = <0>;
> >                 };
> >                cpu@001 {
> >                         device_type = "cpu";
> >                         compatible = "cavium,thunder", "arm,armv8";
> >                         reg = <0x0 0x001>;
> >                         enable-method = "psci";
> >                         proximity = <1>;
> >                 };
> >
> >        memory@00000000 {
> >                 device_type = "memory";
> >                 reg = <0x0 0x01400000 0x3 0xFEC00000>;
> >                 proximity =<0>;
> >
> >         };
> >
> >         memory@10000000000 {
> >                 device_type = "memory";
> >                 reg = <0x100 0x00400000 0x3 0xFFC00000>;
> >                 proximity =<1>;
> >         };
> >
> > pcie0@0x8480,00000000 {
> >                 compatible = "cavium,thunder-pcie";
> >                 device_type = "pci";
> >                 msi-parent = <&its>;
> >                 bus-range = <0 255>;
> >                 #size-cells = <2>;
> >                 #address-cells = <3>;
> >                 #stream-id-cells = <1>;
> >                 reg = <0x8480 0x00000000 0 0x10000000>;  /*Configuration
> > space */
> >                 ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000
> > 0x70 0x00000000>, /* mem ranges */
> >                          <0x03000000 0x8300 0x00000000 0x8300 0x00000000
> > 0x500 0x00000000>;
> >                proximity =<0>;
> >         };
> >
> >
> > 2. Introduce new dt node "proximity-map" which will capture the NxN numa
> > node distance matrix.
> >
> > for example,  4 nodes connected in mesh/ring structure as,
> > A(0) <connected to> B(1) <connected to> C(2) <connected to> D(3) <connected
> > to> A(1)
> >
> > relative distance would be,
> >       A -> B = 20
> >       B -> C  = 20
> >       C -> D = 20
> >       D -> A = 20
> >       A -> C = 40
> >       B -> D = 40
> >
> > and dt presentation for this distance matrix is :
> >
> >        proximity-map {
> >              node-count = <4>;
> >              distance-matrix = <0 0  10>,
> >                                 <0 1  20>,
> >                                 <0 2  40>,
> >                                 <0 3  20>,
> >                                 <1 0  20>,
> >                                 <1 1  10>,
> >                                 <1 2  20>,
> >                                 <1 3  40>,
> >                                 <2 0  40>,
> >                                 <2 1  20>,
> >                                 <2 2  10>,
> >                                 <2 3  20>,
> >                                 <3 0  20>,
> >                                 <3 1  40>,
> >                                 <3 2  20>,
> >                                 <3 3  10>;
> >           }
> >
> > the entries like < 0 0 > < 1 1>  < 2 2> < 3 3> can be optional and code can
> > put default value(local distance).
> > the entries like <1 0> can be optional if <0 1> and <1 0> are of same
> > distance.
> is this binding looks ok?

This looks roughly requivalent to the ACPI SLIT, which means it's as
powerful, which allays my previous concerns.

> i can implement this and submit in next version of patchset.

Please put together (plaintext) patches.

Then we have a sensible baseline that we can work from; it's somewhat
difficult for others to join the disacussion here as-is.

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ganapatrao Kulkarni Oct. 13, 2015, 5:07 p.m. UTC | #28
On Tue, Oct 13, 2015 at 10:17 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > Hi Mark,
>> >
>> > i am thinking, if we could not address(or becomes complex)  these topologies
>> > using associativity,
>> > we should think of an alternate binding which suits existing and upcoming
>> > arm64 platforms.
>> > can we think of below numa binding which is inline with ACPI and will
>> > address all sort of topologies!
>> >
>> > i am proposing as below,
>> >
>> > 1. introduce "proximity" node property. this property will be
>> > present in dt nodes like memory, cpu, bus and devices(like associativity
>> > property) and
>> > will tell which numa node(proximity domain) this dt node belongs to.
>> >
>> > examples:
>> >                cpu@000 {
>> >                         device_type = "cpu";
>> >                         compatible = "cavium,thunder", "arm,armv8";
>> >                         reg = <0x0 0x000>;
>> >                         enable-method = "psci";
>> >                         proximity = <0>;
>> >                 };
>> >                cpu@001 {
>> >                         device_type = "cpu";
>> >                         compatible = "cavium,thunder", "arm,armv8";
>> >                         reg = <0x0 0x001>;
>> >                         enable-method = "psci";
>> >                         proximity = <1>;
>> >                 };
>> >
>> >        memory@00000000 {
>> >                 device_type = "memory";
>> >                 reg = <0x0 0x01400000 0x3 0xFEC00000>;
>> >                 proximity =<0>;
>> >
>> >         };
>> >
>> >         memory@10000000000 {
>> >                 device_type = "memory";
>> >                 reg = <0x100 0x00400000 0x3 0xFFC00000>;
>> >                 proximity =<1>;
>> >         };
>> >
>> > pcie0@0x8480,00000000 {
>> >                 compatible = "cavium,thunder-pcie";
>> >                 device_type = "pci";
>> >                 msi-parent = <&its>;
>> >                 bus-range = <0 255>;
>> >                 #size-cells = <2>;
>> >                 #address-cells = <3>;
>> >                 #stream-id-cells = <1>;
>> >                 reg = <0x8480 0x00000000 0 0x10000000>;  /*Configuration
>> > space */
>> >                 ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000
>> > 0x70 0x00000000>, /* mem ranges */
>> >                          <0x03000000 0x8300 0x00000000 0x8300 0x00000000
>> > 0x500 0x00000000>;
>> >                proximity =<0>;
>> >         };
>> >
>> >
>> > 2. Introduce new dt node "proximity-map" which will capture the NxN numa
>> > node distance matrix.
>> >
>> > for example,  4 nodes connected in mesh/ring structure as,
>> > A(0) <connected to> B(1) <connected to> C(2) <connected to> D(3) <connected
>> > to> A(1)
>> >
>> > relative distance would be,
>> >       A -> B = 20
>> >       B -> C  = 20
>> >       C -> D = 20
>> >       D -> A = 20
>> >       A -> C = 40
>> >       B -> D = 40
>> >
>> > and dt presentation for this distance matrix is :
>> >
>> >        proximity-map {
>> >              node-count = <4>;
>> >              distance-matrix = <0 0  10>,
>> >                                 <0 1  20>,
>> >                                 <0 2  40>,
>> >                                 <0 3  20>,
>> >                                 <1 0  20>,
>> >                                 <1 1  10>,
>> >                                 <1 2  20>,
>> >                                 <1 3  40>,
>> >                                 <2 0  40>,
>> >                                 <2 1  20>,
>> >                                 <2 2  10>,
>> >                                 <2 3  20>,
>> >                                 <3 0  20>,
>> >                                 <3 1  40>,
>> >                                 <3 2  20>,
>> >                                 <3 3  10>;
>> >           }
>> >
>> > the entries like < 0 0 > < 1 1>  < 2 2> < 3 3> can be optional and code can
>> > put default value(local distance).
>> > the entries like <1 0> can be optional if <0 1> and <1 0> are of same
>> > distance.
>> is this binding looks ok?
>
> This looks roughly requivalent to the ACPI SLIT, which means it's as
> powerful, which allays my previous concerns.
>
>> i can implement this and submit in next version of patchset.
>
> Please put together (plaintext) patches.
>
> Then we have a sensible baseline that we can work from; it's somewhat
> difficult for others to join the disacussion here as-is.
thanks, will post the v6 in couple of days with implementation based
on this binding proposal..
>
> Thanks,
> Mark.

thanks
Ganapat
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hanjun Guo Oct. 14, 2015, 1:21 p.m. UTC | #29
On 10/14/2015 12:47 AM, Mark Rutland wrote:
>>> Hi Mark,
>>>
>>> i am thinking, if we could not address(or becomes complex)  these topologies
>>> using associativity,
>>> we should think of an alternate binding which suits existing and upcoming
>>> arm64 platforms.
>>> can we think of below numa binding which is inline with ACPI and will
>>> address all sort of topologies!
>>>
>>> i am proposing as below,
>>>
>>> 1. introduce "proximity" node property. this property will be
>>> present in dt nodes like memory, cpu, bus and devices(like associativity
>>> property) and
>>> will tell which numa node(proximity domain) this dt node belongs to.
>>>
>>> examples:
>>>                 cpu@000 {
>>>                          device_type = "cpu";
>>>                          compatible = "cavium,thunder", "arm,armv8";
>>>                          reg = <0x0 0x000>;
>>>                          enable-method = "psci";
>>>                          proximity = <0>;
>>>                  };
>>>                 cpu@001 {
>>>                          device_type = "cpu";
>>>                          compatible = "cavium,thunder", "arm,armv8";
>>>                          reg = <0x0 0x001>;
>>>                          enable-method = "psci";
>>>                          proximity = <1>;
>>>                  };
>>>
>>>         memory@00000000 {
>>>                  device_type = "memory";
>>>                  reg = <0x0 0x01400000 0x3 0xFEC00000>;
>>>                  proximity =<0>;
>>>
>>>          };
>>>
>>>          memory@10000000000 {
>>>                  device_type = "memory";
>>>                  reg = <0x100 0x00400000 0x3 0xFFC00000>;
>>>                  proximity =<1>;
>>>          };
>>>
>>> pcie0@0x8480,00000000 {
>>>                  compatible = "cavium,thunder-pcie";
>>>                  device_type = "pci";
>>>                  msi-parent = <&its>;
>>>                  bus-range = <0 255>;
>>>                  #size-cells = <2>;
>>>                  #address-cells = <3>;
>>>                  #stream-id-cells = <1>;
>>>                  reg = <0x8480 0x00000000 0 0x10000000>;  /*Configuration
>>> space */
>>>                  ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000
>>> 0x70 0x00000000>, /* mem ranges */
>>>                           <0x03000000 0x8300 0x00000000 0x8300 0x00000000
>>> 0x500 0x00000000>;
>>>                 proximity =<0>;
>>>          };
>>>
>>>
>>> 2. Introduce new dt node "proximity-map" which will capture the NxN numa
>>> node distance matrix.
>>>
>>> for example,  4 nodes connected in mesh/ring structure as,
>>> A(0) <connected to> B(1) <connected to> C(2) <connected to> D(3) <connected
>>> to> A(1)
>>>
>>> relative distance would be,
>>>        A -> B = 20
>>>        B -> C  = 20
>>>        C -> D = 20
>>>        D -> A = 20
>>>        A -> C = 40
>>>        B -> D = 40
>>>
>>> and dt presentation for this distance matrix is :
>>>
>>>         proximity-map {
>>>               node-count = <4>;
>>>               distance-matrix = <0 0  10>,
>>>                                  <0 1  20>,
>>>                                  <0 2  40>,
>>>                                  <0 3  20>,
>>>                                  <1 0  20>,
>>>                                  <1 1  10>,
>>>                                  <1 2  20>,
>>>                                  <1 3  40>,
>>>                                  <2 0  40>,
>>>                                  <2 1  20>,
>>>                                  <2 2  10>,
>>>                                  <2 3  20>,
>>>                                  <3 0  20>,
>>>                                  <3 1  40>,
>>>                                  <3 2  20>,
>>>                                  <3 3  10>;
>>>            }
>>>
>>> the entries like < 0 0 > < 1 1>  < 2 2> < 3 3> can be optional and code can
>>> put default value(local distance).
>>> the entries like <1 0> can be optional if <0 1> and <1 0> are of same
>>> distance.
>> is this binding looks ok?
>
> This looks roughly requivalent to the ACPI SLIT, which means it's as
> powerful, which allays my previous concerns.

Cool, I think those bindings are quite extensible and easy understood.

Thanks
Hanjun
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/arm/numa.txt b/Documentation/devicetree/bindings/arm/numa.txt
new file mode 100644
index 0000000..dc3ef86
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/numa.txt
@@ -0,0 +1,212 @@ 
+==============================================================================
+NUMA binding description.
+==============================================================================
+
+==============================================================================
+1 - Introduction
+==============================================================================
+
+Systems employing a Non Uniform Memory Access (NUMA) architecture contain
+collections of hardware resources including processors, memory, and I/O buses,
+that comprise what is commonly known as a NUMA node.
+Processor accesses to memory within the local NUMA node are generally faster
+than processor accesses to memory outside of the local NUMA node.
+DT defines interfaces that allow the platform to convey NUMA node
+topology information to the OS.
+
+==============================================================================
+2 - arm,associativity
+==============================================================================
+The mapping is done using the arm,associativity device property.
+This property needs to be present in every device node which needs to be
+mapped to NUMA nodes.
+
+arm,associativity property is set of 32-bit integers which defines level of
+topology and boundary in the system at which a significant difference in
+performance can be measured between cross-device accesses within
+a single location and those spanning multiple locations.
+The first cell always contains the broadest subdivision within the system,
+while the last cell enumerates the individual devices, such as an SMT thread
+of a CPU, or a bus bridge within an SoC.
+
+ex:
+	/* board 0, socket 0, cluster 0, core 0  thread 0 */
+	arm,associativity = <0 0 0 0 0>;
+
+==============================================================================
+3 - arm,associativity-reference-points
+==============================================================================
+This property is a set of 32-bit integers, each representing an index into
+the arm,associativity nodes. The first integer is the most significant
+NUMA boundary and the following are progressively less significant boundaries.
+There can be more than one level of NUMA.
+
+Ex:
+	arm,associativity-reference-points = <0 1>;
+	The board Id (index 0) is used first to calculate the associativity (node
+	distance), then follows the socket Id (index 1).
+
+	arm,associativity-reference-points = <1 0>;
+	The socket Id (index 1) is used first to calculate the associativity,
+	then follows the board Id (index 0).
+
+	arm,associativity-reference-points = <0>;
+	Only the board Id (index 0) is used to calculate the associativity.
+
+	arm,associativity-reference-points = <1>;
+	Only the socket Id (index 1) is used to calculate the associativity.
+
+==============================================================================
+4 - Example dts
+==============================================================================
+
+Example: a 2-node system consisting of 2 boards, each board having one socket
+and 8 cores per socket.
+
+	arm,associativity-reference-points = <0>;
+
+	memory@00c00000 {
+		device_type = "memory";
+		reg = <0x0 0x00c00000 0x0 0x80000000>;
+		/* board 0, socket 0, no specific core */
+		arm,associativity = <0 0 0xffff>;
+	};
+
+	memory@10000000000 {
+		device_type = "memory";
+		reg = <0x100 0x00000000 0x0 0x80000000>;
+		/* board 1, socket 0, no specific core */
+		arm,associativity = <1 0 0xffff>;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@000 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x000>;
+			enable-method = "psci";
+			/* board 0, socket 0, core 0*/
+			arm,associativity = <0 0 0>;
+		};
+		cpu@001 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x001>;
+			enable-method = "psci";
+			arm,associativity = <0 0 1>;
+		};
+		cpu@002 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x002>;
+			enable-method = "psci";
+			arm,associativity = <0 0 2>;
+		};
+		cpu@003 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x003>;
+			enable-method = "psci";
+			arm,associativity = <0 0 3>;
+		};
+		cpu@004 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x004>;
+			enable-method = "psci";
+			arm,associativity = <0 0 4>;
+		};
+		cpu@005 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x005>;
+			enable-method = "psci";
+			arm,associativity = <0 0 5>;
+		};
+		cpu@006 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x006>;
+			enable-method = "psci";
+			arm,associativity = <0 0 6>;
+		};
+		cpu@007 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x007>;
+			enable-method = "psci";
+			arm,associativity = <0 0 7>;
+		};
+		cpu@008 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x008>;
+			enable-method = "psci";
+			/* board 1, socket 0, core 0*/
+			arm,associativity = <1 0 0>;
+		};
+		cpu@009 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x009>;
+			enable-method = "psci";
+			arm,associativity = <1 0 1>;
+		};
+		cpu@00a {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x00a>;
+			enable-method = "psci";
+			arm,associativity = <1 0 2>;
+		};
+		cpu@00b {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x00b>;
+			enable-method = "psci";
+			arm,associativity = <1 0 3>;
+		};
+		cpu@00c {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x00c>;
+			enable-method = "psci";
+			arm,associativity = <1 0 4>;
+		};
+		cpu@00d {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x00d>;
+			enable-method = "psci";
+			arm,associativity = <1 0 5>;
+		};
+		cpu@00e {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x00e>;
+			enable-method = "psci";
+			arm,associativity = <1 0 6>;
+		};
+		cpu@00f {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x00f>;
+			enable-method = "psci";
+			arm,associativity = <1 0 7>;
+		};
+	};
+
+	pcie0: pcie0@0x8480,00000000 {
+		compatible = "arm,armv8";
+		device_type = "pci";
+		bus-range = <0 255>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0x8480 0x00000000 0 0x10000000>;  /* Configuration space */
+		ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>; /* mem ranges */
+		/* board 0, socket 0, pci bus 0*/
+		arm,associativity = <0 0 0>;
+        };