@@ -163,8 +163,101 @@ typedef unsigned int linenum_type;
0xffffffff | UINT_MAX |
-----------+-------------------------------+-------------------------------
- To see how this works in practice, see the worked example in
- libcpp/location-example.txt. */
+ Examples of location encoding.
+
+ Packed ranges
+ =============
+
+ Consider encoding the location of a token "foo", seen underlined here
+ on line 523, within an ordinary line_map that starts at line 500:
+
+ 11111111112
+ 12345678901234567890
+ 522
+ 523 return foo + bar;
+ ^~~
+ 524
+
+ The location's caret and start are both at line 523, column 11; the
+ location's finish is on the same line, at column 13 (an offset of 2
+ columns, for length 3).
+
+ Line 523 is offset 23 from the starting line of the ordinary line_map.
+
+ caret == start, and the offset of the finish fits within 5 bits, so
+ this can be stored as a packed range.
+
+ This is encoded as:
+ ordmap->start
+ + (line_offset << ordmap->m_column_and_range_bits)
+ + (column << ordmap->m_range_bits)
+ + (range_offset);
+ i.e. (for line offset 23, column 11, range offset 2):
+ ordmap->start
+ + (23 << 12)
+ + (11 << 5)
+ + 2;
+ i.e.:
+ ordmap->start + 0x17162
+ assuming that the line_map uses the default of 7 bits for columns and
+ 5 bits for packed range (giving 12 bits for m_column_and_range_bits).
+
+
+ "Pure" locations
+ ================
+
+ These are a special case of the above, where
+ caret == start == finish
+ They are stored as packed ranges with offset == 0.
+ For example, the location of the "f" of "foo" could be stored
+ as above, but with range offset 0, giving:
+ ordmap->start
+ + (23 << 12)
+ + (11 << 5)
+ + 0;
+ i.e.:
+ ordmap->start + 0x17160
+
+
+ Unoptimized ranges
+ ==================
+
+ Consider encoding the location of the binary expression
+ below:
+
+ 11111111112
+ 12345678901234567890
+ 521
+ 523 return foo + bar;
+ ~~~~^~~~~
+ 523
+
+ The location's caret is at the "+", line 523 column 15, but starts
+ earlier, at the "f" of "foo" at column 11. The finish is at the "r"
+ of "bar" at column 19.
+
+ This can't be stored as a packed range since start != caret.
+ Hence it is stored as an ad-hoc location e.g. 0x80000003.
+
+ Stripping off the top bit gives us an index into the ad-hoc
+ lookaside table:
+
+ line_table->location_adhoc_data_map.data[0x3]
+
+ from which the caret, start and finish can be looked up,
+ encoded as "pure" locations:
+
+ start == ordmap->start + (23 << 12) + (11 << 5)
+ == ordmap->start + 0x17160 (as above; the "f" of "foo")
+
+ caret == ordmap->start + (23 << 12) + (15 << 5)
+ == ordmap->start + 0x171e0
+
+ finish == ordmap->start + (23 << 12) + (19 << 5)
+ == ordmap->start + 0x17260
+
+ To further see how source_location works in practice, see the
+ worked example in libcpp/location-example.txt. */
typedef unsigned int source_location;
/* A range of source locations.