diff mbox

[06/10] Track expression ranges in C frontend

Message ID 1445632918-29617-7-git-send-email-dmalcolm@redhat.com
State New
Headers show

Commit Message

David Malcolm Oct. 23, 2015, 8:41 p.m. UTC
As in the previous version of this patch
 "Implement tree expression tracking in C FE (v2)"
the patch now captures ranges for all C expressions during parsing within
a new field of c_expr, and for all tree nodes with a location_t, it stores
them in ad-hoc locations for later use.

Hence compound expressions get ranges; see:
  https://dmalcolm.fedorapeople.org/gcc/2015-09-22/diagnostic-test-expressions-1.html

and for this example:

  int test (int foo)
  {
    return foo * 100;
           ^^^   ^^^
  }

we have access to the ranges of "foo" and "100" during C parsing via
the c_expr, but once we have GENERIC, all we have is a VAR_DECL and an
INTEGER_CST (the former's location is in at the top of the
function, and the latter has no location).

gcc/ChangeLog:
	* Makefile.in (OBJS): Add gcc-rich-location.o.
	* gcc-rich-location.c: New file.
	* gcc-rich-location.h: New file.
	* print-tree.c (print_node): Print any source range information.
	* tree.c (set_source_range): New functions.
	* tree.h (CAN_HAVE_RANGE_P): New.
	(EXPR_LOCATION_RANGE): New.
	(EXPR_HAS_RANGE): New.
	(get_expr_source_range): New inline function.
	(DECL_LOCATION_RANGE): New.
	(set_source_range): New decls.
	(get_decl_source_range): New inline function.

gcc/c-family/ChangeLog:
	* c-common.c (c_fully_fold_internal): Capture existing souce_range,
	and store it on the result.

gcc/c/ChangeLog:
	* c-parser.c (set_c_expr_source_range): New functions.
	(c_token::get_range): New method.
	(c_token::get_finish): New method.
	(c_parser_expr_no_commas): Call set_c_expr_source_range on the ret
	based on the range from the start of the LHS to the end of the
	RHS.
	(c_parser_conditional_expression): Likewise, based on the range
	from the start of the cond.value to the end of exp2.value.
	(c_parser_binary_expression): Call set_c_expr_source_range on
	the stack values for TRUTH_ANDIF_EXPR and TRUTH_ORIF_EXPR.
	(c_parser_cast_expression): Call set_c_expr_source_range on ret
	based on the cast_loc through to the end of the expr.
	(c_parser_unary_expression): Likewise, based on the
	op_loc through to the end of op.
	(c_parser_sizeof_expression) Likewise, based on the start of the
	sizeof token through to either the closing paren or the end of
	expr.
	(c_parser_postfix_expression): Likewise, using the token range,
	or from the open paren through to the close paren for
	parenthesized expressions.
	(c_parser_postfix_expression_after_primary): Likewise, for
	various kinds of expression.
	* c-tree.h (struct c_expr): Add field "src_range".
	(c_expr::get_start): New method.
	(c_expr::get_finish): New method.
	(set_c_expr_source_range): New decls.
	* c-typeck.c (parser_build_unary_op): Call set_c_expr_source_range
	on ret for prefix unary ops.
	(parser_build_binary_op): Likewise, running from the start of
	arg1.value through to the end of arg2.value.

gcc/testsuite/ChangeLog:
	* gcc.dg/plugin/diagnostic-test-expressions-1.c: New file.
	* gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c:
	New file.
	* gcc.dg/plugin/plugin.exp (plugin_test_list): Add
	diagnostic_plugin_test_tree_expression_range.c and
	diagnostic-test-expressions-1.c.
---
 gcc/Makefile.in                                    |   1 +
 gcc/c-family/c-common.c                            |  10 +-
 gcc/c/c-parser.c                                   |  98 ++++-
 gcc/c/c-tree.h                                     |  19 +
 gcc/c/c-typeck.c                                   |  10 +
 gcc/gcc-rich-location.c                            |  86 +++++
 gcc/gcc-rich-location.h                            |  47 +++
 gcc/print-tree.c                                   |  21 +
 .../gcc.dg/plugin/diagnostic-test-expressions-1.c  | 422 +++++++++++++++++++++
 .../diagnostic_plugin_test_tree_expression_range.c | 152 ++++++++
 gcc/testsuite/gcc.dg/plugin/plugin.exp             |   2 +
 gcc/tree.c                                         |  22 ++
 gcc/tree.h                                         |  31 ++
 13 files changed, 917 insertions(+), 4 deletions(-)
 create mode 100644 gcc/gcc-rich-location.c
 create mode 100644 gcc/gcc-rich-location.h
 create mode 100644 gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c
 create mode 100644 gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c

Comments

Jeff Law Oct. 30, 2015, 6:15 a.m. UTC | #1
On 10/23/2015 02:41 PM, David Malcolm wrote:
> As in the previous version of this patch
>   "Implement tree expression tracking in C FE (v2)"
> the patch now captures ranges for all C expressions during parsing within
> a new field of c_expr, and for all tree nodes with a location_t, it stores
> them in ad-hoc locations for later use.
>
> Hence compound expressions get ranges; see:
>    https://dmalcolm.fedorapeople.org/gcc/2015-09-22/diagnostic-test-expressions-1.html
>
> and for this example:
>
>    int test (int foo)
>    {
>      return foo * 100;
>             ^^^   ^^^
>    }
>
> we have access to the ranges of "foo" and "100" during C parsing via
> the c_expr, but once we have GENERIC, all we have is a VAR_DECL and an
> INTEGER_CST (the former's location is in at the top of the
> function, and the latter has no location).
>
> gcc/ChangeLog:
> 	* Makefile.in (OBJS): Add gcc-rich-location.o.
> 	* gcc-rich-location.c: New file.
> 	* gcc-rich-location.h: New file.
> 	* print-tree.c (print_node): Print any source range information.
> 	* tree.c (set_source_range): New functions.
> 	* tree.h (CAN_HAVE_RANGE_P): New.
> 	(EXPR_LOCATION_RANGE): New.
> 	(EXPR_HAS_RANGE): New.
> 	(get_expr_source_range): New inline function.
> 	(DECL_LOCATION_RANGE): New.
> 	(set_source_range): New decls.
> 	(get_decl_source_range): New inline function.
>
> gcc/c-family/ChangeLog:
> 	* c-common.c (c_fully_fold_internal): Capture existing souce_range,
> 	and store it on the result.
>
> gcc/c/ChangeLog:
> 	* c-parser.c (set_c_expr_source_range): New functions.
> 	(c_token::get_range): New method.
> 	(c_token::get_finish): New method.
> 	(c_parser_expr_no_commas): Call set_c_expr_source_range on the ret
> 	based on the range from the start of the LHS to the end of the
> 	RHS.
> 	(c_parser_conditional_expression): Likewise, based on the range
> 	from the start of the cond.value to the end of exp2.value.
> 	(c_parser_binary_expression): Call set_c_expr_source_range on
> 	the stack values for TRUTH_ANDIF_EXPR and TRUTH_ORIF_EXPR.
> 	(c_parser_cast_expression): Call set_c_expr_source_range on ret
> 	based on the cast_loc through to the end of the expr.
> 	(c_parser_unary_expression): Likewise, based on the
> 	op_loc through to the end of op.
> 	(c_parser_sizeof_expression) Likewise, based on the start of the
> 	sizeof token through to either the closing paren or the end of
> 	expr.
> 	(c_parser_postfix_expression): Likewise, using the token range,
> 	or from the open paren through to the close paren for
> 	parenthesized expressions.
> 	(c_parser_postfix_expression_after_primary): Likewise, for
> 	various kinds of expression.
> 	* c-tree.h (struct c_expr): Add field "src_range".
> 	(c_expr::get_start): New method.
> 	(c_expr::get_finish): New method.
> 	(set_c_expr_source_range): New decls.
> 	* c-typeck.c (parser_build_unary_op): Call set_c_expr_source_range
> 	on ret for prefix unary ops.
> 	(parser_build_binary_op): Likewise, running from the start of
> 	arg1.value through to the end of arg2.value.
>
> gcc/testsuite/ChangeLog:
> 	* gcc.dg/plugin/diagnostic-test-expressions-1.c: New file.
> 	* gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c:
> 	New file.
> 	* gcc.dg/plugin/plugin.exp (plugin_test_list): Add
> 	diagnostic_plugin_test_tree_expression_range.c and
> 	diagnostic-test-expressions-1.c.

>   /* Initialization routine for this file.  */
>
> @@ -6085,6 +6112,9 @@ c_parser_expr_no_commas (c_parser *parser, struct c_expr *after,
>     ret.value = build_modify_expr (op_location, lhs.value, lhs.original_type,
>   				 code, exp_location, rhs.value,
>   				 rhs.original_type);
> +  set_c_expr_source_range (&ret,
> +			   lhs.get_start (),
> +			   rhs.get_finish ());
One line if it fits.


> @@ -6198,6 +6232,9 @@ c_parser_conditional_expression (c_parser *parser, struct c_expr *after,
>   			   ? t1
>   			   : NULL);
>       }
> +  set_c_expr_source_range (&ret,
> +			   start,
> +			   exp2.get_finish ());
Here too.

> @@ -6522,6 +6564,10 @@ c_parser_cast_expression (c_parser *parser, struct c_expr *after)
>   	expr = convert_lvalue_to_rvalue (expr_loc, expr, true, true);
>         }
>         ret.value = c_cast_expr (cast_loc, type_name, expr.value);
> +      if (ret.value && expr.value)
> +	set_c_expr_source_range (&ret,
> +				 cast_loc,
> +				 expr.get_finish ());
And here?

With the nits fixed, this is OK.

I think that covers this iteration of the rich location work and that 
you'll continue working with Jason on extending this into the C++ front-end.

jeff
David Malcolm Nov. 2, 2015, 7:14 p.m. UTC | #2
On Fri, 2015-10-30 at 00:15 -0600, Jeff Law wrote:
> On 10/23/2015 02:41 PM, David Malcolm wrote:
> > As in the previous version of this patch
> >   "Implement tree expression tracking in C FE (v2)"
> > the patch now captures ranges for all C expressions during parsing within
> > a new field of c_expr, and for all tree nodes with a location_t, it stores
> > them in ad-hoc locations for later use.
> >
> > Hence compound expressions get ranges; see:
> >    https://dmalcolm.fedorapeople.org/gcc/2015-09-22/diagnostic-test-expressions-1.html
> >
> > and for this example:
> >
> >    int test (int foo)
> >    {
> >      return foo * 100;
> >             ^^^   ^^^
> >    }
> >
> > we have access to the ranges of "foo" and "100" during C parsing via
> > the c_expr, but once we have GENERIC, all we have is a VAR_DECL and an
> > INTEGER_CST (the former's location is in at the top of the
> > function, and the latter has no location).
> >
> > gcc/ChangeLog:
> > 	* Makefile.in (OBJS): Add gcc-rich-location.o.
> > 	* gcc-rich-location.c: New file.
> > 	* gcc-rich-location.h: New file.
> > 	* print-tree.c (print_node): Print any source range information.
> > 	* tree.c (set_source_range): New functions.
> > 	* tree.h (CAN_HAVE_RANGE_P): New.
> > 	(EXPR_LOCATION_RANGE): New.
> > 	(EXPR_HAS_RANGE): New.
> > 	(get_expr_source_range): New inline function.
> > 	(DECL_LOCATION_RANGE): New.
> > 	(set_source_range): New decls.
> > 	(get_decl_source_range): New inline function.
> >
> > gcc/c-family/ChangeLog:
> > 	* c-common.c (c_fully_fold_internal): Capture existing souce_range,
> > 	and store it on the result.
> >
> > gcc/c/ChangeLog:
> > 	* c-parser.c (set_c_expr_source_range): New functions.
> > 	(c_token::get_range): New method.
> > 	(c_token::get_finish): New method.
> > 	(c_parser_expr_no_commas): Call set_c_expr_source_range on the ret
> > 	based on the range from the start of the LHS to the end of the
> > 	RHS.
> > 	(c_parser_conditional_expression): Likewise, based on the range
> > 	from the start of the cond.value to the end of exp2.value.
> > 	(c_parser_binary_expression): Call set_c_expr_source_range on
> > 	the stack values for TRUTH_ANDIF_EXPR and TRUTH_ORIF_EXPR.
> > 	(c_parser_cast_expression): Call set_c_expr_source_range on ret
> > 	based on the cast_loc through to the end of the expr.
> > 	(c_parser_unary_expression): Likewise, based on the
> > 	op_loc through to the end of op.
> > 	(c_parser_sizeof_expression) Likewise, based on the start of the
> > 	sizeof token through to either the closing paren or the end of
> > 	expr.
> > 	(c_parser_postfix_expression): Likewise, using the token range,
> > 	or from the open paren through to the close paren for
> > 	parenthesized expressions.
> > 	(c_parser_postfix_expression_after_primary): Likewise, for
> > 	various kinds of expression.
> > 	* c-tree.h (struct c_expr): Add field "src_range".
> > 	(c_expr::get_start): New method.
> > 	(c_expr::get_finish): New method.
> > 	(set_c_expr_source_range): New decls.
> > 	* c-typeck.c (parser_build_unary_op): Call set_c_expr_source_range
> > 	on ret for prefix unary ops.
> > 	(parser_build_binary_op): Likewise, running from the start of
> > 	arg1.value through to the end of arg2.value.
> >
> > gcc/testsuite/ChangeLog:
> > 	* gcc.dg/plugin/diagnostic-test-expressions-1.c: New file.
> > 	* gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c:
> > 	New file.
> > 	* gcc.dg/plugin/plugin.exp (plugin_test_list): Add
> > 	diagnostic_plugin_test_tree_expression_range.c and
> > 	diagnostic-test-expressions-1.c.
> 
> >   /* Initialization routine for this file.  */
> >
> > @@ -6085,6 +6112,9 @@ c_parser_expr_no_commas (c_parser *parser, struct c_expr *after,
> >     ret.value = build_modify_expr (op_location, lhs.value, lhs.original_type,
> >   				 code, exp_location, rhs.value,
> >   				 rhs.original_type);
> > +  set_c_expr_source_range (&ret,
> > +			   lhs.get_start (),
> > +			   rhs.get_finish ());
> One line if it fits.
> 
> 
> > @@ -6198,6 +6232,9 @@ c_parser_conditional_expression (c_parser *parser, struct c_expr *after,
> >   			   ? t1
> >   			   : NULL);
> >       }
> > +  set_c_expr_source_range (&ret,
> > +			   start,
> > +			   exp2.get_finish ());
> Here too.
> 
> > @@ -6522,6 +6564,10 @@ c_parser_cast_expression (c_parser *parser, struct c_expr *after)
> >   	expr = convert_lvalue_to_rvalue (expr_loc, expr, true, true);
> >         }
> >         ret.value = c_cast_expr (cast_loc, type_name, expr.value);
> > +      if (ret.value && expr.value)
> > +	set_c_expr_source_range (&ret,
> > +				 cast_loc,
> > +				 expr.get_finish ());
> And here?
> 
> With the nits fixed, this is OK.
> 
> I think that covers this iteration of the rich location work and that 
> you'll continue working with Jason on extending this into the C++ front-end.

Here's a summary of the current status of this work [1]:

Patches 1-4 of the kit: these Jeff has approved, with some pre-approved
nit fixes in 4.  I see these as relatively low risk, and plan to commit
these today/tomorrow.

Patches 5-10: Jeff approved these also (again with some nits). These
feel higher-risk to me, owing to the potential for performance
regressions; I haven't yet answered at least one of Richi's performance
questions (impact on time taken to generate the C++ PCH file); the last
performance testing I did can be seen here:
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02283.html
where the right-most column is this kit.

CCing Richi to keep him in the loop for the above.  Richi, is there any
other specific testing you'd want me to do for this?
Or is it OK to commit, and to see what impact it has on your daily
performance testing?  (and to revert if the impact is unacceptable).

Talking about risks: the reduction of the space for ordinary maps by a
factor of 32, by taking up 5 bits for the packed range information
optimization (patch 10):
 https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02539.html
CCing Dodji: Dodji; is this reasonable?

I did some experiments back in July on this; instrumented builds of
openldap and openssl to see how much space we have in location_t:
https://dmalcolm.fedorapeople.org/gcc/2015-07-23/openldap.csv
https://dmalcolm.fedorapeople.org/gcc/2015-07-24/openldap.csv

(these are space-separated:
           SRPM name
           sourcefile
           maximal ordinary location_t
           minimal macro location_t)

openldap build #files: 906
maximal ordinary location_t was:
sourcefile='/builddir/build/BUILD/openldap-2.4.40/openldap-2.4.40/servers/slapd/bconfig.c'
          max_ordinary_location=0x0081bd1b
          (and min_macro_location=0x7ffe5903
minimal macro location_t was:
sourcefile='/builddir/build/BUILD/openldap-2.4.40/openldap-2.4.40/servers/slapd/aclparse.c'
          min_macro_location=0x7ffe57e2
          (with max_ordinary_location=0x00719775)

openssl-1.0.1k-8.fc22.src.rpm.x86_64:
      #files: 1495
max_ordinary_location=0x00be3726
 (openssl-1.0.1k/apps/s_client.c)
 with min_macro_location=0x7ffe7b6b

min_macro_location=0x7ffdf069 
 (openssl-1.0.1k/apps/speed.c)
 with max_ordinary_location=0x00a1abdf

In all of the above cases, we had enough room to do the bit-packing
optimization, but this is just two projects (albeit real-world C code).

Comparing the gap between maximal ordinary map location and minimal
macro map location, and seeing how much we can expand the ordinary map
locations, the openldap build had:
  (0x7ffe57e2 - 0x0081bd1b) / 0x0081bd1b  == factor of 251 i.e.
7 bits of space available

openssl build had:
  (0x7ffdf069 - 0x00be3726) / 0x00be3726  == factor of 171 i.e. 7 bits
of space available

hence allocating 5 bits to packing ranges is (I hope) reasonable.


Jeff: I'm working on expression ranges in the C++ FE; is that a
prerequisite for patches 5-10, or can 5-10 go ahead without the C++
work?  (assuming the other issues above are acceptable).

Hope this all makes sense and sounds sane
Dave

[1] Together the kit gives us:
* patch 4: infrastructure for printing underlines in
diagnostic_show_locus and for multiple ranges
* patches 5-10: the "source_location" (aka location_t) type becomes a
caret plus a range; the tokens coming from libcpp gain ranges, so
everything using libcpp gains at least underlines of tokens; the C
frontend generates sane ranges for expressions as it parses them, better
showing the user how the parser "sees" their code.

Hence we ought to get underlined ranges for many diagnostics in C and C
++ with this (e.g. input_location gives an underline covering the range
of the token starting at the caret).  The "caret" should remain
unchanged from the status quo, so e.g. debugging locations shouldn't be
affected by the addition of ranges.

I'm anticipating that we'd need some followup patches to pick better
ranges for some diagnostics, analogous to the way we convert "warning"
to "warning_at" for where input_location isn't the best location; I'd
expect these followup patches to be relative simple and low-risk.
David Malcolm Nov. 2, 2015, 7:53 p.m. UTC | #3
On Mon, 2015-11-02 at 14:14 -0500, David Malcolm wrote:
> On Fri, 2015-10-30 at 00:15 -0600, Jeff Law wrote:
> > On 10/23/2015 02:41 PM, David Malcolm wrote:
> > > As in the previous version of this patch
> > >   "Implement tree expression tracking in C FE (v2)"
> > > the patch now captures ranges for all C expressions during parsing within
> > > a new field of c_expr, and for all tree nodes with a location_t, it stores
> > > them in ad-hoc locations for later use.
> > >
> > > Hence compound expressions get ranges; see:
> > >    https://dmalcolm.fedorapeople.org/gcc/2015-09-22/diagnostic-test-expressions-1.html
> > >
> > > and for this example:
> > >
> > >    int test (int foo)
> > >    {
> > >      return foo * 100;
> > >             ^^^   ^^^
> > >    }
> > >
> > > we have access to the ranges of "foo" and "100" during C parsing via
> > > the c_expr, but once we have GENERIC, all we have is a VAR_DECL and an
> > > INTEGER_CST (the former's location is in at the top of the
> > > function, and the latter has no location).
> > >
> > > gcc/ChangeLog:
> > > 	* Makefile.in (OBJS): Add gcc-rich-location.o.
> > > 	* gcc-rich-location.c: New file.
> > > 	* gcc-rich-location.h: New file.
> > > 	* print-tree.c (print_node): Print any source range information.
> > > 	* tree.c (set_source_range): New functions.
> > > 	* tree.h (CAN_HAVE_RANGE_P): New.
> > > 	(EXPR_LOCATION_RANGE): New.
> > > 	(EXPR_HAS_RANGE): New.
> > > 	(get_expr_source_range): New inline function.
> > > 	(DECL_LOCATION_RANGE): New.
> > > 	(set_source_range): New decls.
> > > 	(get_decl_source_range): New inline function.
> > >
> > > gcc/c-family/ChangeLog:
> > > 	* c-common.c (c_fully_fold_internal): Capture existing souce_range,
> > > 	and store it on the result.
> > >
> > > gcc/c/ChangeLog:
> > > 	* c-parser.c (set_c_expr_source_range): New functions.
> > > 	(c_token::get_range): New method.
> > > 	(c_token::get_finish): New method.
> > > 	(c_parser_expr_no_commas): Call set_c_expr_source_range on the ret
> > > 	based on the range from the start of the LHS to the end of the
> > > 	RHS.
> > > 	(c_parser_conditional_expression): Likewise, based on the range
> > > 	from the start of the cond.value to the end of exp2.value.
> > > 	(c_parser_binary_expression): Call set_c_expr_source_range on
> > > 	the stack values for TRUTH_ANDIF_EXPR and TRUTH_ORIF_EXPR.
> > > 	(c_parser_cast_expression): Call set_c_expr_source_range on ret
> > > 	based on the cast_loc through to the end of the expr.
> > > 	(c_parser_unary_expression): Likewise, based on the
> > > 	op_loc through to the end of op.
> > > 	(c_parser_sizeof_expression) Likewise, based on the start of the
> > > 	sizeof token through to either the closing paren or the end of
> > > 	expr.
> > > 	(c_parser_postfix_expression): Likewise, using the token range,
> > > 	or from the open paren through to the close paren for
> > > 	parenthesized expressions.
> > > 	(c_parser_postfix_expression_after_primary): Likewise, for
> > > 	various kinds of expression.
> > > 	* c-tree.h (struct c_expr): Add field "src_range".
> > > 	(c_expr::get_start): New method.
> > > 	(c_expr::get_finish): New method.
> > > 	(set_c_expr_source_range): New decls.
> > > 	* c-typeck.c (parser_build_unary_op): Call set_c_expr_source_range
> > > 	on ret for prefix unary ops.
> > > 	(parser_build_binary_op): Likewise, running from the start of
> > > 	arg1.value through to the end of arg2.value.
> > >
> > > gcc/testsuite/ChangeLog:
> > > 	* gcc.dg/plugin/diagnostic-test-expressions-1.c: New file.
> > > 	* gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c:
> > > 	New file.
> > > 	* gcc.dg/plugin/plugin.exp (plugin_test_list): Add
> > > 	diagnostic_plugin_test_tree_expression_range.c and
> > > 	diagnostic-test-expressions-1.c.
> > 
> > >   /* Initialization routine for this file.  */
> > >
> > > @@ -6085,6 +6112,9 @@ c_parser_expr_no_commas (c_parser *parser, struct c_expr *after,
> > >     ret.value = build_modify_expr (op_location, lhs.value, lhs.original_type,
> > >   				 code, exp_location, rhs.value,
> > >   				 rhs.original_type);
> > > +  set_c_expr_source_range (&ret,
> > > +			   lhs.get_start (),
> > > +			   rhs.get_finish ());
> > One line if it fits.
> > 
> > 
> > > @@ -6198,6 +6232,9 @@ c_parser_conditional_expression (c_parser *parser, struct c_expr *after,
> > >   			   ? t1
> > >   			   : NULL);
> > >       }
> > > +  set_c_expr_source_range (&ret,
> > > +			   start,
> > > +			   exp2.get_finish ());
> > Here too.
> > 
> > > @@ -6522,6 +6564,10 @@ c_parser_cast_expression (c_parser *parser, struct c_expr *after)
> > >   	expr = convert_lvalue_to_rvalue (expr_loc, expr, true, true);
> > >         }
> > >         ret.value = c_cast_expr (cast_loc, type_name, expr.value);
> > > +      if (ret.value && expr.value)
> > > +	set_c_expr_source_range (&ret,
> > > +				 cast_loc,
> > > +				 expr.get_finish ());
> > And here?
> > 
> > With the nits fixed, this is OK.
> > 
> > I think that covers this iteration of the rich location work and that 
> > you'll continue working with Jason on extending this into the C++ front-end.
> 
> Here's a summary of the current status of this work [1]:
> 
> Patches 1-4 of the kit: these Jeff has approved, with some pre-approved
> nit fixes in 4.  I see these as relatively low risk, and plan to commit
> these today/tomorrow.
> 
> Patches 5-10: Jeff approved these also (again with some nits). These
> feel higher-risk to me, owing to the potential for performance
> regressions; I haven't yet answered at least one of Richi's performance
> questions (impact on time taken to generate the C++ PCH file); the last
> performance testing I did can be seen here:
> https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02283.html
> where the right-most column is this kit.
> 
> CCing Richi to keep him in the loop for the above.  Richi, is there any
> other specific testing you'd want me to do for this?
> Or is it OK to commit, and to see what impact it has on your daily
> performance testing?  (and to revert if the impact is unacceptable).
> 
> Talking about risks: the reduction of the space for ordinary maps by a
> factor of 32, by taking up 5 bits for the packed range information
> optimization (patch 10):
>  https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02539.html
> CCing Dodji: Dodji; is this reasonable?
> 
> I did some experiments back in July on this; instrumented builds of
> openldap and openssl to see how much space we have in location_t:
> https://dmalcolm.fedorapeople.org/gcc/2015-07-23/openldap.csv
> https://dmalcolm.fedorapeople.org/gcc/2015-07-24/openldap.csv
> 
> (these are space-separated:
>            SRPM name
>            sourcefile
>            maximal ordinary location_t
>            minimal macro location_t)
> 
> openldap build #files: 906
> maximal ordinary location_t was:
> sourcefile='/builddir/build/BUILD/openldap-2.4.40/openldap-2.4.40/servers/slapd/bconfig.c'
>           max_ordinary_location=0x0081bd1b
>           (and min_macro_location=0x7ffe5903
> minimal macro location_t was:
> sourcefile='/builddir/build/BUILD/openldap-2.4.40/openldap-2.4.40/servers/slapd/aclparse.c'
>           min_macro_location=0x7ffe57e2
>           (with max_ordinary_location=0x00719775)
> 
> openssl-1.0.1k-8.fc22.src.rpm.x86_64:
>       #files: 1495
> max_ordinary_location=0x00be3726
>  (openssl-1.0.1k/apps/s_client.c)
>  with min_macro_location=0x7ffe7b6b
> 
> min_macro_location=0x7ffdf069 
>  (openssl-1.0.1k/apps/speed.c)
>  with max_ordinary_location=0x00a1abdf
> 
> In all of the above cases, we had enough room to do the bit-packing
> optimization, but this is just two projects (albeit real-world C code).
> 
> Comparing the gap between maximal ordinary map location and minimal
> macro map location, and seeing how much we can expand the ordinary map
> locations, the openldap build had:
>   (0x7ffe57e2 - 0x0081bd1b) / 0x0081bd1b  == factor of 251 i.e.
> 7 bits of space available
> 
> openssl build had:
>   (0x7ffdf069 - 0x00be3726) / 0x00be3726  == factor of 171 i.e. 7 bits
> of space available
> 
> hence allocating 5 bits to packing ranges is (I hope) reasonable.

Actually, I realize now that a better upper limit to be concerned with
for ordinary line maps is LINE_MAP_MAX_LOCATION_WITH_COLS i.e.
0x60000000, since there'd be a user-visible impact if we hit that limit.

For the openldap build, we'd have:
  (0x60000000 - 0x0081bd1b) / 0x0081bd1b == factor of 188.
and for openssl:
  (0x60000000 - 0x00be3726) / 0x00be3726 == factor of 128.

So paying 5 bits (for a factor of 32) still seems a reasonable cost, for
these two builds.


> Jeff: I'm working on expression ranges in the C++ FE; is that a
> prerequisite for patches 5-10, or can 5-10 go ahead without the C++
> work?  (assuming the other issues above are acceptable).
> 
> Hope this all makes sense and sounds sane
> Dave
> 
> [1] Together the kit gives us:
> * patch 4: infrastructure for printing underlines in
> diagnostic_show_locus and for multiple ranges
> * patches 5-10: the "source_location" (aka location_t) type becomes a
> caret plus a range; the tokens coming from libcpp gain ranges, so
> everything using libcpp gains at least underlines of tokens; the C
> frontend generates sane ranges for expressions as it parses them, better
> showing the user how the parser "sees" their code.
> 
> Hence we ought to get underlined ranges for many diagnostics in C and C
> ++ with this (e.g. input_location gives an underline covering the range
> of the token starting at the caret).  The "caret" should remain
> unchanged from the status quo, so e.g. debugging locations shouldn't be
> affected by the addition of ranges.
> 
> I'm anticipating that we'd need some followup patches to pick better
> ranges for some diagnostics, analogous to the way we convert "warning"
> to "warning_at" for where input_location isn't the best location; I'd
> expect these followup patches to be relative simple and low-risk.
> 
>
Jeff Law Nov. 2, 2015, 10:26 p.m. UTC | #4
On 11/02/2015 12:14 PM, David Malcolm wrote:

>
>
> Jeff: I'm working on expression ranges in the C++ FE; is that a
> prerequisite for patches 5-10, or can 5-10 go ahead without the C++
> work?  (assuming the other issues above are acceptable).
>
> Hope this all makes sense and sounds sane
I think 5-10 can go in now given you're already in discussions with 
Jason on how to wire this into the C++ front-end.

jeff
Dodji Seketeli Nov. 6, 2015, 7:12 a.m. UTC | #5
> Talking about risks: the reduction of the space for ordinary maps by a
> factor of 32, by taking up 5 bits for the packed range information
> optimization (patch 10):
>  https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02539.html
> CCing Dodji: Dodji; is this reasonable?

FWIW, I am definitely to get this (patch 10/10 of the series) if other
agrees.  I just have some minor questions to ask about that patch and
I replied to the patch to ask.

As for the "reduction of the space for ordinary maps by a factor of 32,
by taking up 5 bits for the packed range information" that you mention,
I think it's a trade off I'd live with.

Ultimately, if it shows that we really move out of space with this, we
should probably explore the impact of just moving to a 64 bits size for
source_location.

Until then, a possible mitigation strategy could be to add an option to
disable the range tracking altogether (even at the preprocessor's lexer
level), to provide an escape path to users running low on resources.  A
bit what we do with -ftrack-macro-expansion=0.

Cheers,
diff mbox

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 009c745..8cd446d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1255,6 +1255,7 @@  OBJS = \
 	fold-const.o \
 	function.o \
 	fwprop.o \
+	gcc-rich-location.o \
 	gcse.o \
 	gcse-common.o \
 	ggc-common.o \
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 4a5ccb7..c102bbd 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -1188,6 +1188,7 @@  c_fully_fold_internal (tree expr, bool in_init, bool *maybe_const_operands,
   bool op0_const_self = true, op1_const_self = true, op2_const_self = true;
   bool nowarning = TREE_NO_WARNING (expr);
   bool unused_p;
+  source_range old_range;
 
   /* This function is not relevant to C++ because C++ folds while
      parsing, and may need changes to be correct for C++ when C++
@@ -1203,6 +1204,9 @@  c_fully_fold_internal (tree expr, bool in_init, bool *maybe_const_operands,
       || code == SAVE_EXPR)
     return expr;
 
+  if (IS_EXPR_CODE_CLASS (kind))
+    old_range = EXPR_LOCATION_RANGE (expr);
+
   /* Operands of variable-length expressions (function calls) have
      already been folded, as have __builtin_* function calls, and such
      expressions cannot occur in constant expressions.  */
@@ -1627,7 +1631,11 @@  c_fully_fold_internal (tree expr, bool in_init, bool *maybe_const_operands,
       TREE_NO_WARNING (ret) = 1;
     }
   if (ret != expr)
-    protected_set_expr_location (ret, loc);
+    {
+      protected_set_expr_location (ret, loc);
+      if (IS_EXPR_CODE_CLASS (kind))
+	set_source_range (ret, old_range.m_start, old_range.m_finish);
+    }
   return ret;
 }
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 2d24c21..00a8698 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -67,6 +67,23 @@  along with GCC; see the file COPYING3.  If not see
 #include "gomp-constants.h"
 #include "c-family/c-indentation.h"
 
+void
+set_c_expr_source_range (c_expr *expr,
+			 location_t start, location_t finish)
+{
+  expr->src_range.m_start = start;
+  expr->src_range.m_finish = finish;
+  set_source_range (expr->value, start, finish);
+}
+
+void
+set_c_expr_source_range (c_expr *expr,
+			 source_range src_range)
+{
+  expr->src_range = src_range;
+  set_source_range (expr->value, src_range);
+}
+
 
 /* Initialization routine for this file.  */
 
@@ -172,6 +189,16 @@  struct GTY (()) c_token {
   location_t location;
   /* The value associated with this token, if any.  */
   tree value;
+
+  source_range get_range () const
+  {
+    return get_range_from_loc (line_table, location);
+  }
+
+  location_t get_finish () const
+  {
+    return get_range ().m_finish;
+  }
 };
 
 /* A parser structure recording information about the state and
@@ -6085,6 +6112,9 @@  c_parser_expr_no_commas (c_parser *parser, struct c_expr *after,
   ret.value = build_modify_expr (op_location, lhs.value, lhs.original_type,
 				 code, exp_location, rhs.value,
 				 rhs.original_type);
+  set_c_expr_source_range (&ret,
+			   lhs.get_start (),
+			   rhs.get_finish ());
   if (code == NOP_EXPR)
     ret.original_code = MODIFY_EXPR;
   else
@@ -6115,7 +6145,7 @@  c_parser_conditional_expression (c_parser *parser, struct c_expr *after,
 				 tree omp_atomic_lhs)
 {
   struct c_expr cond, exp1, exp2, ret;
-  location_t cond_loc, colon_loc, middle_loc;
+  location_t start, cond_loc, colon_loc, middle_loc;
 
   gcc_assert (!after || c_dialect_objc ());
 
@@ -6123,6 +6153,10 @@  c_parser_conditional_expression (c_parser *parser, struct c_expr *after,
 
   if (c_parser_next_token_is_not (parser, CPP_QUERY))
     return cond;
+  if (cond.value != error_mark_node)
+    start = cond.get_start ();
+  else
+    start = UNKNOWN_LOCATION;
   cond_loc = c_parser_peek_token (parser)->location;
   cond = convert_lvalue_to_rvalue (cond_loc, cond, true, true);
   c_parser_consume_token (parser);
@@ -6198,6 +6232,9 @@  c_parser_conditional_expression (c_parser *parser, struct c_expr *after,
 			   ? t1
 			   : NULL);
     }
+  set_c_expr_source_range (&ret,
+			   start,
+			   exp2.get_finish ());
   return ret;
 }
 
@@ -6350,6 +6387,7 @@  c_parser_binary_expression (c_parser *parser, struct c_expr *after,
     {
       enum c_parser_prec oprec;
       enum tree_code ocode;
+      source_range src_range;
       if (parser->error)
 	goto out;
       switch (c_parser_peek_token (parser)->type)
@@ -6438,6 +6476,7 @@  c_parser_binary_expression (c_parser *parser, struct c_expr *after,
       switch (ocode)
 	{
 	case TRUTH_ANDIF_EXPR:
+	  src_range = stack[sp].expr.src_range;
 	  stack[sp].expr
 	    = convert_lvalue_to_rvalue (stack[sp].loc,
 					stack[sp].expr, true, true);
@@ -6445,8 +6484,10 @@  c_parser_binary_expression (c_parser *parser, struct c_expr *after,
 	    (stack[sp].loc, default_conversion (stack[sp].expr.value));
 	  c_inhibit_evaluation_warnings += (stack[sp].expr.value
 					    == truthvalue_false_node);
+	  set_c_expr_source_range (&stack[sp].expr, src_range);
 	  break;
 	case TRUTH_ORIF_EXPR:
+	  src_range = stack[sp].expr.src_range;
 	  stack[sp].expr
 	    = convert_lvalue_to_rvalue (stack[sp].loc,
 					stack[sp].expr, true, true);
@@ -6454,6 +6495,7 @@  c_parser_binary_expression (c_parser *parser, struct c_expr *after,
 	    (stack[sp].loc, default_conversion (stack[sp].expr.value));
 	  c_inhibit_evaluation_warnings += (stack[sp].expr.value
 					    == truthvalue_true_node);
+	  set_c_expr_source_range (&stack[sp].expr, src_range);
 	  break;
 	default:
 	  break;
@@ -6522,6 +6564,10 @@  c_parser_cast_expression (c_parser *parser, struct c_expr *after)
 	expr = convert_lvalue_to_rvalue (expr_loc, expr, true, true);
       }
       ret.value = c_cast_expr (cast_loc, type_name, expr.value);
+      if (ret.value && expr.value)
+	set_c_expr_source_range (&ret,
+				 cast_loc,
+				 expr.get_finish ());
       ret.original_code = ERROR_MARK;
       ret.original_type = NULL;
       return ret;
@@ -6571,6 +6617,7 @@  c_parser_unary_expression (c_parser *parser)
   struct c_expr ret, op;
   location_t op_loc = c_parser_peek_token (parser)->location;
   location_t exp_loc;
+  location_t finish;
   ret.original_code = ERROR_MARK;
   ret.original_type = NULL;
   switch (c_parser_peek_token (parser)->type)
@@ -6610,8 +6657,10 @@  c_parser_unary_expression (c_parser *parser)
       c_parser_consume_token (parser);
       exp_loc = c_parser_peek_token (parser)->location;
       op = c_parser_cast_expression (parser, NULL);
+      finish = op.get_finish ();
       op = convert_lvalue_to_rvalue (exp_loc, op, true, true);
       ret.value = build_indirect_ref (op_loc, op.value, RO_UNARY_STAR);
+      set_c_expr_source_range (&ret, op_loc, finish);
       return ret;
     case CPP_PLUS:
       if (!c_dialect_objc () && !in_system_header_at (input_location))
@@ -6699,8 +6748,15 @@  static struct c_expr
 c_parser_sizeof_expression (c_parser *parser)
 {
   struct c_expr expr;
+  struct c_expr result;
   location_t expr_loc;
   gcc_assert (c_parser_next_token_is_keyword (parser, RID_SIZEOF));
+
+  location_t start;
+  location_t finish = UNKNOWN_LOCATION;
+
+  start = c_parser_peek_token (parser)->location;
+
   c_parser_consume_token (parser);
   c_inhibit_evaluation_warnings++;
   in_sizeof++;
@@ -6714,6 +6770,7 @@  c_parser_sizeof_expression (c_parser *parser)
       expr_loc = c_parser_peek_token (parser)->location;
       type_name = c_parser_type_name (parser);
       c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>");
+      finish = parser->tokens_buf[0].location;
       if (type_name == NULL)
 	{
 	  struct c_expr ret;
@@ -6729,17 +6786,19 @@  c_parser_sizeof_expression (c_parser *parser)
 	  expr = c_parser_postfix_expression_after_paren_type (parser,
 							       type_name,
 							       expr_loc);
+	  finish = expr.get_finish ();
 	  goto sizeof_expr;
 	}
       /* sizeof ( type-name ).  */
       c_inhibit_evaluation_warnings--;
       in_sizeof--;
-      return c_expr_sizeof_type (expr_loc, type_name);
+      result = c_expr_sizeof_type (expr_loc, type_name);
     }
   else
     {
       expr_loc = c_parser_peek_token (parser)->location;
       expr = c_parser_unary_expression (parser);
+      finish = expr.get_finish ();
     sizeof_expr:
       c_inhibit_evaluation_warnings--;
       in_sizeof--;
@@ -6747,8 +6806,11 @@  c_parser_sizeof_expression (c_parser *parser)
       if (TREE_CODE (expr.value) == COMPONENT_REF
 	  && DECL_C_BIT_FIELD (TREE_OPERAND (expr.value, 1)))
 	error_at (expr_loc, "%<sizeof%> applied to a bit-field");
-      return c_expr_sizeof_expr (expr_loc, expr);
+      result = c_expr_sizeof_expr (expr_loc, expr);
     }
+  if (finish != UNKNOWN_LOCATION)
+    set_c_expr_source_range (&result, start, finish);
+  return result;
 }
 
 /* Parse an alignof expression.  */
@@ -7168,12 +7230,14 @@  c_parser_postfix_expression (c_parser *parser)
   struct c_expr expr, e1;
   struct c_type_name *t1, *t2;
   location_t loc = c_parser_peek_token (parser)->location;;
+  source_range tok_range = c_parser_peek_token (parser)->get_range ();
   expr.original_code = ERROR_MARK;
   expr.original_type = NULL;
   switch (c_parser_peek_token (parser)->type)
     {
     case CPP_NUMBER:
       expr.value = c_parser_peek_token (parser)->value;
+      set_c_expr_source_range (&expr, tok_range);
       loc = c_parser_peek_token (parser)->location;
       c_parser_consume_token (parser);
       if (TREE_CODE (expr.value) == FIXED_CST
@@ -7188,6 +7252,7 @@  c_parser_postfix_expression (c_parser *parser)
     case CPP_CHAR32:
     case CPP_WCHAR:
       expr.value = c_parser_peek_token (parser)->value;
+      set_c_expr_source_range (&expr, tok_range);
       c_parser_consume_token (parser);
       break;
     case CPP_STRING:
@@ -7196,6 +7261,7 @@  c_parser_postfix_expression (c_parser *parser)
     case CPP_WSTRING:
     case CPP_UTF8STRING:
       expr.value = c_parser_peek_token (parser)->value;
+      set_c_expr_source_range (&expr, tok_range);
       expr.original_code = STRING_CST;
       c_parser_consume_token (parser);
       break;
@@ -7203,6 +7269,7 @@  c_parser_postfix_expression (c_parser *parser)
       gcc_assert (c_dialect_objc ());
       expr.value
 	= objc_build_string_object (c_parser_peek_token (parser)->value);
+      set_c_expr_source_range (&expr, tok_range);
       c_parser_consume_token (parser);
       break;
     case CPP_NAME:
@@ -7216,6 +7283,7 @@  c_parser_postfix_expression (c_parser *parser)
 					     (c_parser_peek_token (parser)->type
 					      == CPP_OPEN_PAREN),
 					     &expr.original_type);
+	    set_c_expr_source_range (&expr, tok_range);
 	    break;
 	  }
 	case C_ID_CLASSNAME:
@@ -7304,6 +7372,7 @@  c_parser_postfix_expression (c_parser *parser)
       else
 	{
 	  /* A parenthesized expression.  */
+	  location_t loc_open_paren = c_parser_peek_token (parser)->location;
 	  c_parser_consume_token (parser);
 	  expr = c_parser_expression (parser);
 	  if (TREE_CODE (expr.value) == MODIFY_EXPR)
@@ -7311,6 +7380,8 @@  c_parser_postfix_expression (c_parser *parser)
 	  if (expr.original_code != C_MAYBE_CONST_EXPR)
 	    expr.original_code = ERROR_MARK;
 	  /* Don't change EXPR.ORIGINAL_TYPE.  */
+	  location_t loc_close_paren = c_parser_peek_token (parser)->location;
+	  set_c_expr_source_range (&expr, loc_open_paren, loc_close_paren);
 	  c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
 				     "expected %<)%>");
 	}
@@ -7901,6 +7972,8 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
   vec<tree, va_gc> *exprlist;
   vec<tree, va_gc> *origtypes = NULL;
   vec<location_t> arg_loc = vNULL;
+  location_t start;
+  location_t finish;
 
   while (true)
     {
@@ -7937,7 +8010,10 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
 		{
 		  c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE,
 					     "expected %<]%>");
+		  start = expr.get_start ();
+		  finish = parser->tokens_buf[0].location;
 		  expr.value = build_array_ref (op_loc, expr.value, idx);
+		  set_c_expr_source_range (&expr, start, finish);
 		}
 	    }
 	  expr.original_code = ERROR_MARK;
@@ -7980,9 +8056,13 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
 			"%<memset%> used with constant zero length parameter; "
 			"this could be due to transposed parameters");
 
+	  start = expr.get_start ();
+	  finish = parser->tokens_buf[0].get_finish ();
 	  expr.value
 	    = c_build_function_call_vec (expr_loc, arg_loc, expr.value,
 					 exprlist, origtypes);
+	  set_c_expr_source_range (&expr, start, finish);
+
 	  expr.original_code = ERROR_MARK;
 	  if (TREE_CODE (expr.value) == INTEGER_CST
 	      && TREE_CODE (orig_expr.value) == FUNCTION_DECL
@@ -8011,8 +8091,11 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
               expr.original_type = NULL;
 	      return expr;
 	    }
+	  start = expr.get_start ();
+	  finish = c_parser_peek_token (parser)->get_finish ();
 	  c_parser_consume_token (parser);
 	  expr.value = build_component_ref (op_loc, expr.value, ident);
+	  set_c_expr_source_range (&expr, start, finish);
 	  expr.original_code = ERROR_MARK;
 	  if (TREE_CODE (expr.value) != COMPONENT_REF)
 	    expr.original_type = NULL;
@@ -8040,12 +8123,15 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
 	      expr.original_type = NULL;
 	      return expr;
 	    }
+	  start = expr.get_start ();
+	  finish = c_parser_peek_token (parser)->get_finish ();
 	  c_parser_consume_token (parser);
 	  expr.value = build_component_ref (op_loc,
 					    build_indirect_ref (op_loc,
 								expr.value,
 								RO_ARROW),
 					    ident);
+	  set_c_expr_source_range (&expr, start, finish);
 	  expr.original_code = ERROR_MARK;
 	  if (TREE_CODE (expr.value) != COMPONENT_REF)
 	    expr.original_type = NULL;
@@ -8061,6 +8147,8 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
 	  break;
 	case CPP_PLUS_PLUS:
 	  /* Postincrement.  */
+	  start = expr.get_start ();
+	  finish = c_parser_peek_token (parser)->get_finish ();
 	  c_parser_consume_token (parser);
 	  /* If the expressions have array notations, we expand them.  */
 	  if (flag_cilkplus
@@ -8072,11 +8160,14 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
 	      expr.value = build_unary_op (op_loc,
 					   POSTINCREMENT_EXPR, expr.value, 0);
 	    }
+	  set_c_expr_source_range (&expr, start, finish);
 	  expr.original_code = ERROR_MARK;
 	  expr.original_type = NULL;
 	  break;
 	case CPP_MINUS_MINUS:
 	  /* Postdecrement.  */
+	  start = expr.get_start ();
+	  finish = c_parser_peek_token (parser)->get_finish ();
 	  c_parser_consume_token (parser);
 	  /* If the expressions have array notations, we expand them.  */
 	  if (flag_cilkplus
@@ -8088,6 +8179,7 @@  c_parser_postfix_expression_after_primary (c_parser *parser,
 	      expr.value = build_unary_op (op_loc,
 					   POSTDECREMENT_EXPR, expr.value, 0);
 	    }
+	  set_c_expr_source_range (&expr, start, finish);
 	  expr.original_code = ERROR_MARK;
 	  expr.original_type = NULL;
 	  break;
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 667529a..ffa0598 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -132,6 +132,17 @@  struct c_expr
      The type of an enum constant is a plain integer type, but this
      field will be the enum type.  */
   tree original_type;
+
+  /* The source range of this expression.  This is redundant
+     for node values that have locations, but not all node kinds
+     have locations (e.g. constants, and references to params, locals,
+     etc), so we stash a copy here.  */
+  source_range src_range;
+
+  /* Access to the first and last locations within the source spelling
+     of this expression.  */
+  location_t get_start () const { return src_range.m_start; }
+  location_t get_finish () const { return src_range.m_finish; }
 };
 
 /* Type alias for struct c_expr. This allows to use the structure
@@ -709,4 +720,12 @@  extern void pedwarn_c90 (location_t, int opt, const char *, ...)
 extern bool pedwarn_c99 (location_t, int opt, const char *, ...)
     ATTRIBUTE_GCC_DIAG(3,4);
 
+extern void
+set_c_expr_source_range (c_expr *expr,
+			 location_t start, location_t finish);
+
+extern void
+set_c_expr_source_range (c_expr *expr,
+			 source_range src_range);
+
 #endif /* ! GCC_C_TREE_H */
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index df3245a..c2e16c6 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -3395,6 +3395,12 @@  parser_build_unary_op (location_t loc, enum tree_code code, struct c_expr arg)
     overflow_warning (loc, result.value);
     }
 
+  /* We are typically called when parsing a prefix token at LOC acting on
+     ARG.  Reflect this by updating the source range of the result to
+     start at LOC and end at the end of ARG.  */
+  set_c_expr_source_range (&result,
+			   loc, arg.get_finish ());
+
   return result;
 }
 
@@ -3432,6 +3438,10 @@  parser_build_binary_op (location_t location, enum tree_code code,
   if (location != UNKNOWN_LOCATION)
     protected_set_expr_location (result.value, location);
 
+  set_c_expr_source_range (&result,
+			   arg1.get_start (),
+			   arg2.get_finish ());
+
   /* Check for cases such as x+y<<z which users are likely
      to misinterpret.  */
   if (warn_parentheses)
diff --git a/gcc/gcc-rich-location.c b/gcc/gcc-rich-location.c
new file mode 100644
index 0000000..b0ec47b
--- /dev/null
+++ b/gcc/gcc-rich-location.c
@@ -0,0 +1,86 @@ 
+/* Implementation of gcc_rich_location class
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
+#include "tree-core.h"
+#include "tree.h"
+#include "diagnostic-core.h"
+#include "gcc-rich-location.h"
+#include "print-tree.h"
+#include "pretty-print.h"
+#include "intl.h"
+#include "cpplib.h"
+#include "diagnostic.h"
+
+/* Extract any source range information from EXPR and write it
+   to *R.  */
+
+static bool
+get_range_for_expr (tree expr, location_range *r)
+{
+  if (EXPR_HAS_RANGE (expr))
+    {
+      source_range sr = EXPR_LOCATION_RANGE (expr);
+
+      /* Do we have meaningful data?  */
+      if (sr.m_start && sr.m_finish)
+	{
+	  r->m_start = expand_location (sr.m_start);
+	  r->m_finish = expand_location (sr.m_finish);
+	  return true;
+	}
+    }
+
+  return false;
+}
+
+/* Add a range to the rich_location, covering expression EXPR. */
+
+void
+gcc_rich_location::add_expr (tree expr)
+{
+  gcc_assert (expr);
+
+  location_range r;
+  r.m_show_caret_p = false;
+  if (get_range_for_expr (expr, &r))
+    add_range (&r);
+}
+
+/* If T is an expression, add a range for it to the rich_location.  */
+
+void
+gcc_rich_location::maybe_add_expr (tree t)
+{
+  if (EXPR_P (t))
+    add_expr (t);
+}
diff --git a/gcc/gcc-rich-location.h b/gcc/gcc-rich-location.h
new file mode 100644
index 0000000..c82cbf1
--- /dev/null
+++ b/gcc/gcc-rich-location.h
@@ -0,0 +1,47 @@ 
+/* Declarations relating to class gcc_rich_location
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_RICH_LOCATION_H
+#define GCC_RICH_LOCATION_H
+
+/* A gcc_rich_location is libcpp's rich_location with additional
+   helper methods for working with gcc's types.  */
+class gcc_rich_location : public rich_location
+{
+ public:
+  /* Constructors.  */
+
+  /* Constructing from a location.  */
+  gcc_rich_location (source_location loc) :
+    rich_location (loc) {}
+
+  /* Constructing from a source_range.  */
+  gcc_rich_location (source_range src_range) :
+    rich_location (src_range) {}
+
+
+  /* Methods for adding ranges via gcc entities.  */
+  void
+  add_expr (tree expr);
+
+  void
+  maybe_add_expr (tree t);
+};
+
+#endif /* GCC_RICH_LOCATION_H */
diff --git a/gcc/print-tree.c b/gcc/print-tree.c
index ea50056..8b3794a 100644
--- a/gcc/print-tree.c
+++ b/gcc/print-tree.c
@@ -936,6 +936,27 @@  print_node (FILE *file, const char *prefix, tree node, int indent)
       expanded_location xloc = expand_location (EXPR_LOCATION (node));
       indent_to (file, indent+4);
       fprintf (file, "%s:%d:%d", xloc.file, xloc.line, xloc.column);
+
+      /* Print the range, if any */
+      source_range r = EXPR_LOCATION_RANGE (node);
+      if (r.m_start)
+	{
+	  xloc = expand_location (r.m_start);
+	  fprintf (file, " start: %s:%d:%d", xloc.file, xloc.line, xloc.column);
+	}
+      else
+	{
+	  fprintf (file, " start: unknown");
+	}
+      if (r.m_finish)
+	{
+	  xloc = expand_location (r.m_finish);
+	  fprintf (file, " finish: %s:%d:%d", xloc.file, xloc.line, xloc.column);
+	}
+      else
+	{
+	  fprintf (file, " finish: unknown");
+	}
     }
 
   fprintf (file, ">");
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c
new file mode 100644
index 0000000..5485aaf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c
@@ -0,0 +1,422 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdiagnostics-show-caret" } */
+
+/* This is a collection of unittests to verify that we're correctly
+   capturing the source code ranges of various kinds of expression.
+
+   It uses the various "diagnostic_test_*_expression_range_plugin"
+   plugins which handles "__emit_expression_range" by generating a warning
+   at the given source range of the input argument.  Each of the
+   different plugins do this at a different phase of the internal
+   representation (tree, gimple, etc), so we can verify that the
+   source code range information is valid at each phase.
+
+   We want to accept an expression of any type.  To do this in C, we
+   use variadic arguments, but C requires at least one argument before
+   the ellipsis, so we have a dummy one.  */
+
+extern void __emit_expression_range (int dummy, ...);
+
+int global;
+
+void test_parentheses (int a, int b)
+{
+  __emit_expression_range (0, (a + b) ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, (a + b) );
+                               ~~~^~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, (a + b) * (a - b) ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, (a + b) * (a - b) );
+                               ~~~~~~~~^~~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, !(a && b) ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, !(a && b) );
+                               ^~~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+/* Postfix expressions.  ************************************************/
+
+void test_array_reference (int *arr)
+{
+  __emit_expression_range (0, arr[100] ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, arr[100] );
+                               ~~~^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+int test_function_call (int p, int q, int r)
+{
+  __emit_expression_range (0, test_function_call (p, q, r) ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, test_function_call (p, q, r) );
+                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
+   { dg-end-multiline-output "" } */
+  return 0;
+}
+
+struct test_struct
+{
+  int field;
+};
+
+int test_structure_references (struct test_struct *ptr)
+{
+  struct test_struct local;
+  local.field = 42;
+
+  __emit_expression_range (0, local.field ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, local.field );
+                               ~~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, ptr->field ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, ptr->field );
+                               ~~~^~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+int test_postfix_incdec (int i)
+{
+  __emit_expression_range (0, i++ ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, i++ );
+                               ~^~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, i-- ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, i-- );
+                               ~^~
+   { dg-end-multiline-output "" } */
+}
+
+/* Unary operators.  ****************************************************/
+
+int test_prefix_incdec (int i)
+{
+  __emit_expression_range (0, ++i ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, ++i );
+                               ^~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, --i ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, --i );
+                               ^~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_address_operator (void)
+{
+  __emit_expression_range (0, &global ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, &global );
+                               ^~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_indirection (int *ptr)
+{
+  __emit_expression_range (0, *ptr ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, *ptr );
+                               ^~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_unary_minus (int i)
+{
+  __emit_expression_range (0, -i ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, -i );
+                               ^~
+   { dg-end-multiline-output "" } */
+}
+
+void test_ones_complement (int i)
+{
+  __emit_expression_range (0, ~i ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, ~i );
+                               ^~
+   { dg-end-multiline-output "" } */
+}
+
+void test_logical_negation (int flag)
+{
+  __emit_expression_range (0, !flag ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, !flag );
+                               ^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+/* Casts.  ****************************************************/
+
+void test_cast (void *ptr)
+{
+  __emit_expression_range (0, (int *)ptr ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, (int *)ptr );
+                               ^~~~~~~~~~
+   { dg-end-multiline-output "" } */
+
+}
+
+/* Binary operators.  *******************************************/
+
+void test_multiplicative_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs * rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs * rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs / rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs / rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs % rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs % rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_additive_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs + rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs + rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs - rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs - rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_shift_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs << rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs << rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs >> rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs >> rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_relational_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs < rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs < rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs > rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs > rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs <= rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs <= rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs >= rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs >= rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_equality_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs == rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs == rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs != rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs != rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_bitwise_binary_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs & rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs & rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs ^ rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs ^ rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs | rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs | rhs );
+                               ~~~~^~~~~
+   { dg-end-multiline-output "" } */
+}
+
+void test_logical_operators (int lhs, int rhs)
+{
+  __emit_expression_range (0, lhs && rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs && rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, lhs || rhs ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, lhs || rhs );
+                               ~~~~^~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+/* Conditional operator.  *******************************************/
+
+void test_conditional_operators (int flag, int on_true, int on_false)
+{
+  __emit_expression_range (0, flag ? on_true : on_false ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, flag ? on_true : on_false );
+                               ~~~~~~~~~~~~~~~^~~~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+/* Assignment expressions.  *******************************************/
+
+void test_assignment_expressions (int dest, int other)
+{
+  __emit_expression_range (0, dest = other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest = other );
+                               ~~~~~^~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest *= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest *= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest /= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest /= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest %= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest %= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest += other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest += other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest -= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest -= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest <<= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest <<= other );
+                               ~~~~~^~~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest >>= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest >>= other );
+                               ~~~~~^~~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest &= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest &= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest ^= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest ^= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0, dest |= other ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, dest |= other );
+                               ~~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+/* Comma operator.  *******************************************/
+
+void test_comma_operator (int a, int b)
+{
+  __emit_expression_range (0, (a++, a + b) ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, (a++, a + b) );
+                               ~~~~^~~~~~~~
+   { dg-end-multiline-output "" } */
+}
+
+/* Examples of non-trivial expressions.  ****************************/
+
+extern double sqrt (double x);
+
+void test_quadratic (double a, double b, double c)
+{
+  __emit_expression_range (0, b * b - 4 * a * c ); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, b * b - 4 * a * c );
+                               ~~~~~~^~~~~~~~~~~
+   { dg-end-multiline-output "" } */
+
+  __emit_expression_range (0,
+     (-b + sqrt (b * b - 4 * a * c))
+     / (2 * a)); /* { dg-warning "range" } */
+/* { dg-begin-multiline-output "" }
+      (-b + sqrt (b * b - 4 * a * c))
+      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+      / (2 * a));
+      ^~~~~~~~~
+   { dg-end-multiline-output "" } */
+
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c
new file mode 100644
index 0000000..46e97b7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c
@@ -0,0 +1,152 @@ 
+/* This plugin verifies the source-code location ranges of
+   expressions, at the pre-gimplification tree stage.  */
+/* { dg-options "-O" } */
+
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "hash-table.h"
+#include "vec.h"
+#include "ggc.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "intl.h"
+#include "plugin-version.h"
+#include "diagnostic.h"
+#include "context.h"
+#include "gcc-rich-location.h"
+#include "print-tree.h"
+
+/*
+  Hack: fails with linker error:
+./diagnostic_plugin_test_tree_expression_range.so: undefined symbol: _ZN17gcc_rich_location8add_exprEP9tree_node
+  since nothing in the tree is using gcc_rich_location::add_expr yet.
+
+  I've tried various workarounds (adding DEBUG_FUNCTION to the
+  method, taking its address), but can't seem to fix it that way.
+  So as a nasty workaround, the following material is copied&pasted
+  from gcc-rich-location.c: */
+
+static bool
+get_range_for_expr (tree expr, location_range *r)
+{
+  if (EXPR_HAS_RANGE (expr))
+    {
+      source_range sr = EXPR_LOCATION_RANGE (expr);
+
+      /* Do we have meaningful data?  */
+      if (sr.m_start && sr.m_finish)
+	{
+	  r->m_start = expand_location (sr.m_start);
+	  r->m_finish = expand_location (sr.m_finish);
+	  return true;
+	}
+    }
+
+  return false;
+}
+
+/* Add a range to the rich_location, covering expression EXPR. */
+
+void
+gcc_rich_location::add_expr (tree expr)
+{
+  gcc_assert (expr);
+
+  location_range r;
+  r.m_show_caret_p = false;
+  if (get_range_for_expr (expr, &r))
+    add_range (&r);
+}
+
+/* FIXME: end of material taken from gcc-rich-location.c */
+
+
+int plugin_is_GPL_compatible;
+
+static void
+emit_warning (rich_location *richloc)
+{
+  if (richloc->get_num_locations () < 2)
+    {
+      error_at_rich_loc (richloc, "range not found");
+      return;
+    }
+
+  location_range *range = richloc->get_range (1);
+  warning_at_rich_loc (richloc, 0,
+		       "tree range %i:%i-%i:%i",
+		       range->m_start.line,
+		       range->m_start.column,
+		       range->m_finish.line,
+		       range->m_finish.column);
+}
+
+tree
+cb_walk_tree_fn (tree * tp, int * walk_subtrees,
+		 void * data ATTRIBUTE_UNUSED)
+{
+  if (TREE_CODE (*tp) != CALL_EXPR)
+    return NULL_TREE;
+
+  tree call_expr = *tp;
+  tree fn = CALL_EXPR_FN (call_expr);
+  if (TREE_CODE (fn) != ADDR_EXPR)
+    return NULL_TREE;
+  fn = TREE_OPERAND (fn, 0);
+  if (TREE_CODE (fn) != FUNCTION_DECL)
+    return NULL_TREE;
+  if (strcmp (IDENTIFIER_POINTER (DECL_NAME (fn)), "__emit_expression_range"))
+    return NULL_TREE;
+
+  /* Get arg 1; print it! */
+  tree arg = CALL_EXPR_ARG (call_expr, 1);
+
+  gcc_rich_location richloc (EXPR_LOCATION (arg));
+  richloc.add_expr (arg);
+  emit_warning (&richloc);
+
+  return NULL_TREE;
+}
+
+static void
+callback (void *gcc_data, void *user_data)
+{
+  tree fndecl = (tree)gcc_data;
+  walk_tree (&DECL_SAVED_TREE (fndecl), cb_walk_tree_fn, NULL, NULL);
+}
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+	     struct plugin_gcc_version *version)
+{
+  struct register_pass_info pass_info;
+  const char *plugin_name = plugin_info->base_name;
+  int argc = plugin_info->argc;
+  struct plugin_argument *argv = plugin_info->argv;
+
+  if (!plugin_default_version_check (version, &gcc_version))
+    return 1;
+
+  register_callback (plugin_name,
+		     PLUGIN_PRE_GENERICIZE,
+		     callback,
+		     NULL);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index 941bccc..b7efcf5 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -66,6 +66,8 @@  set plugin_test_list [list \
     { diagnostic_plugin_test_show_locus.c \
 	  diagnostic-test-show-locus-bw.c \
 	  diagnostic-test-show-locus-color.c } \
+    { diagnostic_plugin_test_tree_expression_range.c \
+	  diagnostic-test-expressions-1.c } \
 ]
 
 foreach plugin_test $plugin_test_list {
diff --git a/gcc/tree.c b/gcc/tree.c
index 426803c..a676352 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -13660,4 +13660,26 @@  set_block (location_t loc, tree block)
   return COMBINE_LOCATION_DATA (line_table, loc, src_range, block);
 }
 
+void
+set_source_range (tree expr, location_t start, location_t finish)
+{
+  source_range src_range;
+  src_range.m_start = start;
+  src_range.m_finish = finish;
+  set_source_range (expr, src_range);
+}
+
+void
+set_source_range (tree expr, source_range src_range)
+{
+  if (!EXPR_P (expr))
+    return;
+
+  location_t adhoc = COMBINE_LOCATION_DATA (line_table,
+					    EXPR_LOCATION (expr),
+					    src_range,
+					    NULL);
+  SET_EXPR_LOCATION (expr, adhoc);
+}
+
 #include "gt-tree.h"
diff --git a/gcc/tree.h b/gcc/tree.h
index 92cc929..7d20f74 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1070,10 +1070,25 @@  extern void omp_clause_range_check_failed (const_tree, const char *, int,
 #define EXPR_FILENAME(NODE) LOCATION_FILE (EXPR_CHECK ((NODE))->exp.locus)
 #define EXPR_LINENO(NODE) LOCATION_LINE (EXPR_CHECK (NODE)->exp.locus)
 
+#define CAN_HAVE_RANGE_P(NODE) (CAN_HAVE_LOCATION_P (NODE))
+#define EXPR_LOCATION_RANGE(NODE) (get_expr_source_range (EXPR_CHECK ((NODE))))
+
+#define EXPR_HAS_RANGE(NODE) \
+    (CAN_HAVE_RANGE_P (NODE) \
+     ? EXPR_LOCATION_RANGE (NODE).m_start != UNKNOWN_LOCATION \
+     : false)
+
 /* True if a tree is an expression or statement that can have a
    location.  */
 #define CAN_HAVE_LOCATION_P(NODE) ((NODE) && EXPR_P (NODE))
 
+static inline source_range
+get_expr_source_range (tree expr)
+{
+  location_t loc = EXPR_LOCATION (expr);
+  return get_range_from_loc (line_table, loc);
+}
+
 extern void protected_set_expr_location (tree, location_t);
 
 /* In a TARGET_EXPR node.  */
@@ -2098,6 +2113,9 @@  extern machine_mode element_mode (const_tree t);
 #define DECL_IS_BUILTIN(DECL) \
   (LOCATION_LOCUS (DECL_SOURCE_LOCATION (DECL)) <= BUILTINS_LOCATION)
 
+#define DECL_LOCATION_RANGE(NODE) \
+  (get_decl_source_range (DECL_MINIMAL_CHECK (NODE)))
+
 /*  For FIELD_DECLs, this is the RECORD_TYPE, UNION_TYPE, or
     QUAL_UNION_TYPE node that the field is a member of.  For VAR_DECL,
     PARM_DECL, FUNCTION_DECL, LABEL_DECL, RESULT_DECL, and CONST_DECL
@@ -5148,4 +5166,17 @@  extern void gt_pch_nx (tree &, gt_pointer_operator, void *);
 
 extern bool nonnull_arg_p (const_tree);
 
+extern void
+set_source_range (tree expr, location_t start, location_t finish);
+
+extern void
+set_source_range (tree expr, source_range src_range);
+
+static inline source_range
+get_decl_source_range (tree decl)
+{
+  location_t loc = DECL_SOURCE_LOCATION (decl);
+  return get_range_from_loc (line_table, loc);
+}
+
 #endif  /* GCC_TREE_H  */