diff mbox series

[RFC,4/4] objtool/powerpc: Add --mcount specific implementation

Message ID 20220523175548.922671-5-sv@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series objtool: Enable and implement --mcount option on powerpc | expand

Commit Message

Sathvika Vasireddy May 23, 2022, 5:55 p.m. UTC
This patch enables objtool --mcount on powerpc, and
adds implementation specific to powerpc.

Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
---
 arch/powerpc/Kconfig                |  1 +
 tools/objtool/arch/powerpc/decode.c | 14 ++++++++++++++
 tools/objtool/check.c               | 12 +++++++-----
 tools/objtool/elf.c                 | 13 +++++++++++++
 tools/objtool/include/objtool/elf.h |  1 +
 5 files changed, 36 insertions(+), 5 deletions(-)

Comments

Christophe Leroy May 24, 2022, 9:35 a.m. UTC | #1
Le 23/05/2022 à 19:55, Sathvika Vasireddy a écrit :
> This patch enables objtool --mcount on powerpc, and
> adds implementation specific to powerpc.
> 
> Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
> ---
>   arch/powerpc/Kconfig                |  1 +
>   tools/objtool/arch/powerpc/decode.c | 14 ++++++++++++++
>   tools/objtool/check.c               | 12 +++++++-----
>   tools/objtool/elf.c                 | 13 +++++++++++++
>   tools/objtool/include/objtool/elf.h |  1 +
>   5 files changed, 36 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 732a3f91ee5e..3373d44a1298 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -233,6 +233,7 @@ config PPC
>   	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
>   	select HAVE_OPTPROBES
>   	select HAVE_OBJTOOL			if PPC64
> +	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
>   	select HAVE_PERF_EVENTS
>   	select HAVE_PERF_EVENTS_NMI		if PPC64
>   	select HAVE_PERF_REGS
> diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
> index e3b77a6ce357..ad3d79fffac2 100644
> --- a/tools/objtool/arch/powerpc/decode.c
> +++ b/tools/objtool/arch/powerpc/decode.c
> @@ -40,12 +40,26 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
>   			    struct list_head *ops_list)
>   {
>   	u32 insn;
> +	unsigned int opcode;
>   
>   	*immediate = 0;
>   	memcpy(&insn, sec->data->d_buf+offset, 4);
>   	*len = 4;
>   	*type = INSN_OTHER;
>   
> +	opcode = (insn >> 26);

You dont need the brackets here.

> +
> +	switch (opcode) {
> +	case 18: /* bl */
> +		if ((insn & 3) == 1) {
> +			*type = INSN_CALL;
> +			*immediate = insn & 0x3fffffc;
> +			if (*immediate & 0x2000000)
> +				*immediate -= 0x4000000;
> +		}
> +		break;
> +	}
> +
>   	return 0;
>   }
>   
> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> index 056302d58e23..fd8bad092f89 100644
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -832,7 +832,7 @@ static int create_mcount_loc_sections(struct objtool_file *file)
>   
>   		if (elf_add_reloc_to_insn(file->elf, sec,
>   					  idx * sizeof(unsigned long),
> -					  R_X86_64_64,
> +					  elf_reloc_type_long(file->elf),
>   					  insn->sec, insn->offset))
>   			return -1;
>   
> @@ -2183,7 +2183,7 @@ static int classify_symbols(struct objtool_file *file)
>   			if (arch_is_retpoline(func))
>   				func->retpoline_thunk = true;
>   
> -			if (!strcmp(func->name, "__fentry__"))
> +			if ((!strcmp(func->name, "__fentry__")) || (!strcmp(func->name, "_mcount")))
>   				func->fentry = true;
>   
>   			if (is_profiling_func(func->name))
> @@ -2259,9 +2259,11 @@ static int decode_sections(struct objtool_file *file)
>   	 * Must be before add_jump_destinations(), which depends on 'func'
>   	 * being set for alternatives, to enable proper sibling call detection.
>   	 */
> -	ret = add_special_section_alts(file);
> -	if (ret)
> -		return ret;
> +	if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
> +		ret = add_special_section_alts(file);
> +		if (ret)
> +			return ret;
> +	}

I think this change should be a patch by itself, it's not related to 
powerpc.

>   
>   	ret = add_jump_destinations(file);
>   	if (ret)
> diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
> index c25e957c1e52..95763060d551 100644
> --- a/tools/objtool/elf.c
> +++ b/tools/objtool/elf.c
> @@ -793,6 +793,19 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
>   	return sym;
>   }
>   
> +int elf_reloc_type_long(struct elf *elf)

Not sure it's a good name, because for 32 bits we have to use 'int'.

> +{
> +	switch (elf->ehdr.e_machine) {
> +	case EM_X86_64:
> +		return R_X86_64_64;
> +	case EM_PPC64:
> +		return R_PPC64_ADDR64;
> +	default:
> +		WARN("unknown machine...");
> +		exit(-1);
> +	}
> +}

Wouldn't it be better to make that function arch specific ?

> +
>   int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
>   			  unsigned long offset, unsigned int type,
>   			  struct section *insn_sec, unsigned long insn_off)
> diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
> index adebfbc2b518..c43e23c0b3c8 100644
> --- a/tools/objtool/include/objtool/elf.h
> +++ b/tools/objtool/include/objtool/elf.h
> @@ -144,6 +144,7 @@ static inline bool has_multiple_files(struct elf *elf)
>   struct elf *elf_open_read(const char *name, int flags);
>   struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
>   
> +int elf_reloc_type_long(struct elf *elf);
>   int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
>   		  unsigned int type, struct symbol *sym, s64 addend);
>   int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
Sathvika Vasireddy May 24, 2022, 11 a.m. UTC | #2
On 24/05/22 15:05, Christophe Leroy wrote:
>
> Le 23/05/2022 à 19:55, Sathvika Vasireddy a écrit :
>> This patch enables objtool --mcount on powerpc, and
>> adds implementation specific to powerpc.
>>
>> Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
>> ---
>>    arch/powerpc/Kconfig                |  1 +
>>    tools/objtool/arch/powerpc/decode.c | 14 ++++++++++++++
>>    tools/objtool/check.c               | 12 +++++++-----
>>    tools/objtool/elf.c                 | 13 +++++++++++++
>>    tools/objtool/include/objtool/elf.h |  1 +
>>    5 files changed, 36 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 732a3f91ee5e..3373d44a1298 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -233,6 +233,7 @@ config PPC
>>    	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
>>    	select HAVE_OPTPROBES
>>    	select HAVE_OBJTOOL			if PPC64
>> +	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
>>    	select HAVE_PERF_EVENTS
>>    	select HAVE_PERF_EVENTS_NMI		if PPC64
>>    	select HAVE_PERF_REGS
>> diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
>> index e3b77a6ce357..ad3d79fffac2 100644
>> --- a/tools/objtool/arch/powerpc/decode.c
>> +++ b/tools/objtool/arch/powerpc/decode.c
>> @@ -40,12 +40,26 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
>>    			    struct list_head *ops_list)
>>    {
>>    	u32 insn;
>> +	unsigned int opcode;
>>    
>>    	*immediate = 0;
>>    	memcpy(&insn, sec->data->d_buf+offset, 4);
>>    	*len = 4;
>>    	*type = INSN_OTHER;
>>    
>> +	opcode = (insn >> 26);
> You dont need the brackets here.
>
>> +
>> +	switch (opcode) {
>> +	case 18: /* bl */
>> +		if ((insn & 3) == 1) {
>> +			*type = INSN_CALL;
>> +			*immediate = insn & 0x3fffffc;
>> +			if (*immediate & 0x2000000)
>> +				*immediate -= 0x4000000;
>> +		}
>> +		break;
>> +	}
>> +
>>    	return 0;
>>    }
>>    
>> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
>> index 056302d58e23..fd8bad092f89 100644
>> --- a/tools/objtool/check.c
>> +++ b/tools/objtool/check.c
>> @@ -832,7 +832,7 @@ static int create_mcount_loc_sections(struct objtool_file *file)
>>    
>>    		if (elf_add_reloc_to_insn(file->elf, sec,
>>    					  idx * sizeof(unsigned long),
>> -					  R_X86_64_64,
>> +					  elf_reloc_type_long(file->elf),
>>    					  insn->sec, insn->offset))
>>    			return -1;
>>    
>> @@ -2183,7 +2183,7 @@ static int classify_symbols(struct objtool_file *file)
>>    			if (arch_is_retpoline(func))
>>    				func->retpoline_thunk = true;
>>    
>> -			if (!strcmp(func->name, "__fentry__"))
>> +			if ((!strcmp(func->name, "__fentry__")) || (!strcmp(func->name, "_mcount")))
>>    				func->fentry = true;
>>    
>>    			if (is_profiling_func(func->name))
>> @@ -2259,9 +2259,11 @@ static int decode_sections(struct objtool_file *file)
>>    	 * Must be before add_jump_destinations(), which depends on 'func'
>>    	 * being set for alternatives, to enable proper sibling call detection.
>>    	 */
>> -	ret = add_special_section_alts(file);
>> -	if (ret)
>> -		return ret;
>> +	if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
>> +		ret = add_special_section_alts(file);
>> +		if (ret)
>> +			return ret;
>> +	}
> I think this change should be a patch by itself, it's not related to
> powerpc.
Makes sense. I'll make this a separate patch in the next revision.
>
>>    
>>    	ret = add_jump_destinations(file);
>>    	if (ret)
>> diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
>> index c25e957c1e52..95763060d551 100644
>> --- a/tools/objtool/elf.c
>> +++ b/tools/objtool/elf.c
>> @@ -793,6 +793,19 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
>>    	return sym;
>>    }
>>    
>> +int elf_reloc_type_long(struct elf *elf)
> Not sure it's a good name, because for 32 bits we have to use 'int'.
Sure, I'll rename it to elf_reloc_type() or some such.
>
>> +{
>> +	switch (elf->ehdr.e_machine) {
>> +	case EM_X86_64:
>> +		return R_X86_64_64;
>> +	case EM_PPC64:
>> +		return R_PPC64_ADDR64;
>> +	default:
>> +		WARN("unknown machine...");
>> +		exit(-1);
>> +	}
>> +}
> Wouldn't it be better to make that function arch specific ?

This is so that we can support cross architecture builds.


Thanks for reviewing!

- Sathvika
Christophe Leroy May 24, 2022, 1:33 p.m. UTC | #3
Le 24/05/2022 à 13:00, Sathvika Vasireddy a écrit :
> [Vous ne recevez pas souvent de courriers de la part de 
> sv@linux.vnet.ibm.com. Découvrez pourquoi cela peut être important à 
> l’adresse https://aka.ms/LearnAboutSenderIdentification.]
> 
> On 24/05/22 15:05, Christophe Leroy wrote:
>>
>> Le 23/05/2022 à 19:55, Sathvika Vasireddy a écrit :
>>> This patch enables objtool --mcount on powerpc, and
>>> adds implementation specific to powerpc.
>>>
>>> Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
>>> ---
>>>    arch/powerpc/Kconfig                |  1 +
>>>    tools/objtool/arch/powerpc/decode.c | 14 ++++++++++++++
>>>    tools/objtool/check.c               | 12 +++++++-----
>>>    tools/objtool/elf.c                 | 13 +++++++++++++
>>>    tools/objtool/include/objtool/elf.h |  1 +
>>>    5 files changed, 36 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>>> index 732a3f91ee5e..3373d44a1298 100644
>>> --- a/arch/powerpc/Kconfig
>>> +++ b/arch/powerpc/Kconfig
>>> @@ -233,6 +233,7 @@ config PPC
>>>      select HAVE_NMI                         if PERF_EVENTS || (PPC64 
>>> && PPC_BOOK3S)
>>>      select HAVE_OPTPROBES
>>>      select HAVE_OBJTOOL                     if PPC64
>>> +    select HAVE_OBJTOOL_MCOUNT              if HAVE_OBJTOOL
>>>      select HAVE_PERF_EVENTS
>>>      select HAVE_PERF_EVENTS_NMI             if PPC64
>>>      select HAVE_PERF_REGS
>>> diff --git a/tools/objtool/arch/powerpc/decode.c 
>>> b/tools/objtool/arch/powerpc/decode.c
>>> index e3b77a6ce357..ad3d79fffac2 100644
>>> --- a/tools/objtool/arch/powerpc/decode.c
>>> +++ b/tools/objtool/arch/powerpc/decode.c
>>> @@ -40,12 +40,26 @@ int arch_decode_instruction(struct objtool_file 
>>> *file, const struct section *sec
>>>                          struct list_head *ops_list)
>>>    {
>>>      u32 insn;
>>> +    unsigned int opcode;
>>>
>>>      *immediate = 0;
>>>      memcpy(&insn, sec->data->d_buf+offset, 4);
>>>      *len = 4;
>>>      *type = INSN_OTHER;
>>>
>>> +    opcode = (insn >> 26);
>> You dont need the brackets here.
>>
>>> +
>>> +    switch (opcode) {
>>> +    case 18: /* bl */
>>> +            if ((insn & 3) == 1) {
>>> +                    *type = INSN_CALL;
>>> +                    *immediate = insn & 0x3fffffc;
>>> +                    if (*immediate & 0x2000000)
>>> +                            *immediate -= 0x4000000;
>>> +            }
>>> +            break;
>>> +    }
>>> +
>>>      return 0;
>>>    }
>>>
>>> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
>>> index 056302d58e23..fd8bad092f89 100644
>>> --- a/tools/objtool/check.c
>>> +++ b/tools/objtool/check.c
>>> @@ -832,7 +832,7 @@ static int create_mcount_loc_sections(struct 
>>> objtool_file *file)
>>>
>>>              if (elf_add_reloc_to_insn(file->elf, sec,
>>>                                        idx * sizeof(unsigned long),
>>> -                                      R_X86_64_64,
>>> +                                      elf_reloc_type_long(file->elf),
>>>                                        insn->sec, insn->offset))
>>>                      return -1;
>>>
>>> @@ -2183,7 +2183,7 @@ static int classify_symbols(struct objtool_file 
>>> *file)
>>>                      if (arch_is_retpoline(func))
>>>                              func->retpoline_thunk = true;
>>>
>>> -                    if (!strcmp(func->name, "__fentry__"))
>>> +                    if ((!strcmp(func->name, "__fentry__")) || 
>>> (!strcmp(func->name, "_mcount")))
>>>                              func->fentry = true;
>>>
>>>                      if (is_profiling_func(func->name))
>>> @@ -2259,9 +2259,11 @@ static int decode_sections(struct objtool_file 
>>> *file)
>>>       * Must be before add_jump_destinations(), which depends on 'func'
>>>       * being set for alternatives, to enable proper sibling call 
>>> detection.
>>>       */
>>> -    ret = add_special_section_alts(file);
>>> -    if (ret)
>>> -            return ret;
>>> +    if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
>>> +            ret = add_special_section_alts(file);
>>> +            if (ret)
>>> +                    return ret;
>>> +    }
>> I think this change should be a patch by itself, it's not related to
>> powerpc.
> Makes sense. I'll make this a separate patch in the next revision.

Great. Can you base your next revision on the one I just sent out ?

I will now start looking at inline static calls for PPC32.

>>
>>>
>>>      ret = add_jump_destinations(file);
>>>      if (ret)
>>> diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
>>> index c25e957c1e52..95763060d551 100644
>>> --- a/tools/objtool/elf.c
>>> +++ b/tools/objtool/elf.c
>>> @@ -793,6 +793,19 @@ elf_create_section_symbol(struct elf *elf, 
>>> struct section *sec)
>>>      return sym;
>>>    }
>>>
>>> +int elf_reloc_type_long(struct elf *elf)
>> Not sure it's a good name, because for 32 bits we have to use 'int'.
> Sure, I'll rename it to elf_reloc_type() or some such.
>>
>>> +{
>>> +    switch (elf->ehdr.e_machine) {
>>> +    case EM_X86_64:
>>> +            return R_X86_64_64;
>>> +    case EM_PPC64:
>>> +            return R_PPC64_ADDR64;
>>> +    default:
>>> +            WARN("unknown machine...");
>>> +            exit(-1);
>>> +    }
>>> +}
>> Wouldn't it be better to make that function arch specific ?
> 
> This is so that we can support cross architecture builds.
> 


I'm not sure I follow you here.

This is only based on the target, it doesn't depend on the build host so 
I can't the link with cross arch builds.

The same as you have arch_decode_instruction(), you could have 
arch_elf_reloc_type_long()
It would make sense indeed, because there is no point in supporting X86 
relocation when you don't support X86 instruction decoding.

Christophe
Christophe Leroy May 25, 2022, 5:27 p.m. UTC | #4
Le 24/05/2022 à 15:33, Christophe Leroy a écrit :
> 
> 
> Le 24/05/2022 à 13:00, Sathvika Vasireddy a écrit :
>>>
>>>> +{
>>>> +    switch (elf->ehdr.e_machine) {
>>>> +    case EM_X86_64:
>>>> +            return R_X86_64_64;
>>>> +    case EM_PPC64:
>>>> +            return R_PPC64_ADDR64;
>>>> +    default:
>>>> +            WARN("unknown machine...");
>>>> +            exit(-1);
>>>> +    }
>>>> +}
>>> Wouldn't it be better to make that function arch specific ?
>>
>> This is so that we can support cross architecture builds.
>>
> 
> 
> I'm not sure I follow you here.
> 
> This is only based on the target, it doesn't depend on the build host so
> I can't the link with cross arch builds.
> 
> The same as you have arch_decode_instruction(), you could have
> arch_elf_reloc_type_long()
> It would make sense indeed, because there is no point in supporting X86
> relocation when you don't support X86 instruction decoding.
> 

Could simply be some macro defined in 
tools/objtool/arch/powerpc/include/arch/elf.h and 
tools/objtool/arch/x86/include/arch/elf.h

The x86 version would be:

#define R_ADDR(elf) R_X86_64_64

And the powerpc version would be:

#define R_ADDR(elf) (elf->ehdr.e_machine == EM_PPC64 ? R_PPC64_ADDR64 : 
R_PPC_ADDR32)

Christophe
Christophe Leroy May 31, 2022, 6:20 a.m. UTC | #5
Le 25/05/2022 à 19:27, Christophe Leroy a écrit :
> 
> 
> Le 24/05/2022 à 15:33, Christophe Leroy a écrit :
>>
>>
>> Le 24/05/2022 à 13:00, Sathvika Vasireddy a écrit :
>>>>
>>>>> +{
>>>>> +    switch (elf->ehdr.e_machine) {
>>>>> +    case EM_X86_64:
>>>>> +            return R_X86_64_64;
>>>>> +    case EM_PPC64:
>>>>> +            return R_PPC64_ADDR64;
>>>>> +    default:
>>>>> +            WARN("unknown machine...");
>>>>> +            exit(-1);
>>>>> +    }
>>>>> +}
>>>> Wouldn't it be better to make that function arch specific ?
>>>
>>> This is so that we can support cross architecture builds.
>>>
>>
>>
>> I'm not sure I follow you here.
>>
>> This is only based on the target, it doesn't depend on the build host so
>> I can't the link with cross arch builds.
>>
>> The same as you have arch_decode_instruction(), you could have
>> arch_elf_reloc_type_long()
>> It would make sense indeed, because there is no point in supporting X86
>> relocation when you don't support X86 instruction decoding.
>>
> 
> Could simply be some macro defined in 
> tools/objtool/arch/powerpc/include/arch/elf.h and 
> tools/objtool/arch/x86/include/arch/elf.h
> 
> The x86 version would be:
> 
> #define R_ADDR(elf) R_X86_64_64
> 
> And the powerpc version would be:
> 
> #define R_ADDR(elf) (elf->ehdr.e_machine == EM_PPC64 ? R_PPC64_ADDR64 : 
> R_PPC_ADDR32)
> 

Well, looking once more, and taking into account the patch from Chen 
https://lore.kernel.org/lkml/20220531020744.236970-4-chenzhongjin@huawei.com/

It would be easier to just define two macros:

#define R_ABS64 R_PPC64_ADDR64
#define R_ABS32 R_PPC_ADDR32

And then in the caller, as we know the size, do something like

	size == sizeof(u64) ? R_ABS64 : R_ABS32;

Christophe
Naveen N. Rao June 16, 2022, 1:34 p.m. UTC | #6
Christophe Leroy wrote:
> 
> 
> Le 25/05/2022 à 19:27, Christophe Leroy a écrit :
>> 
>> 
>> Le 24/05/2022 à 15:33, Christophe Leroy a écrit :
>>>
>>>
>>> Le 24/05/2022 à 13:00, Sathvika Vasireddy a écrit :
>>>>>
>>>>>> +{
>>>>>> +    switch (elf->ehdr.e_machine) {
>>>>>> +    case EM_X86_64:
>>>>>> +            return R_X86_64_64;
>>>>>> +    case EM_PPC64:
>>>>>> +            return R_PPC64_ADDR64;
>>>>>> +    default:
>>>>>> +            WARN("unknown machine...");
>>>>>> +            exit(-1);
>>>>>> +    }
>>>>>> +}
>>>>> Wouldn't it be better to make that function arch specific ?
>>>>
>>>> This is so that we can support cross architecture builds.
>>>>
>>>
>>>
>>> I'm not sure I follow you here.
>>>
>>> This is only based on the target, it doesn't depend on the build host so
>>> I can't the link with cross arch builds.
>>>
>>> The same as you have arch_decode_instruction(), you could have
>>> arch_elf_reloc_type_long()
>>> It would make sense indeed, because there is no point in supporting X86
>>> relocation when you don't support X86 instruction decoding.
>>>
>> 
>> Could simply be some macro defined in 
>> tools/objtool/arch/powerpc/include/arch/elf.h and 
>> tools/objtool/arch/x86/include/arch/elf.h
>> 
>> The x86 version would be:
>> 
>> #define R_ADDR(elf) R_X86_64_64
>> 
>> And the powerpc version would be:
>> 
>> #define R_ADDR(elf) (elf->ehdr.e_machine == EM_PPC64 ? R_PPC64_ADDR64 : 
>> R_PPC_ADDR32)
>> 
> 
> Well, looking once more, and taking into account the patch from Chen 
> https://lore.kernel.org/lkml/20220531020744.236970-4-chenzhongjin@huawei.com/
> 
> It would be easier to just define two macros:
> 
> #define R_ABS64 R_PPC64_ADDR64
> #define R_ABS32 R_PPC_ADDR32
> 
> And then in the caller, as we know the size, do something like
> 
> 	size == sizeof(u64) ? R_ABS64 : R_ABS32;

How does 'sizeof(u64)' work here?


- Naveen
Christophe Leroy June 16, 2022, 1:40 p.m. UTC | #7
Le 16/06/2022 à 15:34, Naveen N. Rao a écrit :
> Christophe Leroy wrote:
>>
>>
>> Le 25/05/2022 à 19:27, Christophe Leroy a écrit :
>>>
>>>
>>> Le 24/05/2022 à 15:33, Christophe Leroy a écrit :
>>>>
>>>>
>>>> Le 24/05/2022 à 13:00, Sathvika Vasireddy a écrit :
>>>>>>
>>>>>>> +{
>>>>>>> +    switch (elf->ehdr.e_machine) {
>>>>>>> +    case EM_X86_64:
>>>>>>> +            return R_X86_64_64;
>>>>>>> +    case EM_PPC64:
>>>>>>> +            return R_PPC64_ADDR64;
>>>>>>> +    default:
>>>>>>> +            WARN("unknown machine...");
>>>>>>> +            exit(-1);
>>>>>>> +    }
>>>>>>> +}
>>>>>> Wouldn't it be better to make that function arch specific ?
>>>>>
>>>>> This is so that we can support cross architecture builds.
>>>>>
>>>>
>>>>
>>>> I'm not sure I follow you here.
>>>>
>>>> This is only based on the target, it doesn't depend on the build 
>>>> host so
>>>> I can't the link with cross arch builds.
>>>>
>>>> The same as you have arch_decode_instruction(), you could have
>>>> arch_elf_reloc_type_long()
>>>> It would make sense indeed, because there is no point in supporting X86
>>>> relocation when you don't support X86 instruction decoding.
>>>>
>>>
>>> Could simply be some macro defined in 
>>> tools/objtool/arch/powerpc/include/arch/elf.h and 
>>> tools/objtool/arch/x86/include/arch/elf.h
>>>
>>> The x86 version would be:
>>>
>>> #define R_ADDR(elf) R_X86_64_64
>>>
>>> And the powerpc version would be:
>>>
>>> #define R_ADDR(elf) (elf->ehdr.e_machine == EM_PPC64 ? R_PPC64_ADDR64 
>>> : R_PPC_ADDR32)
>>>
>>
>> Well, looking once more, and taking into account the patch from Chen 
>> https://lore.kernel.org/lkml/20220531020744.236970-4-chenzhongjin@huawei.com/ 
>>
>>
>> It would be easier to just define two macros:
>>
>> #define R_ABS64 R_PPC64_ADDR64
>> #define R_ABS32 R_PPC_ADDR32
>>
>> And then in the caller, as we know the size, do something like
>>
>>     size == sizeof(u64) ? R_ABS64 : R_ABS32;
> 
> How does 'sizeof(u64)' work here?
> 

sizeof(u64) is always 8 by definition.

So if size is 8 we are working on a binary file for a 64 bits target, if 
not it means we are working for a 32 bits target.

Christophe
Peter Zijlstra June 16, 2022, 1:57 p.m. UTC | #8
On Thu, Jun 16, 2022 at 01:40:34PM +0000, Christophe Leroy wrote:
> sizeof(u64) is always 8 by definition.
> 
> So if size is 8 we are working on a binary file for a 64 bits target, if 
> not it means we are working for a 32 bits target.

Cross-builds invalidate this I think. Best to look at something like:

  elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32
Christophe Leroy June 16, 2022, 2:06 p.m. UTC | #9
Le 16/06/2022 à 15:57, Peter Zijlstra a écrit :
> On Thu, Jun 16, 2022 at 01:40:34PM +0000, Christophe Leroy wrote:
>> sizeof(u64) is always 8 by definition.
>>
>> So if size is 8 we are working on a binary file for a 64 bits target, if
>> not it means we are working for a 32 bits target.
> 
> Cross-builds invalidate this I think. Best to look at something like:
> 
>    elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32
> 
> 

Yes that's what it does indirectly:

	int size = elf_class_size(elf);


With

static inline int elf_class_size(struct elf *elf)
{
	if (elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32)
		return sizeof(u32);
	else
		return sizeof(u64);
}
Naveen N. Rao June 17, 2022, 2:04 p.m. UTC | #10
Christophe Leroy wrote:
> 
> 
> Le 16/06/2022 à 15:57, Peter Zijlstra a écrit :
>> On Thu, Jun 16, 2022 at 01:40:34PM +0000, Christophe Leroy wrote:
>>> sizeof(u64) is always 8 by definition.
>>>
>>> So if size is 8 we are working on a binary file for a 64 bits target, if
>>> not it means we are working for a 32 bits target.
>> 
>> Cross-builds invalidate this I think. Best to look at something like:
>> 
>>    elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32
>> 
>> 
> 
> Yes that's what it does indirectly:
> 
> 	int size = elf_class_size(elf);
> 
> 
> With
> 
> static inline int elf_class_size(struct elf *elf)
> {
> 	if (elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32)
> 		return sizeof(u32);
> 	else
> 		return sizeof(u64);
> }

Ok, those come from the below patch:
https://lore.kernel.org/all/c4b06b5b314183d85615765a5ce421a057674bd8.1653398233.git.christophe.leroy@csgroup.eu/T/#u

I guess it would have been clearer if 'size' was named differently: 
'addr_size' perhaps?


- Naveen
diff mbox series

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 732a3f91ee5e..3373d44a1298 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -233,6 +233,7 @@  config PPC
 	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
 	select HAVE_OPTPROBES
 	select HAVE_OBJTOOL			if PPC64
+	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_PERF_REGS
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index e3b77a6ce357..ad3d79fffac2 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -40,12 +40,26 @@  int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			    struct list_head *ops_list)
 {
 	u32 insn;
+	unsigned int opcode;
 
 	*immediate = 0;
 	memcpy(&insn, sec->data->d_buf+offset, 4);
 	*len = 4;
 	*type = INSN_OTHER;
 
+	opcode = (insn >> 26);
+
+	switch (opcode) {
+	case 18: /* bl */
+		if ((insn & 3) == 1) {
+			*type = INSN_CALL;
+			*immediate = insn & 0x3fffffc;
+			if (*immediate & 0x2000000)
+				*immediate -= 0x4000000;
+		}
+		break;
+	}
+
 	return 0;
 }
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 056302d58e23..fd8bad092f89 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -832,7 +832,7 @@  static int create_mcount_loc_sections(struct objtool_file *file)
 
 		if (elf_add_reloc_to_insn(file->elf, sec,
 					  idx * sizeof(unsigned long),
-					  R_X86_64_64,
+					  elf_reloc_type_long(file->elf),
 					  insn->sec, insn->offset))
 			return -1;
 
@@ -2183,7 +2183,7 @@  static int classify_symbols(struct objtool_file *file)
 			if (arch_is_retpoline(func))
 				func->retpoline_thunk = true;
 
-			if (!strcmp(func->name, "__fentry__"))
+			if ((!strcmp(func->name, "__fentry__")) || (!strcmp(func->name, "_mcount")))
 				func->fentry = true;
 
 			if (is_profiling_func(func->name))
@@ -2259,9 +2259,11 @@  static int decode_sections(struct objtool_file *file)
 	 * Must be before add_jump_destinations(), which depends on 'func'
 	 * being set for alternatives, to enable proper sibling call detection.
 	 */
-	ret = add_special_section_alts(file);
-	if (ret)
-		return ret;
+	if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
+		ret = add_special_section_alts(file);
+		if (ret)
+			return ret;
+	}
 
 	ret = add_jump_destinations(file);
 	if (ret)
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c25e957c1e52..95763060d551 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -793,6 +793,19 @@  elf_create_section_symbol(struct elf *elf, struct section *sec)
 	return sym;
 }
 
+int elf_reloc_type_long(struct elf *elf)
+{
+	switch (elf->ehdr.e_machine) {
+	case EM_X86_64:
+		return R_X86_64_64;
+	case EM_PPC64:
+		return R_PPC64_ADDR64;
+	default:
+		WARN("unknown machine...");
+		exit(-1);
+	}
+}
+
 int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
 			  unsigned long offset, unsigned int type,
 			  struct section *insn_sec, unsigned long insn_off)
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index adebfbc2b518..c43e23c0b3c8 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -144,6 +144,7 @@  static inline bool has_multiple_files(struct elf *elf)
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 
+int elf_reloc_type_long(struct elf *elf);
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
 		  unsigned int type, struct symbol *sym, s64 addend);
 int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,