diff mbox

[U-Boot,07/17] x86: Use fs for global data

Message ID 1325477374-6417-8-git-send-email-graeme.russ@gmail.com
State Superseded
Headers show

Commit Message

Graeme Russ Jan. 2, 2012, 4:09 a.m. UTC
Use the base address of the 'F' segment as a pointer to the global data
structure. By adding the linear address (i.e. the 'D' segment address)
as the first word of the global data structure, the address of the
global data relative to the 'D' segment can be found simply, for
example, by:

	fs movl 0, %eax

This makes the gd 'pointer' writable prior to relocation (by reloading
the GDT), which brings x86 into line with all other arches.

NOTE: Writing to the gd 'pointer' is expensive (although we only do it
twice), whereas using it to access global data members (read and write)
is still fairly cheap.

Signed-off-by: Graeme Russ <graeme.russ@gmail.com>
---
 arch/x86/cpu/cpu.c                 |   53 ++++++++++++++++++++--------------
 arch/x86/cpu/start.S               |    8 ++++-
 arch/x86/include/asm/global_data.h |   21 +++++++++----
 arch/x86/include/asm/processor.h   |    6 +++-
 arch/x86/include/asm/u-boot-x86.h  |    2 +
 arch/x86/lib/board.c               |   56 +++++++++++++++++++++++++-----------
 6 files changed, 98 insertions(+), 48 deletions(-)

Comments

Simon Glass Jan. 4, 2012, 5:36 a.m. UTC | #1
Hi Graeme,

On Sun, Jan 1, 2012 at 8:09 PM, Graeme Russ <graeme.russ@gmail.com> wrote:
> Use the base address of the 'F' segment as a pointer to the global data
> structure. By adding the linear address (i.e. the 'D' segment address)
> as the first word of the global data structure, the address of the
> global data relative to the 'D' segment can be found simply, for
> example, by:
>
>        fs movl 0, %eax
>
> This makes the gd 'pointer' writable prior to relocation (by reloading
> the GDT) which brings x86 into line with all other arches

What is the GDT?

>
> NOTE: Writing to the gd 'pointer' is expensive (but we only do it
> twice) but using it to access global data members (read and write) is
> still fairly cheap
>
> Signed-off-by: Graeme Russ <graeme.russ@gmail.com>
> ---
>  arch/x86/cpu/cpu.c                 |   53 ++++++++++++++++++++--------------
>  arch/x86/cpu/start.S               |    8 ++++-
>  arch/x86/include/asm/global_data.h |   21 +++++++++----
>  arch/x86/include/asm/processor.h   |    6 +++-
>  arch/x86/include/asm/u-boot-x86.h  |    2 +
>  arch/x86/lib/board.c               |   56 +++++++++++++++++++++++++-----------
>  6 files changed, 98 insertions(+), 48 deletions(-)
>
> diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c
> index bf55c26..e7a5fc0 100644
> --- a/arch/x86/cpu/cpu.c
> +++ b/arch/x86/cpu/cpu.c
> @@ -90,6 +90,37 @@ static void load_gdt(const u64 *boot_gdt, u16 num_entries)
>        asm volatile("lgdtl %0\n" : : "m" (gdt));
>  }
>
> +void init_gd(gd_t *id, u64 *gdt_addr)
> +{
> +       id->gd_addr = (ulong)id;
> +       setup_gdt(id, gdt_addr);
> +}
> +
> +void setup_gdt(gd_t *id, u64 *gdt_addr)

I will probably never understand this function but a comment might be nice.

> +{
> +       /* CS: code, read/execute, 4 GB, base 0 */
> +       gdt_addr[GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff);
> +
> +       /* DS: data, read/write, 4 GB, base 0 */
> +       gdt_addr[GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff);
> +
> +       /* FS: data, read/write, 4 GB, base (Global Data Pointer) */
> +       gdt_addr[GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, (ulong)id, 0xfffff);
> +
> +               /* 16-bit CS: code, read/execute, 64 kB, base 0 */

Extra indent?

> +       gdt_addr[GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff);
> +
> +       /* 16-bit DS: data, read/write, 64 kB, base 0 */
> +       gdt_addr[GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff);
> +
> +       load_gdt(gdt_addr, GDT_NUM_ENTRIES);
> +       load_ds(GDT_ENTRY_32BIT_DS);
> +       load_es(GDT_ENTRY_32BIT_DS);
> +       load_gs(GDT_ENTRY_32BIT_DS);
> +       load_ss(GDT_ENTRY_32BIT_DS);
> +       load_fs(GDT_ENTRY_32BIT_FS);
> +}
> +
>  int x86_cpu_init_f(void)
>  {
>        const u32 em_rst = ~X86_CR0_EM;
> @@ -117,28 +148,6 @@ int x86_cpu_init_r(void)
>            "movl       %%eax, %%cr0\n"
>            "wbinvd\n" : : "i" (nw_cd_rst) : "eax");
>
> -       /*
> -        * There are machines which are known to not boot with the GDT
> -        * being 8-byte unaligned. Intel recommends 16 byte alignment
> -        */
> -       static const u64 boot_gdt[] __aligned(16) = {
> -               /* CS: code, read/execute, 4 GB, base 0 */
> -               [GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
> -               /* DS: data, read/write, 4 GB, base 0 */
> -               [GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
> -               /* 16-bit CS: code, read/execute, 64 kB, base 0 */
> -               [GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff),
> -               /* 16-bit DS: data, read/write, 64 kB, base 0 */
> -               [GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff),
> -       };
> -
> -       load_gdt(boot_gdt, GDT_NUM_ENTRIES);
> -       load_ds(GDT_ENTRY_32BIT_DS);
> -       load_es(GDT_ENTRY_32BIT_DS);
> -       load_fs(GDT_ENTRY_32BIT_DS);
> -       load_gs(GDT_ENTRY_32BIT_DS);
> -       load_ss(GDT_ENTRY_32BIT_DS);
> -
>        /* Initialize core interrupt and exception functionality of CPU */
>        cpu_init_interrupts();
>        return 0;
> diff --git a/arch/x86/cpu/start.S b/arch/x86/cpu/start.S
> index 9592158..4fb9e6b 100644
> --- a/arch/x86/cpu/start.S
> +++ b/arch/x86/cpu/start.S
> @@ -31,7 +31,7 @@
>  #include <asm/global_data.h>
>  #include <asm/processor.h>
>  #include <asm/processor-flags.h>
> -#include <generated/asm-offsets.h>
> +#include <generated/generic-asm-offsets.h>
>
>  .section .text
>  .code32
> @@ -85,6 +85,12 @@ car_init_ret:
>         */
>        movl    $CONFIG_SYS_INIT_SP_ADDR, %esp
>
> +       /* Initialise the Global Data Pointer */
> +       movl    $CONFIG_SYS_INIT_GD_ADDR, %eax
> +       movl    %eax, %edx
> +       addl    $GENERATED_GBL_DATA_SIZE, %edx
> +       call    init_gd;
> +
>        /* Set parameter to board_init_f() to boot flags */
>        xorl    %eax, %eax
>        movw    %bx, %ax
> diff --git a/arch/x86/include/asm/global_data.h b/arch/x86/include/asm/global_data.h
> index 05a2139..908a02c 100644
> --- a/arch/x86/include/asm/global_data.h
> +++ b/arch/x86/include/asm/global_data.h
> @@ -36,6 +36,8 @@
>  #ifndef __ASSEMBLY__
>
>  typedef        struct global_data {
> +       /* NOTE: gd_addr MUST be first member of struct global_data! */
> +       unsigned long   gd_addr;        /* Location of Global Data */
>        bd_t            *bd;
>        unsigned long   flags;
>        unsigned long   baudrate;
> @@ -51,13 +53,24 @@ typedef     struct global_data {
>        unsigned long   bus_clk;
>        unsigned long   relocaddr;      /* Start address of U-Boot in RAM */
>        unsigned long   start_addr_sp;  /* start_addr_stackpointer */
> +       unsigned long   gdt_addr;       /* Location of GDT */
> +       unsigned long   new_gd_addr;    /* New location of Global Data */
>        phys_size_t     ram_size;       /* RAM size */
>        unsigned long   reset_status;   /* reset status register at boot */
>        void            **jt;           /* jump table */
>        char            env_buf[32];    /* buffer for getenv() before reloc. */
>  } gd_t;
>
> -extern gd_t *gd;
> +static inline gd_t *get_fs_gd_ptr(void)
> +{
> +       gd_t *gd_ptr;
> +
> +       asm volatile("fs movl 0, %0\n" : "=r" (gd_ptr));
> +
> +       return gd_ptr;
> +}
> +
> +#define gd     get_fs_gd_ptr()
>
>  #endif
>
> @@ -73,12 +86,6 @@ extern gd_t *gd;
>  #define GD_FLG_DISABLE_CONSOLE 0x00040 /* Disable console (in & out)           */
>  #define GD_FLG_ENV_READY       0x00080 /* Environment imported into hash table */
>
> -#if 0
>  #define DECLARE_GLOBAL_DATA_PTR
> -#else
> -#define XTRN_DECLARE_GLOBAL_DATA_PTR    extern
> -#define DECLARE_GLOBAL_DATA_PTR     XTRN_DECLARE_GLOBAL_DATA_PTR \
> -gd_t *gd
> -#endif
>
>  #endif /* __ASM_GBL_DATA_H */
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 203c63a..07897f9 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -24,13 +24,17 @@
>  #ifndef __ASM_PROCESSOR_H_
>  #define __ASM_PROCESSOR_H_ 1
>
> +#define GDT_ENTRY_SIZE         8
> +
>  #define GDT_ENTRY_NULL         0
>  #define GDT_ENTRY_UNUSED       (GDT_ENTRY_NULL + 1)
>  #define GDT_ENTRY_32BIT_CS     (GDT_ENTRY_UNUSED + 1)
>  #define GDT_ENTRY_32BIT_DS     (GDT_ENTRY_32BIT_CS + 1)
> -#define GDT_ENTRY_16BIT_CS     (GDT_ENTRY_32BIT_DS + 1)
> +#define GDT_ENTRY_32BIT_FS     (GDT_ENTRY_32BIT_DS + 1)
> +#define GDT_ENTRY_16BIT_CS     (GDT_ENTRY_32BIT_FS + 1)
>  #define GDT_ENTRY_16BIT_DS     (GDT_ENTRY_16BIT_CS + 1)
>
>  #define GDT_NUM_ENTRIES                (GDT_ENTRY_16BIT_DS + 1)
> +#define GDT_SIZE               (GDT_NUM_ENTRIES * GDT_ENTRY_SIZE)

Wasn't this already done in an earlier patch?

>
>  #endif
> diff --git a/arch/x86/include/asm/u-boot-x86.h b/arch/x86/include/asm/u-boot-x86.h
> index c3d2277..5540d51 100644
> --- a/arch/x86/include/asm/u-boot-x86.h
> +++ b/arch/x86/include/asm/u-boot-x86.h
> @@ -37,6 +37,8 @@ int x86_cpu_init_r(void);
>  int cpu_init_r(void);
>  int x86_cpu_init_f(void);
>  int cpu_init_f(void);
> +void init_gd(gd_t *id, u64 *gdt_addr);
> +void setup_gdt(gd_t *id, u64 *gdt_addr);
>
>  /* cpu/.../timer.c */
>  void timer_isr(void *);
> diff --git a/arch/x86/lib/board.c b/arch/x86/lib/board.c
> index bc5027b..9c4ecda 100644
> --- a/arch/x86/lib/board.c
> +++ b/arch/x86/lib/board.c
> @@ -42,20 +42,12 @@
>  #include <serial.h>
>  #include <asm/u-boot-x86.h>
>  #include <elf.h>
> +#include <asm/processor.h>
>
>  #ifdef CONFIG_BITBANGMII
>  #include <miiphy.h>
>  #endif
>
> -/*
> - * Pointer to initial global data area
> - *
> - * Here we initialize it.
> - */
> -#undef XTRN_DECLARE_GLOBAL_DATA_PTR
> -#define XTRN_DECLARE_GLOBAL_DATA_PTR   /* empty = allocate here */
> -DECLARE_GLOBAL_DATA_PTR = (gd_t *) (CONFIG_SYS_INIT_GD_ADDR);
> -
>  /************************************************************************
>  * Init Utilities                                                      *
>  ************************************************************************
> @@ -128,6 +120,7 @@ static int calculate_relocation_address(void);
>  static int copy_uboot_to_ram(void);
>  static int clear_bss(void);
>  static int do_elf_reloc_fixups(void);
> +static int copy_gd_to_ram(void);
>
>  init_fnc_t *init_sequence_f[] = {
>        cpu_init_f,
> @@ -146,6 +139,7 @@ init_fnc_t *init_sequence_f[] = {
>  };
>
>  init_fnc_t *init_sequence_r[] = {
> +       copy_gd_to_ram,
>        cpu_init_r,             /* basic cpu dependent setup */
>        board_early_init_r,     /* basic board dependent setup */
>        dram_init,              /* configure available RAM banks */
> @@ -157,8 +151,6 @@ init_fnc_t *init_sequence_r[] = {
>        NULL,
>  };
>
> -gd_t *gd;
> -
>  static int calculate_relocation_address(void)
>  {
>        ulong text_start = (ulong)&__text_start;
> @@ -171,8 +163,18 @@ static int calculate_relocation_address(void)
>         *       requirements
>         */
>
> -       /* Stack is at top of available memory */
> +       /* Global Data is at top of available memory */
>        dest_addr = gd->ram_size;
> +       dest_addr -= GENERATED_GBL_DATA_SIZE;
> +       dest_addr &= ~15;
> +       gd->new_gd_addr = dest_addr;
> +
> +       /* GDT is below Global Data */
> +       dest_addr -= GDT_SIZE;
> +       dest_addr &= ~15;
> +       gd->gdt_addr = dest_addr;
> +
> +       /* Stack is below GDT */
>        gd->start_addr_sp = dest_addr;
>
>        /* U-Boot is below the stack */
> @@ -279,6 +281,31 @@ void board_init_f_r(void)
>                ;
>  }
>
> +static int copy_gd_to_ram(void)
> +{
> +       gd_t *ram_gd;
> +
> +       /*
> +        * Global data is still in temporary memory (the CPU cache).
> +        * calculate_relocation_address() has set gd->new_gd_addr to
> +        * where the global data lives in RAM but getting it there
> +        * safely is a bit tricky due to the 'F-Segment Hack' that
> +        * we need to use for x86
> +        */
> +       ram_gd = (gd_t *)gd->new_gd_addr;
> +       memcpy((void *)ram_gd, gd, sizeof(gd_t));
> +
> +       /*
> +        * Reload the Global Descriptor Table so FS points to the
> +        * in-RAM copy of Global Data (calculate_relocation_address()
> +        * has already calculated the in-RAM location of the GDT)
> +        */
> +       ram_gd->gd_addr = (ulong)ram_gd;
> +       init_gd(ram_gd, (u64 *)gd->gdt_addr);
> +
> +       return 0;
> +}
> +
>  void board_init_r(gd_t *id, ulong dest_addr)
>  {
>  #if defined(CONFIG_CMD_NET)
> @@ -288,15 +315,10 @@ void board_init_r(gd_t *id, ulong dest_addr)
>        ulong size;
>  #endif
>        static bd_t bd_data;
> -       static gd_t gd_data;
>        init_fnc_t **init_fnc_ptr;
>
>        show_boot_progress(0x21);
>
> -       /* Global data pointer is now writable */
> -       gd = &gd_data;
> -       memcpy(gd, id, sizeof(gd_t));
> -
>        /* compiler optimization barrier needed for GCC >= 3.4 */
>        __asm__ __volatile__("" : : : "memory");
>
> --
> 1.7.5.2.317.g391b14
>
> _______________________________________________
> U-Boot mailing list
> U-Boot@lists.denx.de
> http://lists.denx.de/mailman/listinfo/u-boot

Regards,
Simon
Graeme Russ Jan. 4, 2012, 11:14 a.m. UTC | #2
Hi Simon,

On 04/01/12 16:36, Simon Glass wrote:
> Hi Graeme,
> 
> On Sun, Jan 1, 2012 at 8:09 PM, Graeme Russ <graeme.russ@gmail.com> wrote:
>> Use the base address of the 'F' segment as a pointer to the global data
>> structure. By adding the linear address (i.e. the 'D' segment address)
>> as the first word of the global data structure, the address of the
>> global data relative to the 'D' segment can be found simply, for
>> example, by:
>>
>>        fs movl 0, %eax
>>
>> This makes the gd 'pointer' writable prior to relocation (by reloading
>> the GDT) which brings x86 into line with all other arches
> 
> What is the GDT?

Global Descriptor Table. It's a kind of lookup table which the x86 CPU
uses to calculate physical addresses relative to 'segments'. By setting
the base of the 'F' segment to the physical address of the global data
structure, and adding a self-referencing physical address to the global
data structure (as its first member), reading the first word of the 'F'
segment provides the physical address of the global data.

>>
>> NOTE: Writing to the gd 'pointer' is expensive (but we only do it
>> twice) but using it to access global data members (read and write) is
>> still fairly cheap
>>
>> Signed-off-by: Graeme Russ <graeme.russ@gmail.com>
>> ---
>>  arch/x86/cpu/cpu.c                 |   53 ++++++++++++++++++++--------------
>>  arch/x86/cpu/start.S               |    8 ++++-
>>  arch/x86/include/asm/global_data.h |   21 +++++++++----
>>  arch/x86/include/asm/processor.h   |    6 +++-
>>  arch/x86/include/asm/u-boot-x86.h  |    2 +
>>  arch/x86/lib/board.c               |   56 +++++++++++++++++++++++++-----------
>>  6 files changed, 98 insertions(+), 48 deletions(-)
>>
>> diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c
>> index bf55c26..e7a5fc0 100644
>> --- a/arch/x86/cpu/cpu.c
>> +++ b/arch/x86/cpu/cpu.c
>> @@ -90,6 +90,37 @@ static void load_gdt(const u64 *boot_gdt, u16 num_entries)
>>        asm volatile("lgdtl %0\n" : : "m" (gdt));
>>  }
>>
>> +void init_gd(gd_t *id, u64 *gdt_addr)
>> +{
>> +       id->gd_addr = (ulong)id;
>> +       setup_gdt(id, gdt_addr);
>> +}
>> +
>> +void setup_gdt(gd_t *id, u64 *gdt_addr)
> 
> I will probably never understand this function but a comment might be nice.

Hmm, I guess I really don't know how to comment this to explain it clearly
- The GDT is a bit of a magical beast which you kind of have to learn by
osmosis ;)

> 
>> +{
>> +       /* CS: code, read/execute, 4 GB, base 0 */
>> +       gdt_addr[GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff);
>> +
>> +       /* DS: data, read/write, 4 GB, base 0 */
>> +       gdt_addr[GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff);
>> +
>> +       /* FS: data, read/write, 4 GB, base (Global Data Pointer) */
>> +       gdt_addr[GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, (ulong)id, 0xfffff);
>> +
>> +               /* 16-bit CS: code, read/execute, 64 kB, base 0 */
> 
> Extra indent?

Removed

[snip]

>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>> index 203c63a..07897f9 100644
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -24,13 +24,17 @@
>>  #ifndef __ASM_PROCESSOR_H_
>>  #define __ASM_PROCESSOR_H_ 1
>>
>> +#define GDT_ENTRY_SIZE         8
>> +
>>  #define GDT_ENTRY_NULL         0
>>  #define GDT_ENTRY_UNUSED       (GDT_ENTRY_NULL + 1)
>>  #define GDT_ENTRY_32BIT_CS     (GDT_ENTRY_UNUSED + 1)
>>  #define GDT_ENTRY_32BIT_DS     (GDT_ENTRY_32BIT_CS + 1)
>> -#define GDT_ENTRY_16BIT_CS     (GDT_ENTRY_32BIT_DS + 1)
>> +#define GDT_ENTRY_32BIT_FS     (GDT_ENTRY_32BIT_DS + 1)
>> +#define GDT_ENTRY_16BIT_CS     (GDT_ENTRY_32BIT_FS + 1)
>>  #define GDT_ENTRY_16BIT_DS     (GDT_ENTRY_16BIT_CS + 1)
>>
>>  #define GDT_NUM_ENTRIES                (GDT_ENTRY_16BIT_DS + 1)
>> +#define GDT_SIZE               (GDT_NUM_ENTRIES * GDT_ENTRY_SIZE)
> 
> Wasn't this already done in an earlier patch?

I've moved this change into patch #3

Regards,

Graeme
diff mbox

Patch

diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c
index bf55c26..e7a5fc0 100644
--- a/arch/x86/cpu/cpu.c
+++ b/arch/x86/cpu/cpu.c
@@ -90,6 +90,37 @@  static void load_gdt(const u64 *boot_gdt, u16 num_entries)
 	asm volatile("lgdtl %0\n" : : "m" (gdt));
 }
 
+void init_gd(gd_t *id, u64 *gdt_addr)
+{
+	id->gd_addr = (ulong)id;
+	setup_gdt(id, gdt_addr);
+}
+
+void setup_gdt(gd_t *id, u64 *gdt_addr)
+{
+	/* CS: code, read/execute, 4 GB, base 0 */
+	gdt_addr[GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff);
+
+	/* DS: data, read/write, 4 GB, base 0 */
+	gdt_addr[GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff);
+
+	/* FS: data, read/write, 4 GB, base (Global Data Pointer) */
+	gdt_addr[GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, (ulong)id, 0xfffff);
+
+		/* 16-bit CS: code, read/execute, 64 kB, base 0 */
+	gdt_addr[GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff);
+
+	/* 16-bit DS: data, read/write, 64 kB, base 0 */
+	gdt_addr[GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff);
+
+	load_gdt(gdt_addr, GDT_NUM_ENTRIES);
+	load_ds(GDT_ENTRY_32BIT_DS);
+	load_es(GDT_ENTRY_32BIT_DS);
+	load_gs(GDT_ENTRY_32BIT_DS);
+	load_ss(GDT_ENTRY_32BIT_DS);
+	load_fs(GDT_ENTRY_32BIT_FS);
+}
+
 int x86_cpu_init_f(void)
 {
 	const u32 em_rst = ~X86_CR0_EM;
@@ -117,28 +148,6 @@  int x86_cpu_init_r(void)
 	    "movl	%%eax, %%cr0\n"
 	    "wbinvd\n" : : "i" (nw_cd_rst) : "eax");
 
-	/*
-	 * There are machines which are known to not boot with the GDT
-	 * being 8-byte unaligned. Intel recommends 16 byte alignment
-	 */
-	static const u64 boot_gdt[] __aligned(16) = {
-		/* CS: code, read/execute, 4 GB, base 0 */
-		[GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
-		/* DS: data, read/write, 4 GB, base 0 */
-		[GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
-		/* 16-bit CS: code, read/execute, 64 kB, base 0 */
-		[GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff),
-		/* 16-bit DS: data, read/write, 64 kB, base 0 */
-		[GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff),
-	};
-
-	load_gdt(boot_gdt, GDT_NUM_ENTRIES);
-	load_ds(GDT_ENTRY_32BIT_DS);
-	load_es(GDT_ENTRY_32BIT_DS);
-	load_fs(GDT_ENTRY_32BIT_DS);
-	load_gs(GDT_ENTRY_32BIT_DS);
-	load_ss(GDT_ENTRY_32BIT_DS);
-
 	/* Initialize core interrupt and exception functionality of CPU */
 	cpu_init_interrupts();
 	return 0;
diff --git a/arch/x86/cpu/start.S b/arch/x86/cpu/start.S
index 9592158..4fb9e6b 100644
--- a/arch/x86/cpu/start.S
+++ b/arch/x86/cpu/start.S
@@ -31,7 +31,7 @@ 
 #include <asm/global_data.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
-#include <generated/asm-offsets.h>
+#include <generated/generic-asm-offsets.h>
 
 .section .text
 .code32
@@ -85,6 +85,12 @@  car_init_ret:
 	 */
 	movl	$CONFIG_SYS_INIT_SP_ADDR, %esp
 
+	/* Initialise the Global Data Pointer */
+	movl	$CONFIG_SYS_INIT_GD_ADDR, %eax
+	movl	%eax, %edx
+	addl	$GENERATED_GBL_DATA_SIZE, %edx
+	call	init_gd;
+
 	/* Set parameter to board_init_f() to boot flags */
 	xorl	%eax, %eax
 	movw	%bx, %ax
diff --git a/arch/x86/include/asm/global_data.h b/arch/x86/include/asm/global_data.h
index 05a2139..908a02c 100644
--- a/arch/x86/include/asm/global_data.h
+++ b/arch/x86/include/asm/global_data.h
@@ -36,6 +36,8 @@ 
 #ifndef __ASSEMBLY__
 
 typedef	struct global_data {
+	/* NOTE: gd_addr MUST be first member of struct global_data! */
+	unsigned long	gd_addr;	/* Location of Global Data */
 	bd_t		*bd;
 	unsigned long	flags;
 	unsigned long	baudrate;
@@ -51,13 +53,24 @@  typedef	struct global_data {
 	unsigned long	bus_clk;
 	unsigned long	relocaddr;	/* Start address of U-Boot in RAM */
 	unsigned long	start_addr_sp;	/* start_addr_stackpointer */
+	unsigned long	gdt_addr;	/* Location of GDT */
+	unsigned long	new_gd_addr;	/* New location of Global Data */
 	phys_size_t	ram_size;	/* RAM size */
 	unsigned long	reset_status;	/* reset status register at boot */
 	void		**jt;		/* jump table */
 	char		env_buf[32];	/* buffer for getenv() before reloc. */
 } gd_t;
 
-extern gd_t *gd;
+static inline gd_t *get_fs_gd_ptr(void)
+{
+	gd_t *gd_ptr;
+
+	asm volatile("fs movl 0, %0\n" : "=r" (gd_ptr));
+
+	return gd_ptr;
+}
+
+#define gd	get_fs_gd_ptr()
 
 #endif
 
@@ -73,12 +86,6 @@  extern gd_t *gd;
 #define GD_FLG_DISABLE_CONSOLE	0x00040	/* Disable console (in & out)		*/
 #define GD_FLG_ENV_READY	0x00080	/* Environment imported into hash table	*/
 
-#if 0
 #define DECLARE_GLOBAL_DATA_PTR
-#else
-#define XTRN_DECLARE_GLOBAL_DATA_PTR    extern
-#define DECLARE_GLOBAL_DATA_PTR     XTRN_DECLARE_GLOBAL_DATA_PTR \
-gd_t *gd
-#endif
 
 #endif /* __ASM_GBL_DATA_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 203c63a..07897f9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -24,13 +24,17 @@ 
 #ifndef __ASM_PROCESSOR_H_
 #define __ASM_PROCESSOR_H_ 1
 
+#define GDT_ENTRY_SIZE		8
+
 #define GDT_ENTRY_NULL		0
 #define GDT_ENTRY_UNUSED	(GDT_ENTRY_NULL + 1)
 #define GDT_ENTRY_32BIT_CS	(GDT_ENTRY_UNUSED + 1)
 #define GDT_ENTRY_32BIT_DS	(GDT_ENTRY_32BIT_CS + 1)
-#define GDT_ENTRY_16BIT_CS	(GDT_ENTRY_32BIT_DS + 1)
+#define GDT_ENTRY_32BIT_FS	(GDT_ENTRY_32BIT_DS + 1)
+#define GDT_ENTRY_16BIT_CS	(GDT_ENTRY_32BIT_FS + 1)
 #define GDT_ENTRY_16BIT_DS	(GDT_ENTRY_16BIT_CS + 1)
 
 #define GDT_NUM_ENTRIES		(GDT_ENTRY_16BIT_DS + 1)
+#define GDT_SIZE		(GDT_NUM_ENTRIES * GDT_ENTRY_SIZE)
 
 #endif
diff --git a/arch/x86/include/asm/u-boot-x86.h b/arch/x86/include/asm/u-boot-x86.h
index c3d2277..5540d51 100644
--- a/arch/x86/include/asm/u-boot-x86.h
+++ b/arch/x86/include/asm/u-boot-x86.h
@@ -37,6 +37,8 @@  int x86_cpu_init_r(void);
 int cpu_init_r(void);
 int x86_cpu_init_f(void);
 int cpu_init_f(void);
+void init_gd(gd_t *id, u64 *gdt_addr);
+void setup_gdt(gd_t *id, u64 *gdt_addr);
 
 /* cpu/.../timer.c */
 void timer_isr(void *);
diff --git a/arch/x86/lib/board.c b/arch/x86/lib/board.c
index bc5027b..9c4ecda 100644
--- a/arch/x86/lib/board.c
+++ b/arch/x86/lib/board.c
@@ -42,20 +42,12 @@ 
 #include <serial.h>
 #include <asm/u-boot-x86.h>
 #include <elf.h>
+#include <asm/processor.h>
 
 #ifdef CONFIG_BITBANGMII
 #include <miiphy.h>
 #endif
 
-/*
- * Pointer to initial global data area
- *
- * Here we initialize it.
- */
-#undef	XTRN_DECLARE_GLOBAL_DATA_PTR
-#define XTRN_DECLARE_GLOBAL_DATA_PTR	/* empty = allocate here */
-DECLARE_GLOBAL_DATA_PTR = (gd_t *) (CONFIG_SYS_INIT_GD_ADDR);
-
 /************************************************************************
  * Init Utilities							*
  ************************************************************************
@@ -128,6 +120,7 @@  static int calculate_relocation_address(void);
 static int copy_uboot_to_ram(void);
 static int clear_bss(void);
 static int do_elf_reloc_fixups(void);
+static int copy_gd_to_ram(void);
 
 init_fnc_t *init_sequence_f[] = {
 	cpu_init_f,
@@ -146,6 +139,7 @@  init_fnc_t *init_sequence_f[] = {
 };
 
 init_fnc_t *init_sequence_r[] = {
+	copy_gd_to_ram,
 	cpu_init_r,		/* basic cpu dependent setup */
 	board_early_init_r,	/* basic board dependent setup */
 	dram_init,		/* configure available RAM banks */
@@ -157,8 +151,6 @@  init_fnc_t *init_sequence_r[] = {
 	NULL,
 };
 
-gd_t *gd;
-
 static int calculate_relocation_address(void)
 {
 	ulong text_start = (ulong)&__text_start;
@@ -171,8 +163,18 @@  static int calculate_relocation_address(void)
 	 *       requirements
 	 */
 
-	/* Stack is at top of available memory */
+	/* Global Data is at top of available memory */
 	dest_addr = gd->ram_size;
+	dest_addr -= GENERATED_GBL_DATA_SIZE;
+	dest_addr &= ~15;
+	gd->new_gd_addr = dest_addr;
+
+	/* GDT is below Global Data */
+	dest_addr -= GDT_SIZE;
+	dest_addr &= ~15;
+	gd->gdt_addr = dest_addr;
+
+	/* Stack is below GDT */
 	gd->start_addr_sp = dest_addr;
 
 	/* U-Boot is below the stack */
@@ -279,6 +281,31 @@  void board_init_f_r(void)
 		;
 }
 
+static int copy_gd_to_ram(void)
+{
+	gd_t *ram_gd;
+
+	/*
+	 * Global data is still in temporary memory (the CPU cache).
+	 * calculate_relocation_address() has set gd->new_gd_addr to
+	 * where the global data lives in RAM but getting it there
+	 * safely is a bit tricky due to the 'F-Segment Hack' that
+	 * we need to use for x86
+	 */
+	ram_gd = (gd_t *)gd->new_gd_addr;
+	memcpy((void *)ram_gd, gd, sizeof(gd_t));
+
+	/*
+	 * Reload the Global Descriptor Table so FS points to the
+	 * in-RAM copy of Global Data (calculate_relocation_address()
+	 * has already calculated the in-RAM location of the GDT)
+	 */
+	ram_gd->gd_addr = (ulong)ram_gd;
+	init_gd(ram_gd, (u64 *)gd->gdt_addr);
+
+	return 0;
+}
+
 void board_init_r(gd_t *id, ulong dest_addr)
 {
 #if defined(CONFIG_CMD_NET)
@@ -288,15 +315,10 @@  void board_init_r(gd_t *id, ulong dest_addr)
 	ulong size;
 #endif
 	static bd_t bd_data;
-	static gd_t gd_data;
 	init_fnc_t **init_fnc_ptr;
 
 	show_boot_progress(0x21);
 
-	/* Global data pointer is now writable */
-	gd = &gd_data;
-	memcpy(gd, id, sizeof(gd_t));
-
 	/* compiler optimization barrier needed for GCC >= 3.4 */
 	__asm__ __volatile__("" : : : "memory");