diff mbox

[SH] PR 50751 - add HImode displacement addressing support

Message ID 1334017875.19154.144.camel@yam-132-YW-E178-FTW
State New
Headers show

Commit Message

Oleg Endo April 10, 2012, 12:31 a.m. UTC
Hello,

The attached patch adds HImode addressing support.
Tested against rev. 186243 with
sudo make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
-m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"

and no new failures.
Test cases will follow soon.

Cheers,
Oleg

ChangeLog:

	PR target/50751
	* config/sh/sh-protos.h (sh_legitimate_index_p): Add
	new arguments consider_sh2a and allow_zero.
	* config/sh/sh.c (sh_legitimate_index_p): Likewise.
	(disp_addr_displacement): New function.
	(sh_address_cost): Use disp_addr_displacement function instead
	of DISP_ADDR_OFFSET.
	(sh_legitimate_address_p): Adapt to changed
	sh_legitimate_index_p declaration.
	(sh_find_mov_disp_adjust): Remove HImode check.
	(sh_secondary_reload): Add HImode case.  Use 
	satisfies_constraint_Sdd, disp_addr_displacement and
	max_mov_insn_displacement.
	(max_mov_insn_displacement): Remove HImode check.
	* config/sh/sh.h (CONST_OK_FOR_K04, CONST_OK_FOR_K12, 
	DISP_ADDR_P, DISP_ADDR_OFFSET): Remove.
	* config/sh/constraints.md (K05, K13): New constraints.
	(K12): Correct comment.
	(Sdd): Do not use DISP_ADDR_P macro.
	(Snd): Use satisfies_constraint_Sdd.
	(Sbw): Likewise.
	* config/sh/sh.md (extendhisi2): Remove constraints from 	
	expander.
	(*extendhisi2_compact, movhi_i): Remove.
	(*extendhisi2_compact_reg, *extendhisi2_compact_mem_disp,
	*extendhisi2_compact_mem_disp, *extendhisi2_compact_snd,
	*movhi_reg_reg, *movhi_store_mem_disp05,
	*movhi_store_mem_disp13, *movhi_load_mem_disp, 
	*movhi_load_mem_disp, *movhi): New insns.
	(*extendqisi2_compact_mem_disp, *extendqisi2_compact_mem_disp,
	*movqi_store_mem_disp04, *movqi_store_mem_disp12,
	*movqi_load_mem_disp, *movqi_load_mem_disp): Use 
	sh_legitimate_index_p instead of CONST_OK_FOR_Kxx.
	Add new peepholes for HImode displacement addressing.

Comments

Kaz Kojima April 10, 2012, 1:42 p.m. UTC | #1
Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch adds HImode addressing support.
> Tested against rev. 186243 with
> sudo make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
> -m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"
> 
> and no new failures.
> Test cases will follow soon.

The patch is OK for trunk.

BTW, do you have the numbers of CSiBE with this?

Regards,
	kaz
Oleg Endo April 10, 2012, 2:08 p.m. UTC | #2
On Tue, 2012-04-10 at 22:42 +0900, Kaz Kojima wrote:
> Oleg Endo <oleg.endo@t-online.de> wrote:
> > The attached patch adds HImode addressing support.
> > Tested against rev. 186243 with
> > sudo make -k check RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
> > -m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"
> > 
> > and no new failures.
> > Test cases will follow soon.
> 
> The patch is OK for trunk.
> 
> BTW, do you have the numbers of CSiBE with this?
> 

Only for "-m4-single -ml -O2 -mpretend-cmove" so far.
Not so spectacular :T
I'll also compare more variants to see whether anything got really bad.
It's a bit difficult to isolate the degradations because quite a lot of
code reordering happens after the patch...

Cheers,
Oleg

OpenTCP-1.0.4
  arp                     2089 -> 2061         -28 / -1.340354 %
  bootp/bootp              740 -> 704          -36 / -4.864865 %
  demo/main_demo           372 -> 368           -4 / -1.075269 %
  demo/tcp_client_demo     396 -> 396     
  demo/tcp_server_demo     468 -> 468     
  demo/udp_demo            268 -> 268     
  dhcp/dhcpc              1588 -> 1588    
  dns/dns                 1340 -> 1328         -12 / -0.895522 %
  ethernet                1240 -> 1228         -12 / -0.967742 %
  http/http_server        1784 -> 1764         -20 / -1.121076 %
  http/https_callbacks     656 -> 656     
  icmp                     348 -> 352           +4 / +1.149425 %
  ip                      1780 -> 1668        -112 / -6.292135 %
  pop3/pop3_client        3796 -> 3728         -68 / -1.791359 %
  pop3/pop3c_callbacks      28 -> 28      
  smtp/smtp_client        2284 -> 2212         -72 / -3.152364 %
  smtp/smtpc_callbacks      32 -> 32      
  system                   980 -> 980     
  tcp                     4966 -> 4970          +4 / +0.080548 %
  tftp/tftps              1024 -> 920         -104 / -10.156250 %
  timers                   228 -> 228     
  udp                     1362 -> 1322         -40 / -2.936858 %

     total:     27769 -> 27269      -500 / -1.800569 %


bzip2-1.0.2
  blocksort       7262 -> 7258          -4 / -0.055081 %
  bzip2          17288 -> 17248        -40 / -0.231374 %
  bzip2recover    3868 -> 3868    
  bzlib          10668 -> 10636        -32 / -0.299963 %
  compress       14208 -> 14192        -16 / -0.112613 %
  crctable        1024 -> 1024    
  decompress      8380 -> 8380    
  huffman         1436 -> 1436    
  randtable       2048 -> 2048    

     total:     66182 -> 66090      -92 / -0.139011 %


cg_compiler_opensrc
  atom            3728 -> 3728    
  binding         1752 -> 1728         -24 / -1.369863 %
  cgcmain         2388 -> 2388    
  cgstruct         128 -> 128     
  check           3300 -> 3300    
  compile        14052 -> 14052   
  constfold       3996 -> 3996    
  cpp             6776 -> 6768          -8 / -0.118064 %
  generic_hal     2516 -> 2516    
  hal             2720 -> 2720    
  inline          2724 -> 2724    
  memory           548 -> 548     
  parser         15796 -> 15796   
  printutils     11888 -> 11888   
  scanner         6716 -> 6672         -44 / -0.655152 %
  semantic        4364 -> 4364    
  stdlib          3224 -> 3224    
  support        24328 -> 24332         +4 / +0.016442 %
  support_iter    1832 -> 1832    
  symbols         6324 -> 6312         -12 / -0.189753 %
  tokenize         624 -> 624     
  tokens          3112 -> 3120          +8 / +0.257069 %

     total:    122836 -> 122760     -76 / -0.061871 %


compiler
  cg         2868 -> 2868    
  main       2600 -> 2600    
  parser     4524 -> 4528          +4 / +0.088417 %
  scanner    3788 -> 3788    
  vam        5264 -> 5264    
  vas        6292 -> 6300          +8 / +0.127146 %

     total:     25336 -> 25348      +12 / +0.047363 %


flex-2.5.31
  buf              1100 -> 1100    
  ccl              1220 -> 1220    
  dfa              6952 -> 6952    
  ecs               612 -> 612     
  filter           2360 -> 2360    
  gen             24512 -> 24488        -24 / -0.097911 %
  libmain            28 -> 28      
  libyywrap           4 -> 4       
  main            19804 -> 19792        -12 / -0.060594 %
  misc             6480 -> 6480    
  nfa              3812 -> 3812    
  options          2768 -> 2768    
  parse            9524 -> 9528          +4 / +0.041999 %
  regex             596 -> 596     
  scan            47012 -> 47004         -8 / -0.017017 %
  scanopt          3672 -> 3672    
  skel            96652 -> 96652   
  sym              1012 -> 1012    
  tables           2080 -> 2064         -16 / -0.769231 %
  tables_shared      28 -> 28      
  tblcmp           3764 -> 3764    
  yylex            1292 -> 1292    

     total:    235284 -> 235228     -56 / -0.023801 %


jikespg-1.3
  src/ctabs      49912 -> 49876        -36 / -0.072127 %
  src/globals      288 -> 288     
  src/lpgparse   47340 -> 47036       -304 / -0.642163 %
  src/lpgutil     5152 -> 5120         -32 / -0.621118 %
  src/main        5440 -> 5440    
  src/mkfirst    15036 -> 14948        -88 / -0.585262 %
  src/mkred       7316 -> 7264         -52 / -0.710771 %
  src/mkstates    2980 -> 2968         -12 / -0.402685 %
  src/partset     1540 -> 1540    
  src/prntstat    3672 -> 3624         -48 / -1.307190 %
  src/produce    11856 -> 11820        -36 / -0.303644 %
  src/ptables     3384 -> 3344         -40 / -1.182033 %
  src/remsp       8328 -> 8136        -192 / -2.305476 %
  src/resolve    12472 -> 12284       -188 / -1.507377 %
  src/spacetab   20228 -> 20160        -68 / -0.336168 %
  src/tabutil    13152 -> 13272       +120 / +0.912409 %
  src/timetab     9428 -> 9376         -52 / -0.551549 %

     total:    217524 -> 216496     -1028 / -0.472592 %


jpeg-6b
  ansi2knr    2920 -> 2920    
  cdjpeg       120 -> 120     
  cjpeg       5780 -> 5780    
  djpeg       6168 -> 6168    
  jcapimin     812 -> 812     
  jcapistd     500 -> 500     
  jccoefct    2056 -> 2056    
  jccolor     1584 -> 1584    
  jcdctmgr    1552 -> 1552    
  jchuff      4484 -> 4484    
  jcinit       208 -> 208     
  jcmainct     424 -> 424     
  jcmarker    2132 -> 2128          -4 / -0.187617 %
  jcmaster    2712 -> 2712    
  jcomapi      144 -> 144     
  jcparam     3080 -> 3080    
  jcphuff     5128 -> 5128    
  jcprepct    1332 -> 1332    
  jcsample    2244 -> 2244    
  jctrans     1552 -> 1552    
  jdapimin    1144 -> 1144    
  jdapistd    1072 -> 1072    
  jdatadst     300 -> 296           -4 / -1.333333 %
  jdatasrc     296 -> 296     
  jdcoefct    3756 -> 3756    
  jdcolor     1336 -> 1336    
  jddctmgr     832 -> 832     
  jdhuff      2736 -> 2736    
  jdinput     1288 -> 1288    
  jdmainct    1540 -> 1540    
  jdmarker    6364 -> 6344         -20 / -0.314268 %
  jdmaster    1892 -> 1892    
  jdmerge     1268 -> 1268    
  jdphuff     3128 -> 3128    
  jdpostct     804 -> 804     
  jdsample    1660 -> 1660    
  jdtrans      372 -> 372     
  jerror      5668 -> 5668    
  jfdctflt     416 -> 416     
  jfdctfst     440 -> 440     
  jfdctint     596 -> 596     
  jidctflt     868 -> 868     
  jidctfst     916 -> 916     
  jidctint    1136 -> 1136    
  jidctred    1296 -> 1292          -4 / -0.308642 %
  jmemmgr     3392 -> 3392    
  jmemnobs      72 -> 72      
  jpegtran    3956 -> 3956    
  jquant1     3036 -> 3036    
  jquant2     4400 -> 4400    
  jutils       472 -> 472     
  rdbmp       2236 -> 2236    
  rdcolmap    1424 -> 1424    
  rdgif         96 -> 96      
  rdjpgcom    2616 -> 2616    
  rdppm       1732 -> 1732    
  rdrle          0 -> 0       
  rdswitch    2092 -> 2092    
  rdtarga     1896 -> 1896    
  transupp    4540 -> 4436        -104 / -2.290749 %
  wrbmp       2928 -> 2928    
  wrgif       3100 -> 3096          -4 / -0.129032 %
  wrjpgcom    3644 -> 3644    
  wrppm        612 -> 612     
  wrrle          0 -> 0       
  wrtarga     1020 -> 1028          +8 / +0.784314 %

     total:    129320 -> 129188     -132 / -0.102072 %


libmspack
  mspack/cabc               8 -> 8       
  mspack/cabd            5912 -> 5920          +8 / +0.135318 %
  mspack/chmc               8 -> 8       
  mspack/chmd            3732 -> 3732    
  mspack/hlpc               8 -> 8       
  mspack/hlpd               8 -> 8       
  mspack/kwajc              8 -> 8       
  mspack/kwajd              8 -> 8       
  mspack/litc               8 -> 8       
  mspack/litd               8 -> 8       
  mspack/lzxc               0 -> 0       
  mspack/lzxd            6212 -> 6212    
  mspack/mszipc             0 -> 0       
  mspack/mszipd            24 -> 24      
  mspack/qtmc               0 -> 0       
  mspack/qtmd            5480 -> 5280        -200 / -3.649635 %
  mspack/system           808 -> 808     
  mspack/szddc              8 -> 8       
  mspack/szddd              8 -> 8       
  test/cabd_md5          1272 -> 1272    
  test/cabd_test        10156 -> 10152         -4 / -0.039386 %
  test/cabextract_md5   24284 -> 24144       -140 / -0.576511 %
  test/cabrip             528 -> 528     
  test/chmd_md5          1144 -> 1144    
  test/md5               3108 -> 3108    

     total:     62740 -> 62404      -336 / -0.535544 %


libpng-1.2.5
  png         3060 -> 3056          -4 / -0.130719 %
  png2pnm     2828 -> 2828    
  pngerror     968 -> 968     
  pngget      2680 -> 2676          -4 / -0.149254 %
  pngmem       596 -> 596     
  pngpread    6676 -> 6676    
  pngread     7560 -> 7560    
  pngrio       340 -> 340     
  pngrtran   20524 -> 20020       -504 / -2.455662 %
  pngrutil   17156 -> 17152         -4 / -0.023315 %
  pngset      6592 -> 6576         -16 / -0.242718 %
  pngtest     6984 -> 6984    
  pngtrans    1972 -> 1972    
  pngwio       396 -> 396     
  pngwrite    7376 -> 7360         -16 / -0.216920 %
  pngwtran    1708 -> 1708    
  pngwutil   14356 -> 14288        -68 / -0.473670 %
  pnm2png     3008 -> 3008    

     total:    104780 -> 104164     -616 / -0.587898 %


linux-2.4.23-pre3-testplatform
  arch/testplatform/kernel/bitmap         12 -> 12      
  arch/testplatform/kernel/cpcmd           4 -> 4       
  arch/testplatform/kernel/debug        4012 -> 4012    
  arch/testplatform/kernel/ebcdic       1536 -> 1536    
  arch/testplatform/kernel/entry          32 -> 32      
  arch/testplatform/kernel/head            4 -> 4       
  arch/testplatform/kernel/init_task   16384 -> 16384   
  arch/testplatform/kernel/irq            28 -> 28      
  arch/testplatform/kernel/needed         28 -> 28      
  arch/testplatform/kernel/process      1068 -> 1068    
  arch/testplatform/kernel/ptrace       1504 -> 1504    
  arch/testplatform/kernel/reipl           4 -> 4       
  arch/testplatform/kernel/s390_ext      708 -> 708     
  arch/testplatform/kernel/s390fpu        20 -> 20      
  arch/testplatform/kernel/semaphore     252 -> 252     
  arch/testplatform/kernel/setup        2724 -> 2712         -12 / -0.440529 %
  arch/testplatform/kernel/signal       3772 -> 3772    
  arch/testplatform/kernel/sys_s390     1116 -> 1116    
  arch/testplatform/kernel/time           20 -> 20      
  arch/testplatform/kernel/traps        6540 -> 6536          -4 / -0.061162 %
  arch/testplatform/lib/checksum           4 -> 4       
  arch/testplatform/lib/delay              8 -> 8       
  arch/testplatform/lib/memcpy             4 -> 4       
  arch/testplatform/lib/memset             4 -> 4       
  arch/testplatform/lib/misaligned       156 -> 156     
  arch/testplatform/lib/strcmp             4 -> 4       
  arch/testplatform/lib/strncpy            4 -> 4       
  arch/testplatform/lib/uaccess           12 -> 12      
  arch/testplatform/mm/extable           116 -> 116     
  arch/testplatform/mm/fault            3500 -> 3500    
  arch/testplatform/mm/init             2756 -> 2756    
  arch/testplatform/mm/ioremap           716 -> 716     
  drivers/block/blkpg                   1152 -> 1152    
  drivers/block/elevator                 672 -> 636          -36 / -5.357143 %
  drivers/block/genhd                    288 -> 288     
  drivers/block/ll_rw_blk               6820 -> 6728         -92 / -1.348974 %
  drivers/char/mem                      2488 -> 2488    
  drivers/char/misc                      908 -> 908     
  drivers/char/n_tty                    5156 -> 5172         +16 / +0.310318 %
  drivers/char/pty                      1732 -> 1696         -36 / -2.078522 %
  drivers/char/random                   5384 -> 5388          +4 / +0.074294 %
  drivers/char/raw                      1556 -> 1552          -4 / -0.257069 %
  drivers/char/tty_io                  11156 -> 11068        -88 / -0.788813 %
  drivers/char/tty_ioctl                1896 -> 1884         -12 / -0.632911 %
  drivers/net/Space                     3404 -> 3404    
  drivers/net/auto_irq                    64 -> 64      
  drivers/net/loopback                   364 -> 356           -8 / -2.197802 %
  drivers/net/net_init                   776 -> 772           -4 / -0.515464 %
  drivers/net/setup                       44 -> 44      
  fs/attr                                776 -> 772           -4 / -0.515464 %
  fs/bad_inode                           232 -> 232     
  fs/binfmt_elf                            0 -> 0       
  fs/binfmt_script                       532 -> 532     
  fs/block_dev                          3620 -> 3608         -12 / -0.331492 %
  fs/buffer                            11872 -> 11776        -96 / -0.808625 %
  fs/char_dev                            400 -> 400     
  fs/dcache                             4652 -> 4656          +4 / +0.085985 %
  fs/devices                             984 -> 988           +4 / +0.406504 %
  fs/dnotify                             600 -> 608           +8 / +1.333333 %
  fs/exec                               4288 -> 4292          +4 / +0.093284 %
  fs/ext3/balloc                        4280 -> 4280    
  fs/ext3/bitmap                         140 -> 140     
  fs/ext3/dir                           1140 -> 1140    
  fs/ext3/file                           392 -> 396           +4 / +1.020408 %
  fs/ext3/fsync                          296 -> 296     
  fs/ext3/ialloc                        2756 -> 2760          +4 / +0.145138 %
  fs/ext3/inode                        12356 -> 12360         +4 / +0.032373 %
  fs/ext3/ioctl                          712 -> 712     
  fs/ext3/namei                         8176 -> 8168          -8 / -0.097847 %
  fs/ext3/super                        10088 -> 10064        -24 / -0.237906 %
  fs/ext3/symlink                        116 -> 116     
  fs/fcntl                              1736 -> 1740          +4 / +0.230415 %
  fs/fifo                                640 -> 640     
  fs/file                                748 -> 748     
  fs/file_table                          812 -> 816           +4 / +0.492611 %
  fs/filesystems                           0 -> 0       
  fs/inode                              6072 -> 6076          +4 / +0.065876 %
  fs/iobuf                              1084 -> 1084    
  fs/ioctl                               412 -> 416           +4 / +0.970874 %
  fs/jbd/checkpoint                     3276 -> 3276    
  fs/jbd/commit                         4380 -> 4364         -16 / -0.365297 %
  fs/jbd/journal                        7940 -> 7932          -8 / -0.100756 %
  fs/jbd/recovery                       2120 -> 2132         +12 / +0.566038 %
  fs/jbd/revoke                         2460 -> 2456          -4 / -0.162602 %
  fs/jbd/transaction                    9236 -> 9208         -28 / -0.303162 %
  fs/lockd/clntlock                      924 -> 924     
  fs/lockd/clntproc                     3296 -> 3304          +8 / +0.242718 %
  fs/lockd/host                         1708 -> 1700          -8 / -0.468384 %
  fs/lockd/lockd_syms                      0 -> 0       
  fs/lockd/mon                          1052 -> 1048          -4 / -0.380228 %
  fs/lockd/svc                          1632 -> 1632    
  fs/lockd/svc4proc                     2900 -> 2888         -12 / -0.413793 %
  fs/lockd/svclock                      2700 -> 2700    
  fs/lockd/svcproc                      3140 -> 3132          -8 / -0.254777 %
  fs/lockd/svcshare                      524 -> 524     
  fs/lockd/svcsubs                      1336 -> 1336    
  fs/lockd/xdr                          5084 -> 5064         -20 / -0.393391 %
  fs/lockd/xdr4                         3764 -> 3756          -8 / -0.212540 %
  fs/locks                              8896 -> 8900          +4 / +0.044964 %
  fs/namei                             10168 -> 10192        +24 / +0.236035 %
  fs/namespace                          4676 -> 4684          +8 / +0.171086 %
  fs/nfs/dir                            4756 -> 4744         -12 / -0.252313 %
  fs/nfs/file                           1144 -> 1144    
  fs/nfs/flushd                          556 -> 556     
  fs/nfs/inode                          7760 -> 7780         +20 / +0.257732 %
  fs/nfs/nfs2xdr                        3992 -> 3980         -12 / -0.300601 %
  fs/nfs/nfs3proc                       3148 -> 3164         +16 / +0.508259 %
  fs/nfs/nfs3xdr                        7472 -> 7464          -8 / -0.107066 %
  fs/nfs/pagelist                       2428 -> 2428    
  fs/nfs/proc                           1896 -> 1900          +4 / +0.210970 %
  fs/nfs/read                           2380 -> 2380    
  fs/nfs/symlink                         396 -> 396     
  fs/nfs/unlink                          800 -> 800     
  fs/nfs/write                          7228 -> 7212         -16 / -0.221361 %
  fs/nfsd/auth                           112 -> 112     
  fs/nfsd/export                        5712 -> 5716          +4 / +0.070028 %
  fs/nfsd/lockd                          304 -> 304     
  fs/nfsd/nfs3proc                      4340 -> 4340    
  fs/nfsd/nfs3xdr                       7492 -> 7480         -12 / -0.160171 %
  fs/nfsd/nfscache                      1720 -> 1720    
  fs/nfsd/nfsctl                         924 -> 924     
  fs/nfsd/nfsfh                         4884 -> 4920         +36 / +0.737101 %
  fs/nfsd/nfsproc                       3680 -> 3692         +12 / +0.326087 %
  fs/nfsd/nfssvc                        1472 -> 1472    
  fs/nfsd/nfsxdr                        3740 -> 3724         -16 / -0.427807 %
  fs/nfsd/stats                           44 -> 44      
  fs/nfsd/vfs                           9684 -> 9644         -40 / -0.413052 %
  fs/open                               4052 -> 4048          -4 / -0.098717 %
  fs/partitions/check                   1740 -> 1740    
  fs/partitions/msdos                    308 -> 308     
  fs/pipe                               2580 -> 2580    
  fs/quota                              2576 -> 2576    
  fs/ramfs/inode                        1364 -> 1380         +16 / +1.173021 %
  fs/read_write                         2272 -> 2272    
  fs/readdir                            1460 -> 1464          +4 / +0.273973 %
  fs/select                             2088 -> 2080          -8 / -0.383142 %
  fs/seq_file                           2248 -> 2248    
  fs/stat                                668 -> 664           -4 / -0.598802 %
  fs/super                              3544 -> 3524         -20 / -0.564334 %
  fs/xattr                               896 -> 896     
  init/do_mounts                        3896 -> 3892          -4 / -0.102669 %
  init/main                             1608 -> 1608    
  init/version                           536 -> 536     
  ipc/msg                               2812 -> 2804          -8 / -0.284495 %
  ipc/sem                               5084 -> 5036         -48 / -0.944138 %
  ipc/shm                               3308 -> 3308    
  ipc/util                              1048 -> 1040          -8 / -0.763359 %
  kernel/acct                              4 -> 4       
  kernel/capability                       36 -> 36      
  kernel/context                         712 -> 712     
  kernel/dma                              12 -> 12      
  kernel/exec_domain                     788 -> 788     
  kernel/exit                           2588 -> 2588    
  kernel/fork                           4272 -> 4272    
  kernel/info                            276 -> 276     
  kernel/itimer                          740 -> 740     
  kernel/kmod                            752 -> 752     
  kernel/module                          968 -> 968     
  kernel/panic                           720 -> 720     
  kernel/printk                         2340 -> 2304         -36 / -1.538462 %
  kernel/ptrace                         1184 -> 1184    
  kernel/resource                       1144 -> 1144    
  kernel/sched                          4736 -> 4736    
  kernel/signal                         3996 -> 3996    
  kernel/softirq                        1804 -> 1804    
  kernel/sys                            4048 -> 4048    
  kernel/sysctl                           52 -> 52      
  kernel/time                           1704 -> 1704    
  kernel/timer                          3452 -> 3448          -4 / -0.115875 %
  kernel/user                            264 -> 264     
  lib/brlock                               0 -> 0       
  lib/bust_spinlocks                      72 -> 72      
  lib/cmdline                            308 -> 308     
  lib/ctype                              256 -> 256     
  lib/dec_and_lock                         0 -> 0       
  lib/dump_stack                          72 -> 72      
  lib/errno                                0 -> 0       
  lib/rbtree                             808 -> 808     
  lib/rwsem-spinlock                     472 -> 472     
  lib/string                             848 -> 848     
  lib/vsprintf                          4588 -> 4588    
  lib/zlib_deflate/deflate              5564 -> 5548         -16 / -0.287563 %
  lib/zlib_deflate/deflate_syms            0 -> 0       
  lib/zlib_deflate/deftree              7868 -> 8080        +212 / +2.694459 %
  lib/zlib_inflate/infblock             2476 -> 2476    
  lib/zlib_inflate/infcodes             1476 -> 1476    
  lib/zlib_inflate/inffast               908 -> 908     
  lib/zlib_inflate/inflate              2376 -> 2376    
  lib/zlib_inflate/inflate_syms            0 -> 0       
  lib/zlib_inflate/inftrees             6640 -> 6640    
  lib/zlib_inflate/infutil               320 -> 320     
  mm/bootmem                            1772 -> 1772    
  mm/filemap                           13604 -> 13620        +16 / +0.117612 %
  mm/memory                             7684 -> 7684    
  mm/mlock                              1616 -> 1616    
  mm/mmap                               3960 -> 3960    
  mm/mprotect                           2192 -> 2192    
  mm/mremap                             2104 -> 2104    
  mm/numa                                892 -> 892     
  mm/oom_kill                            720 -> 720     
  mm/page_alloc                         3728 -> 3728    
  mm/page_io                             660 -> 656           -4 / -0.606061 %
  mm/shmem                              6956 -> 6980         +24 / +0.345026 %
  mm/slab                               4888 -> 4888    
  mm/swap                                388 -> 388     
  mm/swap_state                         1128 -> 1128    
  mm/swapfile                           6528 -> 6516         -12 / -0.183824 %
  mm/vmalloc                            3012 -> 3012    
  mm/vmscan                             2568 -> 2568    
  net/802/p8023                          120 -> 120     
  net/8021q/vlan                        2896 -> 2884         -12 / -0.414365 %
  net/8021q/vlan_dev                    4296 -> 4256         -40 / -0.931099 %
  net/8021q/vlanproc                      16 -> 16      
  net/core/datagram                     1868 -> 1856         -12 / -0.642398 %
  net/core/dev                          9964 -> 9896         -68 / -0.682457 %
  net/core/dev_mcast                     892 -> 892     
  net/core/dst                           956 -> 956     
  net/core/ethtool                      2208 -> 2208    
  net/core/iovec                        1072 -> 1072    
  net/core/neighbour                    8424 -> 8416          -8 / -0.094967 %
  net/core/rtnetlink                    3180 -> 3176          -4 / -0.125786 %
  net/core/scm                          1024 -> 1024    
  net/core/skbuff                       6136 -> 6096         -40 / -0.651890 %
  net/core/sock                         3912 -> 3908          -4 / -0.102249 %
  net/core/utils                         260 -> 260     
  net/ethernet/eth                       728 -> 724           -4 / -0.549451 %
  net/ipv4/af_inet                      5604 -> 5552         -52 / -0.927909 %
  net/ipv4/arp                          4736 -> 4708         -28 / -0.591216 %
  net/ipv4/devinet                      6232 -> 6208         -24 / -0.385109 %
  net/ipv4/fib_frontend                 2772 -> 2788         +16 / +0.577201 %
  net/ipv4/fib_hash                     4760 -> 4756          -4 / -0.084034 %
  net/ipv4/fib_semantics                3072 -> 3028         -44 / -1.432292 %
  net/ipv4/icmp                         4072 -> 4052         -20 / -0.491159 %
  net/ipv4/igmp                        10968 -> 10944        -24 / -0.218818 %
  net/ipv4/inetpeer                     2068 -> 2012         -56 / -2.707930 %
  net/ipv4/ip_forward                    488 -> 480           -8 / -1.639344 %
  net/ipv4/ip_fragment                  3328 -> 3304         -24 / -0.721154 %
  net/ipv4/ip_input                      888 -> 880           -8 / -0.900901 %
  net/ipv4/ip_options                   3536 -> 3536    
  net/ipv4/ip_output                    5544 -> 5460         -84 / -1.515152 %
  net/ipv4/ip_sockglue                  5360 -> 5308         -52 / -0.970149 %
  net/ipv4/proc                         2816 -> 2816    
  net/ipv4/protocol                      460 -> 460     
  net/ipv4/raw                          3324 -> 3272         -52 / -1.564380 %
  net/ipv4/route                       11496 -> 11488         -8 / -0.069589 %
  net/ipv4/sysctl_net_ipv4                 0 -> 0       
  net/ipv4/tcp                         20032 -> 20020        -12 / -0.059904 %
  net/ipv4/tcp_diag                     3460 -> 3272        -188 / -5.433526 %
  net/ipv4/tcp_input                   27584 -> 27504        -80 / -0.290023 %
  net/ipv4/tcp_ipv4                    15368 -> 15240       -128 / -0.832900 %
  net/ipv4/tcp_minisocks                4748 -> 4716         -32 / -0.673968 %
  net/ipv4/tcp_output                  11064 -> 11020        -44 / -0.397686 %
  net/ipv4/tcp_timer                    4388 -> 4380          -8 / -0.182315 %
  net/ipv4/udp                          5316 -> 5168        -148 / -2.784048 %
  net/ipv4/utils                          72 -> 72      
  net/netlink/af_netlink                5040 -> 5028         -12 / -0.238095 %
  net/sched/sch_generic                 2392 -> 2392    
  net/socket                            7380 -> 7384          +4 / +0.054201 %
  net/sunrpc/auth                        844 -> 836           -8 / -0.947867 %
  net/sunrpc/auth_null                   336 -> 336     
  net/sunrpc/auth_unix                   832 -> 828           -4 / -0.480769 %
  net/sunrpc/clnt                       4264 -> 4264    
  net/sunrpc/pmap_clnt                   816 -> 804          -12 / -1.470588 %
  net/sunrpc/sched                      5056 -> 5072         +16 / +0.316456 %
  net/sunrpc/sunrpc_syms                   0 -> 0       
  net/sunrpc/svc                        2052 -> 2052    
  net/sunrpc/svcauth                     544 -> 544     
  net/sunrpc/svcsock                    5860 -> 5852          -8 / -0.136519 %
  net/sunrpc/timer                       164 -> 164     
  net/sunrpc/xdr                        1972 -> 1972    
  net/sunrpc/xprt                       6736 -> 6736    

     total:    786028 -> 784340     -1688 / -0.214751 %


lwip-0.5.3.preproc
  proj/unixsim/apps/fs        39868 -> 39868   
  proj/unixsim/apps/httpd       848 -> 844           -4 / -0.471698 %
  proj/unixsim/apps/shell      9688 -> 9688    
  proj/unixsim/apps/tcpecho     196 -> 196     
  proj/unixsim/apps/udpecho     204 -> 204     
  proj/unixsim/simhost          504 -> 504     
  proj/unixsim/simnode          464 -> 464     
  proj/unixsim/simrouter        628 -> 628     
  src/api/api_lib              2288 -> 2248         -40 / -1.748252 %
  src/api/api_msg              1904 -> 1876         -28 / -1.470588 %
  src/api/err                     0 -> 0       
  src/api/sockets              1452 -> 1432         -20 / -1.377410 %
  src/api/tcpip                 380 -> 380     
  src/core/inet                 628 -> 608          -20 / -3.184713 %
  src/core/ipv4/icmp            952 -> 940          -12 / -1.260504 %
  src/core/ipv4/ip             1852 -> 1900         +48 / +2.591793 %
  src/core/ipv4/ip_addr           4 -> 4       
  src/core/mem                 1136 -> 1128          -8 / -0.704225 %
  src/core/memp                 908 -> 900           -8 / -0.881057 %
  src/core/netif                736 -> 736     
  src/core/pbuf                1592 -> 1556         -36 / -2.261307 %
  src/core/stats                 20 -> 20      
  src/core/sys                  488 -> 484           -4 / -0.819672 %
  src/core/tcp                 3168 -> 3112         -56 / -1.767677 %
  src/core/tcp_input           5132 -> 5012        -120 / -2.338270 %
  src/core/tcp_output          2880 -> 2804         -76 / -2.638889 %
  src/core/udp                 1224 -> 1200         -24 / -1.960784 %
  src/netif/arp                1540 -> 1536          -4 / -0.259740 %
  src/netif/loopif              132 -> 124           -8 / -6.060606 %
  src/netif/tcpdump            1704 -> 1696          -8 / -0.469484 %

     total:     82520 -> 82092      -428 / -0.518662 %


mpeg2dec-0.3.1
  libmpeg2/alloc                   120 -> 120     
  libmpeg2/cpu_accel                 4 -> 4       
  libmpeg2/cpu_state                 4 -> 4       
  libmpeg2/decode                 2968 -> 2968    
  libmpeg2/header                 4120 -> 4120    
  libmpeg2/idct                   2020 -> 1940         -80 / -3.960396 %
  libmpeg2/idct_alpha                0 -> 0       
  libmpeg2/idct_altivec              0 -> 0       
  libmpeg2/idct_mlib                 0 -> 0       
  libmpeg2/idct_mmx                  0 -> 0       
  libmpeg2/motion_comp            5004 -> 5004    
  libmpeg2/motion_comp_alpha         0 -> 0       
  libmpeg2/motion_comp_altivec       0 -> 0       
  libmpeg2/motion_comp_mlib          0 -> 0       
  libmpeg2/motion_comp_mmx           0 -> 0       
  libmpeg2/slice                 20128 -> 20104        -24 / -0.119237 %
  libvo/video_out                  160 -> 160     
  libvo/video_out_dx                 0 -> 0       
  libvo/video_out_null             200 -> 200     
  libvo/video_out_pgm              544 -> 544     
  libvo/video_out_sdl              800 -> 800     
  libvo/video_out_x11                0 -> 0       
  libvo/yuv2rgb                   3928 -> 3860         -68 / -1.731161 %
  libvo/yuv2rgb_mlib                 0 -> 0       
  libvo/yuv2rgb_mmx                  0 -> 0       
  src/extract_mpeg2               3604 -> 3604    
  src/getopt                      3144 -> 3144    
  src/gettimeofday                   0 -> 0       
  src/mpeg2dec                    5032 -> 5032    

     total:     51780 -> 51608      -172 / -0.332175 %


mpgcut-1.1
  mpgcut   23496 -> 23320       -176 / -0.749064 %

     total:     23496 -> 23320      -176 / -0.749064 %


replaypc-0.4.0.preproc
  ReplayPC               4320 -> 4320    
  addressbook             672 -> 672     
  bigfile                 228 -> 228     
  build-ndx               640 -> 600          -40 / -6.250000 %
  cg                     2168 -> 2132         -36 / -1.660517 %
  channelset              220 -> 220     
  crypt                   932 -> 932     
  crypt-test             1068 -> 1068    
  dump                   1576 -> 1576    
  dump-addressbook        500 -> 500     
  dump-cg2                504 -> 504     
  dump-channelset         408 -> 408     
  dump-filedarray         536 -> 536     
  dump-guide              564 -> 564     
  dump-headend            364 -> 364     
  dump-programset         688 -> 688     
  dump-replaychannels     344 -> 344     
  dump-replayshows        336 -> 336     
  dump-zipcode2           324 -> 324     
  filedarray              224 -> 224     
  find-GOPs               260 -> 260     
  guide                  7240 -> 7240    
  guideclient             740 -> 740     
  headend                1768 -> 1764          -4 / -0.226244 %
  httpclient             2452 -> 2444          -8 / -0.326264 %
  httpfs                 6524 -> 6524    
  httpfsclient           2152 -> 2148          -4 / -0.185874 %
  mark-commercial        2160 -> 2160    
  ndx                     456 -> 440          -16 / -3.508772 %
  ndx-dump               4572 -> 4488         -84 / -1.837270 %
  program                4752 -> 4752    
  rddns                   556 -> 556     
  rddnsclient            1192 -> 1192    
  rtv                     704 -> 704     
  rtvguide               1632 -> 1632    
  sleep                    20 -> 20      
  un-protect             1244 -> 1236          -8 / -0.643087 %
  version-test            176 -> 176     
  zipcode                 416 -> 412           -4 / -0.961538 %

     total:     55632 -> 55428      -204 / -0.366695 %


teem-1.6.0-src
  src/air/754                  1852 -> 1852    
  src/air/array                 604 -> 604     
  src/air/dio                  1860 -> 1860    
  src/air/endianAir            1340 -> 1340    
  src/air/enum                 1004 -> 1004    
  src/air/miscAir              4364 -> 4364    
  src/air/mop                  1680 -> 1680    
  src/air/parseAir             5584 -> 5584    
  src/air/sane                 3220 -> 3220    
  src/air/string               1408 -> 1408    
  src/air/test/doubleprint      660 -> 660     
  src/air/test/floatprint       652 -> 652     
  src/air/test/fp              1640 -> 1640    
  src/air/test/tline            464 -> 464     
  src/air/test/tmop             620 -> 620     
  src/air/test/tok              288 -> 288     
  src/air/threadAir            1140 -> 1140    
  src/alan/coreAlan            4696 -> 4684         -12 / -0.255537 %
  src/alan/enumsAlan           2480 -> 2480    
  src/alan/methodsAlan         2560 -> 2548         -12 / -0.468750 %
  src/alan/test/tspot          1000 -> 1000    
  src/bane/clip                1728 -> 1724          -4 / -0.231481 %
  src/bane/defaultsBane          32 -> 32      
  src/bane/gkmsFlotsam         3452 -> 3452    
  src/bane/gkmsHvol            3056 -> 3056    
  src/bane/gkmsInfo            1456 -> 1456    
  src/bane/gkmsMite            1348 -> 1348    
  src/bane/gkmsOpac            3380 -> 3380    
  src/bane/gkmsPvg             2992 -> 2992    
  src/bane/gkmsScat            1652 -> 1652    
  src/bane/gkmsTxf             3084 -> 3084    
  src/bane/hvol                8007 -> 7999          -8 / -0.099913 %
  src/bane/inc                 4252 -> 4248          -4 / -0.094073 %
  src/bane/measr               1692 -> 1688          -4 / -0.236407 %
  src/bane/methodsBane          864 -> 864     
  src/bane/rangeBane           1752 -> 1736         -16 / -0.913242 %
  src/bane/scat                 824 -> 824     
  src/bane/test/opac            512 -> 512     
  src/bane/test/pos             612 -> 612     
  src/bane/test/sigma           204 -> 204     
  src/bane/test/tblah          1180 -> 1180    
  src/bane/test/tinfo           564 -> 564     
  src/bane/trex                 380 -> 380     
  src/bane/trnsf               5120 -> 5116          -4 / -0.078125 %
  src/bane/valid               2704 -> 2688         -16 / -0.591716 %
  src/biff/biff                2904 -> 2900          -4 / -0.137741 %
  src/biff/test/test            256 -> 256     
  src/dye/convertDye           3196 -> 3200          +4 / +0.125156 %
  src/dye/methodsDye           2280 -> 2276          -4 / -0.175439 %
  src/dye/test/bow              600 -> 600     
  src/dye/test/conv             456 -> 456     
  src/echo/bounds              7044 -> 7044    
  src/echo/color               7468 -> 7468    
  src/echo/enumsEcho          13168 -> 13168   
  src/echo/intx               12264 -> 12288        +24 / +0.195695 %
  src/echo/lightEcho            644 -> 644     
  src/echo/list                1848 -> 1848    
  src/echo/matter               364 -> 364     
  src/echo/methodsEcho          688 -> 688     
  src/echo/model               1152 -> 1152    
  src/echo/objmethods          1868 -> 1868    
  src/echo/renderEcho          5736 -> 5732          -4 / -0.069735 %
  src/echo/set                 1520 -> 1520    
  src/echo/sqd                 7780 -> 7816         +36 / +0.462725 %
  src/echo/test/test           1092 -> 1092    
  src/echo/test/trend          9708 -> 9708    
  src/ell/cubicEll             1488 -> 1496          +8 / +0.537634 %
  src/ell/eigen                3984 -> 3984    
  src/ell/genmat               3548 -> 3540          -8 / -0.225479 %
  src/ell/mat                 16960 -> 16960   
  src/ell/miscEll              1044 -> 1044    
  src/ell/quat                11748 -> 11748   
  src/ell/test/invert           708 -> 708     
  src/ell/test/sort3            572 -> 572     
  src/ell/test/tq              4336 -> 4336    
  src/ell/vecEll               1148 -> 1148    
  src/gage/ctx                 5120 -> 5112          -8 / -0.156250 %
  src/gage/defaultsGage          52 -> 52      
  src/gage/filter              2932 -> 2932    
  src/gage/miscGage            5388 -> 5388    
  src/gage/print               5680 -> 5680    
  src/gage/pvl                 1688 -> 1684          -4 / -0.236967 %
  src/gage/scl                19232 -> 19232   
  src/gage/sclanswer           7236 -> 7232          -4 / -0.055279 %
  src/gage/sclfilter           6796 -> 6792          -4 / -0.058858 %
  src/gage/sclprint            1076 -> 1076    
  src/gage/shape               3104 -> 3100          -4 / -0.128866 %
  src/gage/st                  3760 -> 3760    
  src/gage/update              5512 -> 5508          -4 / -0.072569 %
  src/gage/vecGage            13900 -> 13892         -8 / -0.057554 %
  src/gage/vecprint              56 -> 56      
  src/hest/defaultsHest          12 -> 12      
  src/hest/methodsHest         3004 -> 3004    
  src/hest/parseHest          14088 -> 14080         -8 / -0.056786 %
  src/hest/test/bday           1136 -> 1136    
  src/hest/test/ex0             496 -> 496     
  src/hest/test/ex1            1228 -> 1228    
  src/hest/test/ex2            1016 -> 1016    
  src/hest/test/ex3            1648 -> 1648    
  src/hest/test/ex4            1184 -> 1184    
  src/hest/test/ex5             868 -> 868     
  src/hest/test/strings        1176 -> 1176    
  src/hest/usage               3816 -> 3816    
  src/hex/dehex                1372 -> 1372    
  src/hex/enhex                 840 -> 840     
  src/hoover/defaultsHoover      20 -> 20      
  src/hoover/methodsHoover     1956 -> 1948          -8 / -0.408998 %
  src/hoover/rays              3740 -> 3740    
  src/hoover/stub                48 -> 48      
  src/limn/cam                 5504 -> 5508          +4 / +0.072674 %
  src/limn/defaultsLimn          16 -> 16      
  src/limn/env                 1276 -> 1276    
  src/limn/hestLimn            1028 -> 1028    
  src/limn/io                   820 -> 820     
  src/limn/light                940 -> 940     
  src/limn/methodsLimn          356 -> 356     
  src/limn/obj                  892 -> 892     
  src/limn/qn                  2256 -> 2252          -4 / -0.177305 %
  src/limn/renderLimn          3304 -> 3296          -8 / -0.242131 %
  src/limn/shapes              4540 -> 4540    
  src/limn/splineEval          5328 -> 5320          -8 / -0.150150 %
  src/limn/splineMethods       5740 -> 5724         -16 / -0.278746 %
  src/limn/splineMisc         14952 -> 14952   
  src/limn/test/light           600 -> 600     
  src/limn/test/map             344 -> 344     
  src/limn/test/soid           4720 -> 4720    
  src/limn/test/tbc            1868 -> 1868    
  src/limn/test/tcam           1808 -> 1808    
  src/limn/test/tcamanim       3912 -> 3912    
  src/limn/test/tps            3200 -> 3200    
  src/limn/test/tspline        2160 -> 2160    
  src/limn/transform           2912 -> 2904          -8 / -0.274725 %
  src/mite/defaultsMite          32 -> 32      
  src/mite/ray                 4500 -> 4488         -12 / -0.266667 %
  src/mite/renderMite          1404 -> 1404    
  src/mite/thread               312 -> 312     
  src/mite/txf                 8220 -> 8216          -4 / -0.048662 %
  src/mite/user                1064 -> 1060          -4 / -0.375940 %
  src/moss/defaultsMoss          20 -> 20      
  src/moss/hestMoss            1440 -> 1440    
  src/moss/methodsMoss         1276 -> 1268          -8 / -0.626959 %
  src/moss/sampler             2540 -> 2532          -8 / -0.314961 %
  src/moss/test/invert          336 -> 336     
  src/moss/xform               4060 -> 4060    
  src/nrrd/accessors           7660 -> 7664          +4 / +0.052219 %
  src/nrrd/apply1D             9250 -> 9242          -8 / -0.086486 %
  src/nrrd/arith               6776 -> 6756         -20 / -0.295159 %
  src/nrrd/arraysNrrd          2364 -> 2364    
  src/nrrd/axis                2964 -> 2964    
  src/nrrd/cc                 13500 -> 13492         -8 / -0.059259 %
  src/nrrd/ccmethods           1432 -> 1432    
  src/nrrd/comment              636 -> 636     
  src/nrrd/convertNrrd         4764 -> 4764    
  src/nrrd/defaultsNrrd         760 -> 760     
  src/nrrd/encoding             668 -> 664           -4 / -0.598802 %
  src/nrrd/encodingAscii       1644 -> 1644    
  src/nrrd/encodingBzip2        576 -> 576     
  src/nrrd/encodingGzip         640 -> 640     
  src/nrrd/encodingHex         1752 -> 1748          -4 / -0.228311 %
  src/nrrd/encodingRaw         2244 -> 2240          -4 / -0.178253 %
  src/nrrd/endianNrrd           644 -> 644     
  src/nrrd/enumsNrrd          86276 -> 86276   
  src/nrrd/filt                5880 -> 5876          -4 / -0.068027 %
  src/nrrd/format               732 -> 732     
  src/nrrd/formatEPS           2620 -> 2620    
  src/nrrd/formatNRRD          4108 -> 4104          -4 / -0.097371 %
  src/nrrd/formatPNG            780 -> 780     
  src/nrrd/formatPNM           3452 -> 3452    
  src/nrrd/formatText          3312 -> 3312    
  src/nrrd/formatVTK           6516 -> 6504         -12 / -0.184162 %
  src/nrrd/gzio                   0 -> 0       
  src/nrrd/hestNrrd            1480 -> 1480    
  src/nrrd/histogram           7408 -> 7384         -24 / -0.323974 %
  src/nrrd/iter                 804 -> 804     
  src/nrrd/kernel             21532 -> 21528         -4 / -0.018577 %
  src/nrrd/keyvalue            1072 -> 1072    
  src/nrrd/map                 5896 -> 5888          -8 / -0.135685 %
  src/nrrd/measure             9496 -> 9496    
  src/nrrd/methodsNrrd         5584 -> 5576          -8 / -0.143266 %
  src/nrrd/parseNrrd           7604 -> 7588         -16 / -0.210416 %
  src/nrrd/range                836 -> 836     
  src/nrrd/read                3604 -> 3604    
  src/nrrd/reorder            11412 -> 11388        -24 / -0.210305 %
  src/nrrd/resampleNrrd        7792 -> 7792    
  src/nrrd/simple              5600 -> 5588         -12 / -0.214286 %
  src/nrrd/subset              3268 -> 3252         -16 / -0.489596 %
  src/nrrd/superset            5844 -> 5836          -8 / -0.136893 %
  src/nrrd/test/ax             3336 -> 3336    
  src/nrrd/test/convo          2124 -> 2124    
  src/nrrd/test/genvol         1684 -> 1684    
  src/nrrd/test/io              412 -> 412     
  src/nrrd/test/kv              428 -> 428     
  src/nrrd/test/minmax          340 -> 340     
  src/nrrd/test/quadvol        1604 -> 1604    
  src/nrrd/test/tkernel        1828 -> 1828    
  src/nrrd/test/tline           420 -> 420     
  src/nrrd/test/trand           224 -> 224     
  src/nrrd/test/tread           308 -> 308     
  src/nrrd/test/typestest       384 -> 384     
  src/nrrd/tmfKernel         130528 -> 130528  
  src/nrrd/winKernel          16460 -> 16460   
  src/nrrd/write               4768 -> 4756         -12 / -0.251678 %
  src/ten/aniso                3552 -> 3536         -16 / -0.450450 %
  src/ten/bimod                6632 -> 6628          -4 / -0.060314 %
  src/ten/chan                10004 -> 9992         -12 / -0.119952 %
  src/ten/defaultsTen            52 -> 52      
  src/ten/enumsTen            26184 -> 26184   
  src/ten/epireg              15664 -> 15648        -16 / -0.102145 %
  src/ten/fiber                4768 -> 4776          +8 / +0.167785 %
  src/ten/fiberMethods         3408 -> 3400          -8 / -0.234742 %
  src/ten/glyph                6668 -> 6664          -4 / -0.059988 %
  src/ten/miscTen              4224 -> 4220          -4 / -0.094697 %
  src/ten/mod                  3860 -> 3860    
  src/ten/tenGage              3200 -> 3192          -8 / -0.250000 %
  src/ten/tendAnhist           1272 -> 1272    
  src/ten/tendAnplot           1756 -> 1756    
  src/ten/tendAnscale          1484 -> 1484    
  src/ten/tendAnvol            1600 -> 1600    
  src/ten/tendBmat             1436 -> 1436    
  src/ten/tendEllipse          2648 -> 2648    
  src/ten/tendEpireg           4640 -> 4640    
  src/ten/tendEstim            4016 -> 4016    
  src/ten/tendEval             1916 -> 1916    
  src/ten/tendEvaladd          1080 -> 1080    
  src/ten/tendEvalclamp        1368 -> 1368    
  src/ten/tendEvalpow           992 -> 992     
  src/ten/tendEvec             1872 -> 1872    
  src/ten/tendEvecrgb          2008 -> 2008    
  src/ten/tendEvq              2284 -> 2284    
  src/ten/tendExpand           1552 -> 1552    
  src/ten/tendFiber            2832 -> 2832    
  src/ten/tendFlotsam          1688 -> 1684          -4 / -0.236967 %
  src/ten/tendGlyph            6980 -> 6980    
  src/ten/tendMake             1072 -> 1072    
  src/ten/tendNorm             1404 -> 1404    
  src/ten/tendPoint            1704 -> 1704    
  src/ten/tendSatin            5452 -> 5448          -4 / -0.073368 %
  src/ten/tendShrink           1040 -> 1040    
  src/ten/tendSim              1456 -> 1456    
  src/ten/tendSlice            1116 -> 1116    
  src/ten/tendSten             1436 -> 1436    
  src/ten/tensor               6952 -> 6940         -12 / -0.172612 %
  src/ten/test/tem             1116 -> 1116    
  src/ten/test/tg              2780 -> 2780    
  src/ten/test/tt              2708 -> 2708    
  src/unrrdu/1op               1716 -> 1716    
  src/unrrdu/2op               2228 -> 2228    
  src/unrrdu/3op               2372 -> 2372    
  src/unrrdu/about             2856 -> 2856    
  src/unrrdu/axdelete          1460 -> 1460    
  src/unrrdu/axinfo            1524 -> 1524    
  src/unrrdu/axinsert          1176 -> 1176    
  src/unrrdu/axmerge           1432 -> 1432    
  src/unrrdu/axsplit           1204 -> 1204    
  src/unrrdu/block             1184 -> 1184    
  src/unrrdu/ccadj             1236 -> 1236    
  src/unrrdu/ccfind            1792 -> 1792    
  src/unrrdu/ccmerge           2708 -> 2708    
  src/unrrdu/ccsettle          1116 -> 1116    
  src/unrrdu/cmedian           2560 -> 2560    
  src/unrrdu/convert           1216 -> 1216    
  src/unrrdu/crop              1536 -> 1536    
  src/unrrdu/data              1376 -> 1376    
  src/unrrdu/dhisto            1396 -> 1396    
  src/unrrdu/dice              2168 -> 2168    
  src/unrrdu/flip               884 -> 884     
  src/unrrdu/flotsam           3228 -> 3224          -4 / -0.123916 %
  src/unrrdu/gamma             1408 -> 1408    
  src/unrrdu/head              1648 -> 1648    
  src/unrrdu/heq               2132 -> 2132    
  src/unrrdu/histax            1396 -> 1396    
  src/unrrdu/histo             1584 -> 1584    
  src/unrrdu/imap              2532 -> 2532    
  src/unrrdu/inset             1476 -> 1476    
  src/unrrdu/jhisto            2816 -> 2816    
  src/unrrdu/join              1548 -> 1548    
  src/unrrdu/lut               2160 -> 2160    
  src/unrrdu/make              7020 -> 7020    
  src/unrrdu/minmax            1268 -> 1268    
  src/unrrdu/pad               1908 -> 1908    
  src/unrrdu/permute           1096 -> 1096    
  src/unrrdu/project           1960 -> 1960    
  src/unrrdu/quantize          1688 -> 1688    
  src/unrrdu/resample          3528 -> 3528    
  src/unrrdu/reshape           1060 -> 1060    
  src/unrrdu/rmap              2600 -> 2600    
  src/unrrdu/save              2988 -> 2988    
  src/unrrdu/shuffle           1752 -> 1752    
  src/unrrdu/slice             1324 -> 1324    
  src/unrrdu/splice            1464 -> 1464    
  src/unrrdu/swap               928 -> 928     
  src/unrrdu/unblock           1216 -> 1216    
  src/unrrdu/unquantize        1244 -> 1244    

     total:   1116413 -> 1115973    -440 / -0.039412 %


ttt-0.10.1.preproc
  src/connect4    3496 -> 3496    
  src/engine      2132 -> 2132    
  src/main        1260 -> 1260    
  src/parse       1484 -> 1484    
  src/ttt         5216 -> 5216    
  src/util        1988 -> 1988    

     total:     15576 -> 15576      +0 / +0.000000 %


unrarlib-0.4.0
  samples/unix/getfile/getfile             844 -> 844     
  samples/unix/listarchive/listarchive    1028 -> 1020          -8 / -0.778210 %
  unrarlib/unrarlib                      12316 -> 12256        -60 / -0.487171 %

     total:     14188 -> 14120      -68 / -0.479278 %


zlib-1.1.4
  adler32      396 -> 396     
  compress     172 -> 172     
  crc32       1264 -> 1264    
  deflate     5568 -> 5548         -20 / -0.359195 %
  gzio        3784 -> 3780          -4 / -0.105708 %
  infblock    2832 -> 2832    
  infcodes    1528 -> 1528    
  inffast      976 -> 976     
  inflate     1736 -> 1736    
  inftrees    6692 -> 6692    
  infutil      320 -> 320     
  trees       8888 -> 9092        +204 / +2.295230 %
  uncompr      140 -> 140     
  zutil        232 -> 232     

     total:     34528 -> 34708      +180 / +0.521316 %

sum:  3171932 -> 3166112    -5820 / -0.183484 %
avg: -323.333333 / -0.325262 %
max: zlib-1.1.4      34528 -> 34708         +180 / +0.521316 %
min: OpenTCP-1.0.4   27769 -> 27269         -500 / -1.800569 %

top 10 files perf
OpenTCP-1.0.4  tftp/tftps                                 1024 -> 920           -104 / -10.156250 %
OpenTCP-1.0.4  ip                                         1780 -> 1668          -112 / -6.292135 %
replaypc-0.4.0.preproc  build-ndx                          640 -> 600            -40 / -6.250000 %
lwip-0.5.3.preproc  src/netif/loopif                       132 -> 124             -8 / -6.060606 %
linux-2.4.23-pre3-testplatform  net/ipv4/tcp_diag         3460 -> 3272          -188 / -5.433526 %
linux-2.4.23-pre3-testplatform  drivers/block/elevator     672 -> 636            -36 / -5.357143 %
OpenTCP-1.0.4  bootp/bootp                                 740 -> 704            -36 / -4.864865 %
mpeg2dec-0.3.1  libmpeg2/idct                             2020 -> 1940           -80 / -3.960396 %
libmspack  mspack/qtmd                                    5480 -> 5280          -200 / -3.649635 %
replaypc-0.4.0.preproc  ndx                                456 -> 440            -16 / -3.508772 %

top 10 files diff
libpng-1.2.5  pngrtran                              20524 -> 20020         -504 / -2.455662 %
jikespg-1.3  src/lpgparse                           47340 -> 47036         -304 / -0.642163 %
libmspack  mspack/qtmd                               5480 -> 5280          -200 / -3.649635 %
jikespg-1.3  src/remsp                               8328 -> 8136          -192 / -2.305476 %
jikespg-1.3  src/resolve                            12472 -> 12284         -188 / -1.507377 %
linux-2.4.23-pre3-testplatform  net/ipv4/tcp_diag    3460 -> 3272          -188 / -5.433526 %
mpgcut-1.1  mpgcut                                  23496 -> 23320         -176 / -0.749064 %
linux-2.4.23-pre3-testplatform  net/ipv4/udp         5316 -> 5168          -148 / -2.784048 %
libmspack  test/cabextract_md5                      24284 -> 24144         -140 / -0.576511 %
linux-2.4.23-pre3-testplatform  net/ipv4/tcp_ipv4   15368 -> 15240         -128 / -0.832900 %
Nathan Froyd April 10, 2012, 2:14 p.m. UTC | #3
----- Original Message -----
> > BTW, do you have the numbers of CSiBE with this?
> 
> Only for "-m4-single -ml -O2 -mpretend-cmove" so far.
> Not so spectacular :T
> I'll also do a comparison of more variants to see if something went
> really bad.  It's a bit difficult to isolate the degradations because
> there's quite some code reordering happening after the patch...

Are you looking at execution time or code size?  If the latter, you could try disabling scheduling for comparison purposes (-fno-schedule-insns -fno-schedule-insns2).  Even if you're looking at execution time, disabling scheduling might make it easier to see where things are going wrong in the patched code.

-Nathan
Oleg Endo April 10, 2012, 2:18 p.m. UTC | #4
On Tue, 2012-04-10 at 07:14 -0700, Nathan Froyd wrote:
> ----- Original Message -----
> > > BTW, do you have the numbers of CSiBE with this?
> > 
> > Only for "-m4-single -ml -O2 -mpretend-cmove" so far.
> > Not so spectacular :T
> > I'll also do a comparison of more variants to see if something went
> > really bad.  It's a bit difficult to isolate the degradations because
> > there's quite some code reordering happening after the patch...
> 
> Are you looking at execution time or code size?  If the latter, you
> could try disabling scheduling for comparison purposes
> (-fno-schedule-insns -fno-schedule-insns2).  Even if you're looking at
> execution time, disabling scheduling might make it easier to see where
> things are going wrong in the patched code.

Code size only.  Thanks for the hint, will try it out.

Cheers,
Oleg
Kaz Kojima April 11, 2012, 1:10 p.m. UTC | #5
Oleg Endo <oleg.endo@t-online.de> wrote:
>> BTW, do you have the numbers of CSiBE with this?
>> 
> 
> Only for "-m4-single -ml -O2 -mpretend-cmove" so far.
> Not so spectacular :T
> I'll also do a comparison of more variants to see if something went
> really bad.  It's a bit difficult to isolate the degradations because
> there's quite some code reordering happening after the patch... 

Thanks for the numbers!  Looks good considering that HImode would be
less frequently used than QImode in the usual working set.

Regards,
	kaz
Oleg Endo April 11, 2012, 1:40 p.m. UTC | #6
On Wed, 2012-04-11 at 22:10 +0900, Kaz Kojima wrote:
> Oleg Endo <oleg.endo@t-online.de> wrote:
> >> BTW, do you have the numbers of CSiBE with this?
> >> 
> > 
> > Only for "-m4-single -ml -O2 -mpretend-cmove" so far.
> > Not so spectacular :T
> > I'll also do a comparison of more variants to see if something went
> > really bad.  It's a bit difficult to isolate the degradations because
> > there's quite some code reordering happening after the patch... 
> 
> Thanks for numbers!  Looks good considering that HImode would be
> less frequently used than QImode in the usual working set.
> 
Sure, no problem.
I think there is some room for improvement in the
'sh_find_mov_disp_adjust' function.  If it were a bit smarter, all the
displacement move insns would instantly benefit from it.

Cheers,
Oleg
diff mbox

Patch

Index: gcc/config/sh/predicates.md
===================================================================
--- gcc/config/sh/predicates.md	(revision 186233)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -404,7 +404,7 @@ 
       if (GET_CODE (x) == PLUS
 	  && REG_P (XEXP (x, 0))
 	  && CONST_INT_P (XEXP (x, 1)))
-	return sh_legitimate_index_p (mode, XEXP (x, 1));
+	return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
     }
 
   if (TARGET_SHMEDIA
@@ -466,7 +466,7 @@ 
       if (GET_CODE (x) == PLUS
 	  && REG_P (XEXP (x, 0))
 	  && CONST_INT_P (XEXP (x, 1)))
-	return sh_legitimate_index_p (mode, XEXP (x, 1));
+	return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
     }
 
   return general_operand (op, mode);
Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h	(revision 186233)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -56,7 +56,7 @@ 
 extern bool fp_zero_operand (rtx);
 extern bool fp_one_operand (rtx);
 extern rtx get_fpscr_rtx (void);
-extern bool sh_legitimate_index_p (enum machine_mode, rtx);
+extern bool sh_legitimate_index_p (enum machine_mode, rtx, bool, bool);
 extern bool sh_legitimize_reload_address (rtx *, enum machine_mode, int, int);
 extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
 extern bool nonpic_symbol_mentioned_p (rtx);
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 186233)
+++ gcc/config/sh/sh.c	(working copy)
@@ -304,6 +304,7 @@ 
 static int mov_insn_size (enum machine_mode, bool);
 static int max_mov_insn_displacement (enum machine_mode, bool);
 static int mov_insn_alignment_mask (enum machine_mode, bool);
+static HOST_WIDE_INT disp_addr_displacement (rtx);
 
 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
 
@@ -3160,11 +3161,6 @@ 
      scale the max. displacement value accordingly.  */
   const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
 
-  /* FIXME: HImode with displacement addressing is not supported yet.
-     Make it purposefully fail for now.  */
-  if (mode == HImode)
-    return 0;
-
   /* SH2A supports FPU move insns with 12 bit displacements.
      Other variants to do not support any kind of displacements for
      FPU move insns.  */
@@ -3194,15 +3190,24 @@ 
   return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
 }
 
+/* Return the displacement value of a displacement address.  */
+
+static inline HOST_WIDE_INT
+disp_addr_displacement (rtx x)
+{
+  gcc_assert (satisfies_constraint_Sdd (x));
+  return INTVAL (XEXP (XEXP (x, 0), 1));
+}
+
 /* Compute the cost of an address.  */
 
 static int
 sh_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
 {
   /* 'reg + disp' addressing.  */
-  if (DISP_ADDR_P (x))
+  if (satisfies_constraint_Sdd (x))
     {
-      const HOST_WIDE_INT offset = DISP_ADDR_OFFSET (x);
+      const HOST_WIDE_INT offset = disp_addr_displacement (x);
       const enum machine_mode mode = GET_MODE (x);
 
       /* The displacement would fit into a 2 byte move insn.  */
@@ -9665,7 +9670,8 @@ 
    with MODE.  */
 
 bool
-sh_legitimate_index_p (enum machine_mode mode, rtx op)
+sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
+		       bool allow_zero)
 {
   if (! CONST_INT_P (op))
     return false;
@@ -9686,15 +9692,15 @@ 
   else
     {
       const HOST_WIDE_INT offset = INTVAL (op);
-      const int max_disp = max_mov_insn_displacement (mode, TARGET_SH2A);
-      const int align_mask = mov_insn_alignment_mask (mode, TARGET_SH2A);
+      const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
+      const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
 
       /* If the mode does not support any displacement always return false.
 	 Even though an index of '0' is actually always valid, it will cause
 	 troubles when e.g. a DFmode move is split into two SFmode moves,
 	 where one SFmode move will have index '0' and the other move will
 	 have index '4'.  */
-       if (max_disp < 1)
+       if (!allow_zero && max_disp < 1)
 	return false;
 
       return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
@@ -9728,7 +9734,7 @@ 
 
       if (GET_MODE_SIZE (mode) <= 8
 	  && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
-	  && sh_legitimate_index_p (mode, xop1))
+	  && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
 	return true;
 
       if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
@@ -9875,11 +9881,6 @@ 
   if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
     return res;
 
-  /* FIXME: HImode with displacement addressing is not supported yet.
-     Make it purposefully fail for now.  */
-  if (mov_insn_sz == 2)
-    return res;
-
   /* Keeps the previous behavior for QImode displacement addressing.
      This just decides how the offset is re-based.  Removing this special
      case will result in slightly bigger code on average, but it's not that
@@ -12566,12 +12567,14 @@ 
   if (rclass == FPUL_REGS && true_regnum (x) == -1)
     return GENERAL_REGS;
 
-  /* Force mov.b displacement addressing insn to use R0 as the other operand.
+  /* Force mov.b / mov.w displacement addressing insn to use R0 as
+     the other operand.
      On SH2A could also just leave it alone here, which would result in a
      4 byte move insn being generated instead.  However, for this to work
      the insns must have the appropriate alternatives.  */
-  if (mode == QImode && rclass != R0_REGS
-      && DISP_ADDR_P (x) && DISP_ADDR_OFFSET (x) < 16)
+  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
+      && satisfies_constraint_Sdd (x)
+      && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
     return R0_REGS;
 
   /* When reload is trying to address a QImode or HImode subreg on the stack, 
Index: gcc/config/sh/sh.h
===================================================================
--- gcc/config/sh/sh.h	(revision 186233)
+++ gcc/config/sh/sh.h	(working copy)
@@ -1210,24 +1210,12 @@ 
   ((HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) 0xffffffff) \
    || (HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) -1 << 32))
 
-#define CONST_OK_FOR_K04(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
-				 && ((HOST_WIDE_INT)(VALUE)) <= 15)
-
 #define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
 				 && ((HOST_WIDE_INT)(VALUE)) <= 255)
 
-#define CONST_OK_FOR_K12(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
-				 && ((HOST_WIDE_INT)(VALUE)) <= 4095)
-
 #define ZERO_EXTRACT_ANDMASK(EXTRACT_SZ_RTX, EXTRACT_POS_RTX)\
   (((1 << INTVAL (EXTRACT_SZ_RTX)) - 1) << INTVAL (EXTRACT_POS_RTX))
 
-#define DISP_ADDR_P(X) (MEM_P (X) && GET_CODE (XEXP (X, 0)) == PLUS \
-			 && REG_P (XEXP (XEXP (X, 0), 0)) \
-			 && CONST_INT_P (XEXP (XEXP (X, 0), 1)))
-
-#define DISP_ADDR_OFFSET(X) (INTVAL (XEXP (XEXP (X, 0), 1)))
-
 #if 0
 #define SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,ELSE) \
   ((((REGCLASS_HAS_FP_REG (CLASS) 					\
Index: gcc/config/sh/constraints.md
===================================================================
--- gcc/config/sh/constraints.md	(revision 186234)
+++ gcc/config/sh/constraints.md	(working copy)
@@ -145,16 +145,28 @@ 
   (and (match_code "const_int")
        (match_test "ival >= 0 && ival <= 15")))
 
+(define_constraint "K05"
+  "An unsigned 5-bit constant, as used in mov.w displacement addressing."
+  (and (match_code "const_int")
+       (match_test "ival >= 0 && ival <= 31")))
+
 (define_constraint "K08"
   "An unsigned 8-bit constant, as used in and, or, etc."
   (and (match_code "const_int")
        (match_test "ival >= 0 && ival <= 255")))
  
 (define_constraint "K12"
-  "An unsigned 12-bit constant, as used in SH2A 12-bit displacement addressing."
+  "An unsigned 12-bit constant, as used in SH2A 12-bit mov.b displacement
+   addressing."
   (and (match_code "const_int")
        (match_test "ival >= 0 && ival <= 4095")))
 
+(define_constraint "K13"
+  "An unsigned 13-bit constant, as used in SH2A 12-bit mov.w displacement
+   addressing."
+  (and (match_code "const_int")
+       (match_test "ival >= 0 && ival <= 8191")))
+
 (define_constraint "K16"
   "An unsigned 16-bit constant, as used in SHmedia shori."
   (and (match_code "const_int")
@@ -262,6 +274,16 @@ 
   (and (match_test "memory_operand (op, GET_MODE (op))")
        (match_test "GET_CODE (XEXP (op, 0)) != PLUS")))
 
+(define_memory_constraint "Sdd"
+  "A memory reference that uses displacement addressing."
+  (and (match_test "MEM_P (op) && GET_CODE (XEXP (op, 0)) == PLUS")
+       (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+       (match_test "CONST_INT_P (XEXP (XEXP (op, 0), 1))")))
+
+(define_memory_constraint "Snd"
+  "A memory reference that excludes displacement addressing."
+  (match_test "! satisfies_constraint_Sdd (op)"))
+
 (define_memory_constraint "Sbv"
   "A memory reference, as used in SH2A bclr.b, bset.b, etc."
   (and (match_test "MEM_P (op) && GET_MODE (op) == QImode")
@@ -269,15 +291,7 @@ 
 
 (define_memory_constraint "Sbw"
   "A memory reference, as used in SH2A bclr.b, bset.b, etc."
-  (and (match_test "MEM_P (op) && GET_MODE (op) == QImode")
-       (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
-       (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+  (and (match_test "satisfies_constraint_Sdd (op)")
+       (match_test "GET_MODE (op) == QImode")
        (match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))")))
 
-(define_memory_constraint "Snd"
-  "A memory reference that excludes displacement addressing."
-  (match_test "! DISP_ADDR_P (op)"))
-
-(define_memory_constraint "Sdd"
-  "A memory reference that uses displacement addressing."
-  (match_test "DISP_ADDR_P (op)"))
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 186233)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4768,20 +4768,18 @@ 
     operands[1] = XEXP (operands[1], 0);
 })
 
+;; FIXME: Maybe fold HImode and QImode stuff with mode iterator?
 (define_expand "extendhisi2"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
-	(sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(sign_extend:SI (match_operand:HI 1 "general_extend_operand" "")))]
   ""
   "")
 
-(define_insn "*extendhisi2_compact"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
-	(sign_extend:SI (match_operand:HI 1 "general_movsrc_operand" "r,m")))]
-  "TARGET_SH1"
-  "@
-	exts.w	%1,%0
-	mov.w	%1,%0"
-  [(set_attr "type" "arith,load")])
+(define_expand "extendqisi2"
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(sign_extend:SI (match_operand:QI 1 "general_extend_operand" "")))]
+  ""
+  "")
 
 (define_insn "*extendhisi2_media"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4811,12 +4809,6 @@ 
 			   subreg_lowpart_offset (SImode, GET_MODE (op1)));
 })
 
-(define_expand "extendqisi2"
-  [(set (match_operand:SI 0 "arith_reg_dest" "")
-	(sign_extend:SI (match_operand:QI 1 "general_extend_operand" "")))]
-  ""
-  "")
-
 (define_insn "*extendqisi2_compact_reg"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
 	(sign_extend:SI (match_operand:QI 1 "register_operand" "r,t")))]
@@ -4826,6 +4818,15 @@ 
 	movt	%0"
   [(set_attr "type" "arith,arith")])
 
+(define_insn "*extendhisi2_compact_reg"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+	(sign_extend:SI (match_operand:HI 1 "register_operand" "r,t")))]
+  "TARGET_SH1"
+  "@
+	exts.w	%1,%0
+	movt	%0"
+  [(set_attr "type" "arith,arith")])
+
 ;; FIXME: Fold non-SH2A and SH2A alternatives with "enabled" attribute.
 ;; See movqi insns.
 (define_insn "*extendqisi2_compact_mem_disp"
@@ -4833,20 +4834,31 @@ 
 	(sign_extend:SI
 	 (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
 			  (match_operand:SI 2 "const_int_operand" "K04,N")))))]
-  "TARGET_SH1 && ! TARGET_SH2A && CONST_OK_FOR_K04 (INTVAL (operands[2]))"
+  "TARGET_SH1 && ! TARGET_SH2A
+   && sh_legitimate_index_p (QImode, operands[2], false, true)"
   "@
 	mov.b	@(%O2,%1),%0
 	mov.b	@%1,%0"
   [(set_attr "type" "load")])
 
+(define_insn "*extendhisi2_compact_mem_disp"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
+	(sign_extend:SI
+	 (mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
+			  (match_operand:SI 2 "const_int_operand" "K05,N")))))]
+  "TARGET_SH1 && ! TARGET_SH2A
+   && sh_legitimate_index_p (HImode, operands[2], false, true)"
+  "@
+	mov.w	@(%O2,%1),%0
+	mov.w	@%1,%0"
+  [(set_attr "type" "load")])
+
 (define_insn "*extendqisi2_compact_mem_disp"
   [(set (match_operand:SI 0 "arith_reg_dest" "=z,r,r")
 	(sign_extend:SI
 	 (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
 			  (match_operand:SI 2 "const_int_operand" "K04,N,K12")))))]
-  "TARGET_SH2A
-   && (CONST_OK_FOR_K04 (INTVAL (operands[2]))
-       || (CONST_OK_FOR_K12 (INTVAL (operands[2]))))"
+  "TARGET_SH2A && sh_legitimate_index_p (QImode, operands[2], true, true)"
   "@
 	mov.b	@(%O2,%1),%0
 	mov.b	@%1,%0
@@ -4854,8 +4866,23 @@ 
   [(set_attr "type" "load")
    (set_attr "length" "2,2,4")])
 
-;; This will take care of other QImode addressing modes than displacement
-;; addressing.
+(define_insn "*extendhisi2_compact_mem_disp"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,r,r")
+	(sign_extend:SI
+	 (mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
+			  (match_operand:SI 2 "const_int_operand" "K05,N,K13")))))]
+  "TARGET_SH2A && sh_legitimate_index_p (HImode, operands[2], true, true)"
+  "@
+	mov.w	@(%O2,%1),%0
+	mov.w	@%1,%0
+	mov.w	@(%O2,%1),%0"
+  [(set_attr "type" "load")
+   (set_attr "length" "2,2,4")])
+
+;; The *_snd patterns take care of QImode/HImode addressing modes other
+;; than displacement addressing.  They must be defined _after_ the
+;; displacement addressing patterns; otherwise the displacement addressing
+;; patterns will not be picked.
 (define_insn "*extendqisi2_compact_snd"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
 	(sign_extend:SI (match_operand:QI 1 "movsrc_no_disp_mem_operand" "Snd")))]
@@ -4863,6 +4890,13 @@ 
   "mov.b	%1,%0"
   [(set_attr "type" "load")])
 
+(define_insn "*extendhisi2_compact_snd"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(sign_extend:SI (match_operand:HI 1 "movsrc_no_disp_mem_operand" "Snd")))]
+  "TARGET_SH1"
+  "mov.w	%1,%0"
+  [(set_attr "type" "load")])
+
 (define_insn "*extendqisi2_media"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
@@ -5441,6 +5475,14 @@ 
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_movdst_operand" "")
+	(match_operand:HI 1 "general_movsrc_operand" ""))]
+  ""
+{
+  prepare_move_operands (operands, HImode);
+})
+
 (define_expand "movqi"
   [(set (match_operand:QI 0 "general_operand" "")
 	(match_operand:QI 1 "general_operand" ""))]
@@ -5456,6 +5498,7 @@ 
 ;; With the movqi_reg_reg being specified before movqi it will be intially 
 ;; picked to load/store regs.  If the regs regs are on the stack reload will
 ;; try other insns and not stick to movqi_reg_reg.
+;; The same applies to the movhi variants.
 (define_insn "*movqi_reg_reg"
   [(set (match_operand:QI 0 "arith_reg_dest"   "=r,r")
 	(match_operand:QI 1 "register_operand" "r,t"))]
@@ -5465,44 +5508,82 @@ 
 	movt	%0"
   [(set_attr "type" "move,arith")])
 
+(define_insn "*movhi_reg_reg"
+  [(set (match_operand:HI 0 "arith_reg_dest"   "=r,r")
+	(match_operand:HI 1 "register_operand" "r,t"))]
+  "TARGET_SH1"
+  "@
+	mov	%1,%0
+	movt	%0"
+  [(set_attr "type" "move,arith")])
+
 ;; FIXME: The non-SH2A and SH2A variants should be combined by adding
 ;; "enabled" attribute as it is done in other targets.
 (define_insn "*movqi_store_mem_disp04"
   [(set (mem:QI (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r,r")
 			 (match_operand:SI 1 "const_int_operand" "K04,N")))
 	(match_operand:QI 2 "arith_reg_operand" "z,r"))]
-  "TARGET_SH1 && CONST_OK_FOR_K04 (INTVAL (operands[1]))"
+  "TARGET_SH1 && sh_legitimate_index_p (QImode, operands[1], false, true)"
   "@
 	mov.b	%2,@(%O1,%0)
 	mov.b	%2,@%0"
   [(set_attr "type" "store")])
 
+(define_insn "*movhi_store_mem_disp05"
+  [(set (mem:HI (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r,r")
+			 (match_operand:SI 1 "const_int_operand" "K05,N")))
+	(match_operand:HI 2 "arith_reg_operand" "z,r"))]
+  "TARGET_SH1 && sh_legitimate_index_p (HImode, operands[1], false, true)"
+  "@
+	mov.w	%2,@(%O1,%0)
+	mov.w	%2,@%0"
+  [(set_attr "type" "store")])
+
 (define_insn "*movqi_store_mem_disp12"
   [(set (mem:QI (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
 			 (match_operand:SI 1 "const_int_operand" "K12")))
 	(match_operand:QI 2 "arith_reg_operand" "r"))]
-  "TARGET_SH2A && CONST_OK_FOR_K12 (INTVAL (operands[1]))"
+  "TARGET_SH2A && sh_legitimate_index_p (QImode, operands[1], true, true)"
   "mov.b	%2,@(%O1,%0)"
   [(set_attr "type" "store")
    (set_attr "length" "4")])
 
+(define_insn "*movhi_store_mem_disp13"
+  [(set (mem:HI (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+			 (match_operand:SI 1 "const_int_operand" "K13")))
+	(match_operand:HI 2 "arith_reg_operand" "r"))]
+  "TARGET_SH2A && sh_legitimate_index_p (HImode, operands[1], true, true)"
+  "mov.w	%2,@(%O1,%0)"
+  [(set_attr "type" "store")
+   (set_attr "length" "4")])
+
 (define_insn "*movqi_load_mem_disp"
   [(set (match_operand:QI 0 "arith_reg_dest" "=z,r")
 	(mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
 			 (match_operand:SI 2 "const_int_operand" "K04,N"))))]
-  "TARGET_SH1 && ! TARGET_SH2A && CONST_OK_FOR_K04 (INTVAL (operands[2]))"
+  "TARGET_SH1 && ! TARGET_SH2A
+   && sh_legitimate_index_p (QImode, operands[2], false, true)"
   "@
 	mov.b	@(%O2,%1),%0
 	mov.b	@%1,%0"
   [(set_attr "type" "load")])
 
+(define_insn "*movhi_load_mem_disp"
+  [(set (match_operand:HI 0 "arith_reg_dest" "=z,r")
+	(mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
+			 (match_operand:SI 2 "const_int_operand" "K05,N"))))]
+  "TARGET_SH1 && ! TARGET_SH2A
+   && sh_legitimate_index_p (HImode, operands[2], false, true)"
+  "@
+	mov.w	@(%O2,%1),%0
+	mov.w	@%1,%0"
+  [(set_attr "type" "load")])
+
 (define_insn "*movqi_load_mem_disp"
   [(set (match_operand:QI 0 "arith_reg_dest" "=z,r,r")
 	(mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
 			 (match_operand:SI 2 "const_int_operand" "K04,N,K12"))))]
-  "TARGET_SH2A
-   && (CONST_OK_FOR_K04 (INTVAL (operands[2]))
-       || CONST_OK_FOR_K12 (INTVAL (operands[2])))"
+  "TARGET_SH2A && sh_legitimate_index_p (QImode, operands[2], true, true)"
   "@
 	mov.b	@(%O2,%1),%0
 	mov.b	@%1,%0
@@ -5510,6 +5591,18 @@ 
   [(set_attr "type" "load")
    (set_attr "length" "2,2,4")])
 
+(define_insn "*movhi_load_mem_disp"
+  [(set (match_operand:HI 0 "arith_reg_dest" "=z,r,r")
+	(mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
+			 (match_operand:SI 2 "const_int_operand" "K05,N,K13"))))]
+  "TARGET_SH2A && sh_legitimate_index_p (HImode, operands[2], true, true)"
+  "@
+	mov.w	@(%O2,%1),%0
+	mov.w	@%1,%0
+	mov.w	@(%O2,%1),%0"
+  [(set_attr "type" "load")
+   (set_attr "length" "2,2,4")])
+
 ;; The m constraints basically allow any kind of addresses to be used with any
 ;; source/target register as the other operand.  This is not true for 
 ;; displacement addressing modes on anything but SH2A.  That's why the
@@ -5528,6 +5621,21 @@ 
 	lds	%1,%0"
  [(set_attr "type" "movi8,load,store,prget,prset")])
 
+(define_insn "*movhi"
+  [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,m,r,l")
+	(match_operand:HI 1 "general_movsrc_operand"  "Q,i,m,r,l,r"))]
+  "TARGET_SH1
+   && (arith_reg_operand (operands[0], HImode)
+       || arith_reg_operand (operands[1], HImode))"
+  "@
+	mov.w	%1,%0
+	mov	%1,%0
+	mov.w	%1,%0
+	mov.w	%1,%0
+	sts	%1,%0
+	lds	%1,%0"
+ [(set_attr "type" "pcload,movi8,load,store,prget,prset")])
+
 (define_insn "*movqi_media"
   [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m")
 	(match_operand:QI 1 "general_movsrc_operand" "r,I16Css,m,rZ"))]
@@ -5560,28 +5668,6 @@ 
   operands[3] = gen_rtx_REG (DImode, REGNO (operands[2]));
 })
 
-;; When storing r0, we have to avoid reg+reg addressing.
-(define_insn "movhi_i"
-  [(set (match_operand:HI 0 "general_movdst_operand"   "=r,r,r,r,m,r,l,r")
-	(match_operand:HI 1 "general_movsrc_operand" "Q,rI08,m,t,r,l,r,i"))]
-  "TARGET_SH1
-   && (arith_reg_operand (operands[0], HImode)
-       || arith_reg_operand (operands[1], HImode))
-   && (!MEM_P (operands[0])
-       || GET_CODE (XEXP (operands[0], 0)) != PLUS
-       || !REG_P (XEXP (XEXP (operands[0], 0), 1))
-       || ! refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0))"
-  "@
-	mov.w	%1,%0
-	mov	%1,%0
-	mov.w	%1,%0
-	movt	%0
-	mov.w	%1,%0
-	sts	%1,%0
-	lds	%1,%0
-	fake	%1,%0"
-  [(set_attr "type" "pcload,move,load,move,store,move,move,pcload")])
-
 (define_insn "*movhi_media"
   [(set (match_operand:HI 0 "general_movdst_operand"     "=r,r,r,r,m")
 	(match_operand:HI 1 "general_movsrc_operand" "r,I16Css,n,m,rZ"))]
@@ -5607,14 +5693,6 @@ 
    && ! satisfies_constraint_I16 (operands[1])"
   [(set (subreg:DI (match_dup 0) 0) (match_dup 1))])
 
-(define_expand "movhi"
-  [(set (match_operand:HI 0 "general_movdst_operand" "")
-	(match_operand:HI 1 "general_movsrc_operand"  ""))]
-  ""
-{
-  prepare_move_operands (operands, HImode);
-})
-
 (define_expand "reload_inhi"
   [(set (match_operand:SI 2 "" "=&r")
 	(match_operand:HI 1 "inqhi_operand" ""))
@@ -11563,10 +11641,10 @@ 
 
 ;; Fold sequence:
 ;;	mov #54,r0
-;;	mov.b @(r0,r15),r0
+;;	mov.{b,w} @(r0,r15),r0
 ;;	mov r0,r3
 ;; into:
-;;	mov.b @(54,r15),r3
+;;	mov.{b,w} @(54,r15),r3
 ;;
 (define_peephole2
   [(set (match_operand:SI 0 "arith_reg_dest" "")
@@ -11578,17 +11656,33 @@ 
    (set (match_operand:QI 4 "arith_reg_dest" "")
 	(match_operand:QI 5 "arith_reg_operand" ""))]
   "TARGET_SH2A
-   && CONST_OK_FOR_K12 (INTVAL (operands[1]))
+   && sh_legitimate_index_p (QImode, operands[1], true, true)
    && REGNO (operands[2]) == REGNO (operands[5])
    && peep2_reg_dead_p (3, operands[5])"
   [(set (match_dup 4) (mem:QI (plus:SI (match_dup 3) (match_dup 1))))]
   "")
 
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(match_operand:SI 1 "const_int_operand" ""))
+   (set (match_operand:SI 2 "arith_reg_dest" "")
+	(sign_extend:SI
+	 (mem:HI (plus:SI (match_dup 0)
+			  (match_operand:SI 3 "arith_reg_operand" "")))))
+   (set (match_operand:HI 4 "arith_reg_dest" "")
+	(match_operand:HI 5 "arith_reg_operand" ""))]
+  "TARGET_SH2A
+   && sh_legitimate_index_p (HImode, operands[1], true, true)
+   && REGNO (operands[2]) == REGNO (operands[5])
+   && peep2_reg_dead_p (3, operands[5])"
+  [(set (match_dup 4) (mem:HI (plus:SI (match_dup 3) (match_dup 1))))]
+  "")
+
 ;; Fold sequence:
 ;;	mov #54,r0
-;;	mov.b @(r0,r15),r1
+;;	mov.{b,w} @(r0,r15),r1
 ;; into:
-;;	mov.b @(54,r15),r1
+;;	mov.{b,w} @(54,r15),r1
 ;;
 (define_peephole2
   [(set (match_operand:SI 0 "arith_reg_dest" "")
@@ -11598,19 +11692,37 @@ 
 	 (mem:QI (plus:SI (match_dup 0)
 			  (match_operand:SI 3 "arith_reg_operand" "")))))]
   "TARGET_SH2A
-   && CONST_OK_FOR_K12 (INTVAL (operands[1]))
+   && sh_legitimate_index_p (QImode, operands[1], true, true)
    && (peep2_reg_dead_p (2, operands[0])
        || REGNO (operands[0]) == REGNO (operands[2]))"
   [(set (match_dup 2)
 	(sign_extend:SI (mem:QI (plus:SI (match_dup 3) (match_dup 1)))))]
   "")
 
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(match_operand:SI 1 "const_int_operand" ""))
+   (set (match_operand:SI 2 "arith_reg_dest" "")
+	 (sign_extend:SI
+	 (mem:HI (plus:SI (match_dup 0)
+			  (match_operand:SI 3 "arith_reg_operand" "")))))]
+  "TARGET_SH2A
+   && sh_legitimate_index_p (HImode, operands[1], true, true)
+   && (peep2_reg_dead_p (2, operands[0])
+       || REGNO (operands[0]) == REGNO (operands[2]))"
+  [(set (match_dup 2)
+	(sign_extend:SI (mem:HI (plus:SI (match_dup 3) (match_dup 1)))))]
+  "")
+
 ;; Fold sequence:
-;;	mov.b @(r0,r15),r0
+;;	mov.{b,w} @(r0,r15),r0
 ;;	mov r0,r3
 ;; into:
-;;	mov.b @(r0,r15),r3
+;;	mov.{b,w} @(r0,r15),r3
 ;;
+;; This can happen when initially a displacement address is picked, where
+;; the destination reg is fixed to r0, and then the address is transformed
+;; into 'r0 + reg'.
 (define_peephole2
   [(set (match_operand:SI 0 "arith_reg_dest" "")
 	(sign_extend:SI
@@ -11625,6 +11737,20 @@ 
 	(mem:QI (plus:SI (match_dup 1) (match_dup 2))))]
   "")
 
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(sign_extend:SI
+	 (mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "")
+			  (match_operand:SI 2 "arith_reg_operand" "")))))
+   (set (match_operand:HI 3 "arith_reg_dest" "")
+	(match_operand:HI 4 "arith_reg_operand" ""))]
+  "TARGET_SH1
+   && REGNO (operands[0]) == REGNO (operands[4])
+   && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 3)
+	(mem:HI (plus:SI (match_dup 1) (match_dup 2))))]
+  "")
+
 ;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn'
 ;; to `mov #k,r0; mov.l @(r0,r15),rn'.  These sequences are generated by
 ;; reload when the constant is too large for a reg+offset address.