GNU Pascal Homepage - gpc - gpc-announce - gpc-de - gpc-doc
Diese Seite auf deutsch

Mail #8797

Back to main page of archive

Previous mail   Next mail   Unformatted/full headers
Overview  10 days   Subject   Date   Thread   Author  

From: Mirsad Todorovac
Subject: GPC performance kludge with local initialized arrays
Date: 7 Jun 2003, 19:57:29


Note the following listing and the resulting assembly code (i686, Linux):

You might think that the function is very optimized, since it requires
only two comparisons and a lookup in table per character checked?

Alas, GPC emits a real call to memcpy() that copies the complete
``v'' array on each call of the function DigitValue()!

This makes the run twice as slow as when ``v'' array is made global
(edited):

-----------------------------------------------------
make test arr a=8
116.290u 0.400s 1:59.62 97.5%   0+0k 0+0io 10385pf+0w
time a.out arr a=8
114.190u 0.280s 1:58.53 96.5%   0+0k 0+0io 120pf+0w
114.500u 0.210s 2:03.95 92.5%   0+0k 0+0io 120pf+0w
114.650u 0.040s 1:57.13 97.9%   0+0k 0+0io 121pf+0w
114.840u 0.020s 1:57.00 98.1%   0+0k 0+0io 120pf+0w
115.180u 0.040s 2:05.03 92.1%   0+0k 0+0io 120pf+0w
106.200u 1.390s 2:07.25 84.5%   0+0k 0+0io 120pf+0w
111.700u 0.080s 2:04.02 90.1%   0+0k 0+0io 120pf+0w
make test arr a=8 glob
65.840u 0.160s 1:07.62 97.6%    0+0k 0+0io 10385pf+0w
time a.out arr a=8 glob
63.610u 0.060s 1:06.63 95.5%    0+0k 0+0io 120pf+0w
63.710u 0.030s 1:05.21 97.7%    0+0k 0+0io 120pf+0w
63.620u 0.040s 1:06.02 96.4%    0+0k 0+0io 120pf+0w
63.610u 0.030s 1:05.41 97.2%    0+0k 0+0io 120pf+0w
63.840u 0.000s 1:05.15 97.9%    0+0k 0+0io 120pf+0w
63.980u 0.010s 1:05.29 98.0%    0+0k 0+0io 120pf+0w
64.000u 0.020s 1:05.19 98.2%    0+0k 0+0io 120pf+0w
-----------------------------------------------------

Just FYI, making the ``v'' array [0..255] of Integer (for aligned access) made
it even 10s slower (probably due to FSB and cache effects), contrary to what
is commonly said, and the complete code is not a bit faster than this variant:

  { Branch-based variant used for comparison with the table lookup.
    Returns the digit value of Dig: 0..9 for '0'..'9', 10..35 for 'a'..'z'
    and for 'A'..'Z'; returns -1 for any other character. }
  function DigitValue (Dig: Char): Integer; attribute (inline, const);
  { NOTE(review): d is declared but never used in this body. }
  var d : Integer; attribute (register);
  begin
    if      (Dig >= '0') and (Dig <= '9' ) then
      DigitValue  := Ord (Dig) - Ord ('0')
    else if (Dig >= 'a') and (Dig <= 'z') then
      DigitValue  := Ord (Dig) - Ord ('a') + 10
    else if (Dig >= 'A') and (Dig <= 'Z') then
      DigitValue  := Ord (Dig) - Ord ('A') + 10
    else
      DigitValue := -1
  end;

... even though this code has six branches.

Is this a problem in GNU Pascal itself or in the back-end?

NOTE: even attribute (const) after initialization of the array didn't help
to evade memcpy().

Mirsad

isvalidnumberbase.pas:
----------------------------------------------------------------------------
program isvalidnum (output);

var i, Base: Integer;
    OK     : Boolean;

{ Returns True if every character of s, after an optional leading '-',
  has a digit value in 0 .. Base - 1; returns False at the first character
  that does not. Base is asserted to be in 2..36.
  NOTE(review): s[1] is read without first checking that s is non-empty. }
function IsValidNumberBase2 (s: String; Base: Integer): Boolean; attribute(const);
var i, dv  : Integer; attribute (register);
    b      : Byte;    attribute (register);
      { Table-lookup digit conversion: returns v[Ord (Dig)] for characters in
        '0'..'z' and -1 for everything else. }
      function DigitValue (Dig: Char): Integer; {attribute (inline, const);}
        { NOTE(review): d is declared but never used. }
        var d : Integer; attribute (register);
{$if 0}
{$if Low (Char) < 0}
{$error "this won't work: negative Char used as index}
{$endif}
{$endif}
      { v is an initialized local variable indexed by character code.
        This is the array the accompanying message is about: the generated
        code copies all 256 bytes with memcpy on every call of DigitValue.
        NOTE(review): the rows for codes 80..95 and 112..127 run 25..36, so
        index 91 and index 123 hold 36 rather than -1; index 123 is excluded
        by the range test in the body, index 91 is not - confirm intent. }
      v : array [0..255] of ByteInt = (
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
               -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
               25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, -1,
               -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
               25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 );

  begin
    { Two comparisons bound the character, then one table lookup. }
    if (Dig >= '0') and (Dig <= 'z') then
      DigitValue := v[Ord (Dig)]
    else
      DigitValue := -1;
  end;

begin
  { The base is narrowed to a Byte before the range check. }
  b := Base;
  Assert ((b >= 2) and (b <= 36), 'base out of range (2..36)');
  i := 1;
  { Skip an optional leading minus sign. }
  if s[i] = '-' then
    Inc (i);
  { The loop starts at the current value of i. VERSION1 selects the
    explicit two-comparison test; the alternative uses a set membership
    test instead. }
  for i := i to Length (s) do
{$define VERSION1}
{$ifdef  VERSION1}
  begin
    dv := DigitValue (s[i]);
    if (dv < 0) or (dv >= b) then
      Return False
  end;
 {$else}
  if not (DigitValue (s[i]) in [0 .. b-1]) then
    Return False;
   {$endif}
  Return True;
end;

{ Self-test with three checks: a string of 0/1 digits must be valid in
  base 2, a string containing the digit 2 must be rejected in base 2, and
  a string of 0/1/2 digits must be valid in base 3. Prints OK only if all
  three checks give the expected result; prints nothing otherwise. }
begin { main }
  OK := IsValidNumberBase2 ('010101010101101010101', 2) and
        not IsValidNumberBase2 ('0101010210101101010101', 2) and
        IsValidNumberBase2 ('0121012210101101010101', 3);

  if OK then
    WriteLn ('OK');
end.
-------------------------------------------------------------------------

isvalidnumberbase.s (edited):
-------------------------------------------------------------------------
	.file	"isvalidnumberbase.pas"
	.local	I
	.comm	I,4,4
	.local	Base
	.comm	Base,4,4
	.local	Ok
	.comm	Ok,1,1
	.section	.rodata
.LC0:
	.byte	-1
.
.
.
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	0
	.byte	1
	.byte	2
	.byte	3
	.byte	4
	.byte	5
	.byte	6
	.byte	7
	.byte	8
	.byte	9
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	10
	.byte	11
	.byte	12
	.byte	13
	.byte	14
	.byte	15
	.byte	16
	.byte	17
	.byte	18
	.byte	19
	.byte	20
	.byte	21
	.byte	22
	.byte	23
	.byte	24
	.byte	25
	.byte	26
	.byte	27
	.byte	28
	.byte	29
	.byte	30
	.byte	31
	.byte	32
	.byte	33
	.byte	34
	.byte	35
	.byte	36
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	10
	.byte	11
	.byte	12
	.byte	13
	.byte	14
	.byte	15
	.byte	16
	.byte	17
	.byte	18
	.byte	19
	.byte	20
	.byte	21
	.byte	22
	.byte	23
	.byte	24
	.byte	25
	.byte	26
	.byte	27
	.byte	28
	.byte	29
	.byte	30
	.byte	31
	.byte	32
	.byte	33
	.byte	34
	.byte	35
	.byte	36
	.byte	-1
	.byte	-1
.
.
.
	.byte	-1
	.byte	-1
	.byte	-1
	.byte	-1
	.text
	.p2align 4,,15
	# Digitvalue.0: code generated for the nested Pascal function DigitValue.
	# Takes one byte argument at 8(%ebp) and returns the digit value in %eax.
	.type	Digitvalue.0,@function
Digitvalue.0:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$296, %esp
	movl	%ebx, -4(%ebp)
	# eax = address of the stack buffer at ebp-280 (memcpy destination below).
	leal	-280(%ebp), %eax
	# ebx = the character argument, zero-extended.
	movzbl	8(%ebp), %ebx
	# The caller loads %ecx just before the call; presumably the static chain
	# for the nested function - TODO confirm.
	movl	%ecx, -12(%ebp)
	# memcpy (ebp-280, .LC0, 256): the whole 256-byte table is copied from
	# .rodata into the stack frame on every call. This is the overhead the
	# message is about.
	movl	$256, 8(%esp)
	movl	$.LC0, 4(%esp)
	movl	%eax, (%esp)
	call	memcpy
	# Range test: character code <= 47 (below '0') or > 122 (above 'z')
	# goes to .L4, which returns -1.
	cmpb	$47, %bl
	jbe	.L4
	cmpb	$122, %bl
	ja	.L4
	# In range: load the sign-extended table byte from the stack copy.
	movzbl	%bl, %eax
	movsbl	-280(%eax,%ebp),%eax
.L5:
	# Common exit: restore %ebx and the frame, return with result in %eax.
	movl	-4(%ebp), %ebx
	movl	%ebp, %esp
	popl	%ebp
	ret
	.p2align 4,,7
.L4:
	# Out-of-range character: result is -1.
	movl	$-1, %eax
	jmp	.L5
.Lfe1:
	.size	Digitvalue.0,.Lfe1-Digitvalue.0
	.section	.rodata.str1.1,"aMS",@progbits,1
.LC1:
	.string	"base out of range (2..36)"
	.text
	.p2align 4,,15
# Isvalidnumberbase2: code generated for the Pascal function
# IsValidNumberBase2. Arguments: 8(%ebp) = pointer to the string,
# 12(%ebp) = base (only the low byte is used). Result is returned in %al:
# 1 on the .L9 path, 0 on the .L17 path.
.globl Isvalidnumberbase2
	.type	Isvalidnumberbase2,@function
Isvalidnumberbase2:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	subl	$92, %esp
	# edi = string pointer; al = base byte, also kept at ebp-73.
	movl	8(%ebp), %edi
	movzbl	12(%ebp), %eax
	# bl = (base > 1) and (base <= 36), the assertion condition.
	cmpb	$1, %al
	seta	%bl
	movb	%al, -73(%ebp)
	testb	%bl, %bl
	je	.L6
	cmpb	$36, %al
	setbe	%bl
.L6:
	# Build a temporary string for the assertion message on the stack:
	# 25 is stored at ebp-72 and ebp-68, and the 25 characters of .LC1 are
	# copied to ebp-64 with memcpy. Presumably the two 25s are the capacity
	# and length fields of the string - TODO confirm.
	movl	$25, -72(%ebp)
	leal	-64(%ebp), %eax
	movl	$25, 8(%esp)
	movl	$.LC1, 4(%esp)
	movl	%eax, (%esp)
	call	memcpy
	# _p_Assert (condition, pointer to the message temporary).
	leal	-72(%ebp), %eax
	movl	%eax, 4(%esp)
	movzbl	%bl, %eax
	# ebx = 1: the character index used by the loop below.
	movl	$1, %ebx
	movl	$25, -68(%ebp)
	movl	%eax, (%esp)
	call	_p_Assert
	# If the byte at offset 8 of the string is 45 ('-'), start at index 2
	# (see .L18).
	cmpb	$45, 8(%edi)
	je	.L18
.L8:
	# esi = dword at offset 4 of the string, used as the loop limit
	# (presumably the current length - TODO confirm).
	movl	4(%edi), %esi
	# Start index already past the limit: nothing to check, return 1.
	cmpl	%esi, %ebx
	jg	.L9
	# ebp-74 is a flag: 0 before the first iteration, 1 afterwards.
	movb	$0, -74(%ebp)
	.p2align 4,,15
.L10:
	# Loop head: on every iteration but the first, stop when the index has
	# reached the limit, otherwise increment it.
	cmpb	$0, -74(%ebp)
	je	.L12
	cmpl	%esi, %ebx
	je	.L9
	incl	%ebx
.L12:
	movb	$1, -74(%ebp)
	# ecx is loaded for the callee, which stores it on entry.
	leal	-24(%ebp), %ecx
	# Load the character at offset 7 + index and pass it to Digitvalue.0.
	movzbl	7(%edi,%ebx), %eax
	movl	%eax, (%esp)
	call	Digitvalue.0
	# cl = sign bit of the result; a negative digit value returns 0.
	movl	%eax, %ecx
	shrl	$31, %ecx
	testb	%cl, %cl
	movl	%eax, %edx
	jne	.L17
	# Compare the digit value with the base: if it is less than the base,
	# cl stays 0 and the loop continues; otherwise cl becomes 1.
	movzbl	-73(%ebp), %eax
	cmpl	%eax, %edx
	jl	.L14
	testl	%edx, %edx
	js	.L14
	movb	$1, %cl
.L14:
	testb	%cl, %cl
	je	.L10
.L17:
	# Invalid digit: result 0.
	xorl	%eax, %eax
.L1:
	# Common exit.
	addl	$92, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.p2align 4,,7
.L9:
	# All characters checked: result 1.
	movb	$1, %al
	jmp	.L1
.L18:
	# Leading '-': start checking at index 2.
	movl	$2, %ebx
	jmp	.L8
.Lfe2:
	.size	Isvalidnumberbase2,.Lfe2-Isvalidnumberbase2
	.section	.rodata.str1.1
.LC2:
	.string	"010101010101101010101"
.LC5:
	.string	"OK"
.LC4:
	.string	"0121012210101101010101"
.LC3:
	.string	"0101010210101101010101"
	.text
	.p2align 4,,15
# pascal_main_program: code generated for the main program body. Runs up
# to three Isvalidnumberbase2 checks, stores the combined result in Ok and
# writes the string at .LC5 ("OK") when the result is non-zero.
.globl pascal_main_program
	.type	pascal_main_program,@function
pascal_main_program:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$136, %esp
	# First check: copy the 21-character literal .LC2 into a stack
	# temporary (memcpy) and call Isvalidnumberbase2 (temp, 2).
	movl	$21, 8(%esp)
	leal	-32(%ebp), %eax
	movl	$21, -40(%ebp)
	movl	$.LC2, 4(%esp)
	movl	%eax, (%esp)
	call	memcpy
	leal	-40(%ebp), %eax
	movl	$21, -36(%ebp)
	movl	$2, 4(%esp)
	movl	%eax, (%esp)
	call	Isvalidnumberbase2
	# Only if the first check returned non-zero is the second one run (.L28).
	testb	%al, %al
	jne	.L28
.L21:
	# Only if the result so far is non-zero is the third check run (.L29).
	testb	%al, %al
	jne	.L29
.L23:
	# Store the final result in Ok; write the message if it is non-zero.
	movb	%al, Ok
	testb	%al, %al
	jne	.L30
.L19:
	movl	%ebp, %esp
	popl	%ebp
	ret
	.p2align 4,,7
.L30:
	# Call _p_Internal_Write with _p_Output as the first argument and .LC5
	# among the others, then go to .L31 if _p_InOutRes is non-zero.
	movl	$22, 24(%esp)
	movl	$2, 20(%esp)
	movl	$.LC5, 16(%esp)
	movl	$17, 12(%esp)
	movl	$2, 8(%esp)
	movl	$784, 4(%esp)
	movl	$_p_Output, (%esp)
	call	_p_Internal_Write
	movl	_p_InOutRes, %eax
	testl	%eax, %eax
	je	.L19
	jmp	.L31
	.p2align 4,,7
.L29:
	# Third check: copy the 22-character literal .LC4 into a stack
	# temporary and call Isvalidnumberbase2 (temp, 3).
	movl	$22, -104(%ebp)
	leal	-96(%ebp), %eax
	movl	$22, 8(%esp)
	movl	$.LC4, 4(%esp)
	movl	%eax, (%esp)
	call	memcpy
	leal	-104(%ebp), %eax
	movl	$22, -100(%ebp)
	movl	$3, 4(%esp)
	movl	%eax, (%esp)
	call	Isvalidnumberbase2
	jmp	.L23
	.p2align 4,,7
.L28:
	# Second check: copy the 22-character literal .LC3 into a stack
	# temporary and call Isvalidnumberbase2 (temp, 2); the result is
	# inverted by sete because this string is expected to be rejected.
	movl	$22, -72(%ebp)
	leal	-64(%ebp), %eax
	movl	$22, 8(%esp)
	movl	$.LC3, 4(%esp)
	movl	%eax, (%esp)
	call	memcpy
	leal	-72(%ebp), %eax
	movl	$22, -68(%ebp)
	movl	$2, 4(%esp)
	movl	%eax, (%esp)
	call	Isvalidnumberbase2
	testb	%al, %al
	sete	%al
	jmp	.L21
.L31:
	# Reached when _p_InOutRes was non-zero after the write.
	call	_p_CheckInOutRes
.Lfe3:
	.size	pascal_main_program,.Lfe3-pascal_main_program
	.data
	.type	ctor_run_condition_14.1,@object
	.size	ctor_run_condition_14.1,1
ctor_run_condition_14.1:
	.byte	0
	.text
	.p2align 4,,15
# init_pascal_main_program: run-once initializer. If the byte flag
# ctor_run_condition_14.1 is already non-zero it returns immediately;
# otherwise it sets the flag to 1 and jumps to _p_DoInitProc.
.globl init_pascal_main_program
	.type	init_pascal_main_program,@function
init_pascal_main_program:
	cmpb	$0, ctor_run_condition_14.1
	pushl	%ebp
	movl	%esp, %ebp
	je	.L34
	popl	%ebp
	ret
	.p2align 4,,7
.L34:
	# First call: the frame is torn down before the jump, so _p_DoInitProc
	# returns directly to this function's caller.
	popl	%ebp
	movb	$1, ctor_run_condition_14.1
	jmp	_p_DoInitProc
.Lfe4:
	.size	init_pascal_main_program,.Lfe4-init_pascal_main_program
	.p2align 4,,15
# main: program entry point. Passes its three stack arguments on to
# _p_initialize, runs the program initializer and the main program body,
# calls _p_finalize and returns 0.
.globl main
	.type	main,@function
main:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$24, %esp
	# The loaded value is overwritten by the next instruction; presumably
	# the load only forces a reference to the run-time version symbol -
	# TODO confirm.
	movl	__GPC_RTS_VERSION_20030507__, %eax
	movl	16(%ebp), %eax
	# Align the stack pointer down to a 16-byte boundary.
	andl	$-16, %esp
	movl	%eax, 8(%esp)
	movl	12(%ebp), %eax
	movl	%eax, 4(%esp)
	movl	8(%ebp), %eax
	movl	%eax, (%esp)
	call	_p_initialize
	call	init_pascal_main_program
	call	pascal_main_program
	call	_p_finalize
	movl	%ebp, %esp
	# Return value 0.
	xorl	%eax, %eax
	popl	%ebp
	ret
.Lfe5:
	.size	main,.Lfe5-main
	.ident	"GCC: (GNU) 3.2.1"
-------------------------------------------------------------------------------

Previous mail   Next mail   Unformatted/full headers
Overview  10 days   Subject   Date   Thread   Author  


Replies

Author Subject Date
Frank Heckenbach GPC performance kludge with local 8 Jun 2003, 02:05:41
CBFalconer GPC performance kludge with local 7 Jun 2003, 22:14:15
Frank Heckenbach GPC performance kludge with local 9 Jun 2003, 13:41:53

Back to main page of archive


Note: This page contains information that does not originate from the owner of this web site, but from the authors of the mails archived. The owner of this web site is not responsible for the content of such information. Any use of that information requires the consent of the respective author.

Where WWW addresses (URLs) in the mails archived are marked as hyperlinks, this is only for the comfort of the reader. The content of the web pages linked to like this does not necessarily reflect the opinion of the owner of this web site or of the authors of the mails archived. The owner of this web site is not responsible for the content of such web pages. Those pages are explicitly not to be considered as part of the content of this page, but merely as references.


This page was created by Crystal 0.999 (Linux 2.4.27/i686).