Oberon Community Platform Forum
December 12, 2019, 09:14:19 AM *
Welcome, Guest. Please login or register.
Did you miss your activation email?

Login with username, password and session length
News:
 
   Home   Help Search Login Register  
Pages: [1]
  Print  
Author Topic: Floating point precission or representation is wrong  (Read 3250 times)
sage
Full Member
***
Posts: 170



WWW
« on: June 14, 2008, 07:10:42 PM »

Code:
MODULE TestFloat;
IMPORT StdLog := AosOut, Utilities;
PROCEDURE Test*;
VAR
str: ARRAY 32 OF CHAR;
f: LONGREAL;
BEGIN
f := 1.0;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := 0.1;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := 0.2;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := 0.01;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := 0.02;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := -1.0;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := -0.1;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
f := -0.01;
StdLog.String("f = "); Utilities.FloatToStr(f, 0, 18, 0, str); StdLog.String(str); StdLog.Ln;
END Test;
END TestFloat.

S.Free TestFloat~
TestFloat.Test~
TestFloat.Test~ produces strange output in log:
Quote
f =   1.000000000000000000
f =   0.100000001490116100
f =   0.200000002980232200
f =   0.009999999776482582
f =   0.019999999552965160
f =  -1.000000000000000000
f =  -0.100000001490116100
f =  -0.009999999776482582
Actually numbers in garbage has some relations... 29802322 = 14901161 * 2


Identical code for BlackBox Component Builder 1.5 and results:

Code:
PROCEDURE Test*;
VAR
f: LONGREAL;
BEGIN
f := 1.0;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := 0.1;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := 0.2;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := 0.01;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := 0.02;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := -1.0;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := -0.1;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
f := -0.01;
StdLog.String("f = "); StdLog.Real(f); StdLog.Ln;
END Test;
Quote
f =  1.0
f =  0.1
f =  0.2
f =  0.01
f =  0.02
f =  -1.0
f =  -0.1
f =  -0.01

I've tryed to check if floating point precission are IEEE-compliant. See description here: http://www.fizyka.umk.pl/nrbook/c20-1.pdf
Adopted version of CpcFloat.Mod from http://www.zinnamturm.eu/pac/CpcFloat.txt attached to post

Achieved results:

BlackBox Component Builder 1.5 (type REAL):
Quote
ibeta =  2
it =  53
machep =  -52
eps =  2.220446049250313E-16
negep =  -53
epsneg =  1.110223024625157E-16
iexp =  11
minexp =  -1022
xmin =  2.225073858507201E-308
maxexp =  1024
xmax =  1.797693134862316E+308
irnd =  5
ngrd =  0

Bluebottle / WinAos (type LONGREAL):
Quote
ibeta = 2
it = 53
machep = -52
eps =   0.00000000000000022204
negep = -53
epsneg =   0.00000000000000011102
iexp = 11
minexp = -1022
xmin =   0.00000000000000000000
maxexp = 1024
xmax =   17976931348623150000000000000
irnd = 5
ngrd = 0

Probably results are IEEE-compliant, except for values xmin and xmax because of wrong representation.

Further inspectiong of code shows strong difference in corresponding code used for converting floating point values to strings.
Aos:
Code:
PROCEDURE FloatToStr*(x: LONGREAL; n, f, D: LONGINT; VAR str: ARRAY OF CHAR);
VAR pos, len, e, i, h, l: LONGINT; r, z: LONGREAL; d: ARRAY 16 OF CHAR; s: CHAR;

PROCEDURE Wr(ch: CHAR);
BEGIN IF pos < len THEN str[pos] := ch; INC(pos) END;
END Wr;

BEGIN
len := LEN(str)-1; pos := 0;
e := Reals.ExpoL(x);
IF (e = 2047) OR (ABS(D) > 308) THEN
Wr("N"); Wr("a"); Wr("N")
ELSE
IF D = 0 THEN DEC(n, 2) ELSE DEC(n, 7) END;
IF n < 2 THEN n := 2 END;
IF f < 0 THEN f := 0 END;
IF n < f + 2 THEN n := f + 2 END;
DEC(n, f);
IF (e # 0) & (x < 0) THEN s := "-"; x := - x ELSE s := " " END;
IF e = 0 THEN
h := 0; l := 0; DEC(e, D-1) (* no denormals *)
ELSE
e := (e - 1023) * 301029 DIV 1000000; (* ln(2)/ln(10) = 0.301029996 *)
z := Reals.Ten(e+1);
IF x >= z THEN x := x/z; INC(e) ELSE x:= x * Reals.Ten(-e) END;
DEC(e, D-1); i := -(e+f);
IF i <= 0 THEN r := 5 * Reals.Ten(i) ELSE r := 0 END;
IF x >= 10 THEN
x := x * Reals.Ten(-1) + r; INC(e)
ELSE
x := x + r;
IF x >= 10 THEN x := x * Reals.Ten(-1); INC(e) END
END;
x := x * Reals.Ten(7); h:= ENTIER(x); x := (x-h) * Reals.Ten(8); l := ENTIER(x)
END;
i := 15;
WHILE i > 7 DO d[i] := CHR(l MOD 10 + ORD("0")); l := l DIV 10; DEC(i) END;
WHILE i >= 0 DO d[i] := CHR(h MOD 10 + ORD("0")); h := h DIV 10; DEC(i) END;
IF n <= e THEN n := e + 1 END;
IF e > 0 THEN
WHILE n > e DO Wr(" "); DEC(n) END;
Wr(s); e:= 0;
WHILE n > 0 DO
DEC(n);
IF e < 16 THEN Wr(d[e]); INC(e) ELSE Wr("0") END
END;
Wr(".")
ELSE
WHILE n > 1 DO Wr(" "); DEC(n) END;
Wr(s); Wr("0"); Wr(".");
WHILE (0 < f) & (e < 0) DO Wr("0"); DEC(f); INC(e) END
END;
WHILE f > 0 DO
DEC(f);
IF e < 16 THEN Wr(d[e]); INC(e) ELSE Wr("0") END
END;
IF D # 0 THEN
IF D < 0 THEN Wr("D"); Wr("-"); D := - D
ELSE Wr("D"); Wr("+")
END;
Wr(CHR(D DIV 100 + ORD("0"))); D := D MOD 100;
Wr(CHR(D DIV 10 + ORD("0"))); Wr(CHR(D MOD 10 + ORD("0")))
END
END;
str[pos] := 0X
END FloatToStr;

BlackBox Component Builder:
Code:
PROCEDURE RealToStringForm* (x: REAL; precision, minW, expW: INTEGER; fillCh: CHAR;
OUT s: ARRAY OF CHAR);
VAR exp, len, i, j, n, k, p: INTEGER; m: ARRAY 80 OF CHAR; neg: BOOLEAN;
BEGIN
ASSERT((precision > 0) (*& (precision <= 18)*), 20);
ASSERT((minW >= 0) & (minW < LEN(s)), 21);
ASSERT((expW > -LEN(s)) & (expW <= 3), 22);
exp := Math.Exponent(x);
IF exp = MAX(INTEGER) THEN
IF fillCh = "0" THEN fillCh := digitspace END;
x := Math.Mantissa(x);
IF x = -1 THEN m := "-inf"; n := 4
ELSIF x = 1 THEN m := "inf"; n := 3
ELSE m := "nan"; n := 3
END;
i := 0; j := 0;
WHILE minW > n DO s[i] := fillCh; INC(i); DEC(minW) END;
WHILE (j <= n) & (i < LEN(s)) DO s[i] := m[j]; INC(i); INC(j) END
ELSE
neg := FALSE; len := 1; m := "00";
IF x < 0 THEN x := -x; neg := TRUE; DEC(minW) END;
IF x # 0 THEN
exp := (exp - 8) * 30103 DIV 100000; (* * log(2) *)
IF exp > 0 THEN
n := SHORT(ENTIER(x / Math.IntPower(10, exp)));
x := x / Math.IntPower(10, exp) - n
ELSIF exp > -maxExp THEN
n := SHORT(ENTIER(x * Math.IntPower(10, -exp)));
x := x * Math.IntPower(10, -exp) - n
ELSE
n := SHORT(ENTIER(x * Math.IntPower(10, -exp - 2 * maxDig) * factor * factor));
x := x * Math.IntPower(10, -exp - 2 * maxDig) * factor * factor - n
END;
(* x0 = (n + x) * 10^exp, 200 < n < 5000 *)
p := precision - 4;
IF n < 1000 THEN INC(p) END;
IF (expW < 0) & (p > exp - expW) THEN p := exp - expW END;
IF p >= 0 THEN
x := x + 0.5 / Math.IntPower(10, p); (* rounding correction *)
IF x >= 1 THEN INC(n); x := x - 1 END
ELSIF p = -1 THEN INC(n, 5)
ELSIF p = -2 THEN INC(n, 50)
ELSIF p = -3 THEN INC(n, 500)
END;
i := 0; k := 1000; INC(exp, 3);
IF n < 1000 THEN k := 100; DEC(exp) END;
WHILE (i < precision) & ((k > 0) OR (x # 0)) DO
IF k > 0 THEN p := n DIV k; n := n MOD k; k := k DIV 10
ELSE x := x * 10; p := SHORT(ENTIER(x)); x := x - p
END;
m[i] := CHR(p + ORD("0")); INC(i);
IF p # 0 THEN len := i END
END
END;
(* x0 = m[0].m[1]...m[len-1] * 10^exp *)
i := 0;
IF (expW < 0) OR (expW = 0) & (exp >= -3) & (exp <= len + 1) THEN
n := exp + 1; k := len - n;
IF n < 1 THEN n := 1 END;
IF expW < 0 THEN k := -expW ELSIF k < 1 THEN k := 1 END;
j := minW - n - k - 1; p := -exp;
IF neg & (p >= MAX(0, n) + MAX(0, k)) THEN neg := FALSE; INC(j) END
ELSE
IF ABS(exp) >= 100 THEN expW := 3
ELSIF (expW < 2) & (ABS(exp) >= 10) THEN expW := 2
ELSIF expW < 1 THEN expW := 1
END;
IF len < 2 THEN len := 2 END;
j := minW - len - 3 - expW; k := len;
IF j > 0 THEN
k := k + j; j := 0;
IF k > precision THEN j := k - precision; k := precision END
END;
n := 1; DEC(k); p := 0
END;
IF neg & (fillCh = "0") THEN s[i] := "-"; INC(i); neg := FALSE END;
WHILE j > 0 DO s[i] := fillCh; INC(i); DEC(j) END;
IF neg & (i < LEN(s)) THEN s[i] := "-"; INC(i) END;
j := 0;
WHILE (n > 0) & (i < LEN(s)) DO
IF (p <= 0) & (j < len) THEN s[i] := m[j]; INC(j) ELSE s[i] := "0" END;
INC(i); DEC(n); DEC(p)
END;
IF i < LEN(s) THEN s[i] := "."; INC(i) END;
WHILE (k > 0) & (i < LEN(s)) DO
IF (p <= 0) & (j < len) THEN s[i] := m[j]; INC(j) ELSE s[i] := "0" END;
INC(i); DEC(k); DEC(p)
END;
IF expW > 0 THEN
IF i < LEN(s) THEN s[i] := "E"; INC(i) END;
IF i < LEN(s) THEN
IF exp < 0 THEN s[i] := "-"; exp := -exp ELSE s[i] := "+" END;
INC(i)
END;
IF (expW = 3) & (i < LEN(s)) THEN s[i] := CHR(exp DIV 100 + ORD("0")); INC(i) END;
IF (expW >= 2) & (i < LEN(s)) THEN s[i] := CHR(exp DIV 10 MOD 10 + ORD("0")); INC(i) END;
IF i < LEN(s) THEN s[i] := CHR(exp MOD 10 + ORD("0")); INC(i) END
END
END;
IF i < LEN(s) THEN s[i] := 0X ELSE HALT(23) END
END RealToStringForm;



* CpcFloat.Mod (4.67 KB - downloaded 297 times.)
Logged
Pages: [1]
  Print  
 
Jump to:  

Powered by MySQL Powered by PHP Powered by SMF 1.1.21 | SMF © 2015, Simple Machines Valid XHTML 1.0! Valid CSS!