forked from cirosantilli/cpp-cheat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
float_h.c
133 lines (92 loc) · 3.11 KB
/
float_h.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*
# float.h
Gives characteristics of floating point numbers and of base numerical operations
for the current architecture
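Most macros that start with FLT_ (the exceptions are FLT_ROUNDS, FLT_RADIX and FLT_EVAL_METHOD, which apply to all floating types) also have versions starting with: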
- DBL for `double`
- LDBL for `long double`
*/
#include "common.h"
/*
Prints the floating point characteristics exposed by float.h and asserts
the relationship between LDBL_MIN and subnormal numbers.

Returns EXIT_SUCCESS; a failed assert aborts the program.
*/
int main(void) {
    /*
    # Rounding method

    # FLT_ROUNDS

        Rounding mode of floating point addition.

        Values:

        - -1: indeterminable
        - 0: toward zero
        - 1: to nearest
        - 2: toward positive infinity
        - 3: toward negative infinity

        The C standard says that evaluating FLT_ROUNDS reflects execution-time
        changes of the rounding mode made through `fesetround` from fenv.h,
        so it is not required to be a compile-time constant.
    */
    {
        printf("FLT_ROUNDS = %d\n", FLT_ROUNDS);
    }

    /*
    # FLT_MIN

        Smallest positive number that can be represented as a normal float.

        Any non-zero number with absolute value smaller than this is subnormal,
        and support for those is optional.
    */
    {
        /* The float argument is promoted to double by the variadic call, which is what %a expects. */
        printf("FLT_MIN = %a\n", FLT_MIN);
        printf("DBL_MIN = %a\n", DBL_MIN);
        printf("LDBL_MIN = %La\n", LDBL_MIN);
    }

    /*
    # FLT_RADIX

        Radix of the exponent representation, i.e. the base of the mantissa digits.

        TODO wow, there are non radix 2 representation implementations?!
        IEEE 754 specifies radix 10 formats, which as of 2015 are rarely
        implemented in hardware, maybe that is the major motivation?

    # FLT_MANT_DIG

        Number of base FLT_RADIX digits (bits when the radix is 2) of the mantissa:

        - 24 on 32-bit float
    */
    {
        printf("FLT_RADIX = %d\n", FLT_RADIX);
        printf("FLT_MANT_DIG = %d\n", FLT_MANT_DIG);
    }

#if __STDC_VERSION__ >= 201112L
    /*
    # subnormal numbers

        The `HAS_SUBNORM` macros were added in C11.

        Subnormal numbers are defined in IEC 60559.

        E.g.:

            0.01

        Is represented as:

            1 * 10^-2

        However the exponent has a fixed number of bits, so it has a smallest
        possible value, and numbers that are too small cannot be represented
        in that form.

        A solution to extend the range is to allow numbers whose mantissa
        starts with 0.

        So if for example -4 is the smallest possible exponent,
        10^-5 could be represented as:

            0.1 * 10^-4

        Such a number that cannot be represented without leading zeroes
        is a subnormal number.

        The tradeoff is that subnormal numbers have limited precision.

        C11 specifies that the implementation of this feature is optional,
        and one can check if those are supported in the implementation
        via the `HAS_SUBNORM` macros.

        As of 2013 hardware support is low but starting to appear.
        Before this date, implementations are done in software, and are therefore slow.

        The smallest normal floating point number is `FLT_MIN`.

        Values:

        - -1: indeterminable
        - 0: no
        - 1: yes
    */
    {
        printf("FLT_HAS_SUBNORM = %d\n", FLT_HAS_SUBNORM);
        printf("DBL_HAS_SUBNORM = %d\n", DBL_HAS_SUBNORM);
        printf("LDBL_HAS_SUBNORM = %d\n", LDBL_HAS_SUBNORM);
        assert(isnormal(LDBL_MIN));
        /*
        Compare against 1 explicitly: -1 means "indeterminable" and is also
        non-zero, but in that case subnormal support is not guaranteed,
        so the asserts below must not run.
        */
        if (LDBL_HAS_SUBNORM == 1) {
            /* Halving the smallest normal number must give a non-zero subnormal. */
            long double ldbl_min_2 = LDBL_MIN / 2.0L;
            printf("LDBL_MIN / 2.0 = %La\n", ldbl_min_2);
            assert(ldbl_min_2 != 0);
            assert(ldbl_min_2 != LDBL_MIN);
            assert(!isnormal(ldbl_min_2));
        }
    }
#endif

    return EXIT_SUCCESS;
}