Skip to content

Commit

Permalink
Faster Python - 15th version
Browse files Browse the repository at this point in the history
  • Loading branch information
tisnik committed May 22, 2024
1 parent 4518633 commit 3c146d9
Showing 1 changed file with 173 additions and 0 deletions.
173 changes: 173 additions & 0 deletions docs/faster_python.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ threading.Thread(target=worker).start()

---

### GIL

* Threads are just concurrent, not parallel

![GIL](images/GIL.gif)

---

### Goal

---
Expand Down Expand Up @@ -131,6 +139,15 @@ threading.Thread(target=worker).start()

---

## Python w/o GIL

* Recent CPython optimized slightly
* CPython w/o GIL
- fork of standard CPython
* Most AOT/JIT compilers support "nogil"

---

## Technologies available today

* Cython
Expand Down Expand Up @@ -551,8 +568,10 @@ def function():
## Mypyc

* AOT compiler
- a bit similar to Cython
* Heavily based on type hints
- part of `mypy` package
- might be problematic for a large code base

---

Expand All @@ -564,7 +583,10 @@ def function():

* AOT compiler
* Is able to compile into "one file"
- can be used to distribute an application
* Does not require type hints everywhere
* (Very) slow compilation process
- up to 30 minutes for a large code base!

---

Expand All @@ -578,6 +600,157 @@ def function():

---

![mandelbrot](images/mandelbrot.png)

---

```python
def calc_mandelbrot(width, height, maxiter, palette):
    """Print a Mandelbrot set rendering to stdout as a plain-text PPM (P3) image.

    The sampled region starts at (-2.0, -1.5) and spans 3.0 units along
    both axes; one "r g b" line is printed per pixel, row by row.

    Args:
        width: image width in pixels.
        height: image height in pixels.
        maxiter: upper bound on iterations performed for each pixel.
        palette: sequence indexed by ``i % 256``; each entry provides the
            red, green and blue components at positions 0, 1 and 2.
    """
    # PPM header: magic number, resolution, maximum color value.
    print("P3")
    print("{w} {h}".format(w=width, h=height))
    print("255")

    cy = -1.5
    for y in range(0, height):
        cx = -2.0
        for x in range(0, width):
            zx = 0.0
            zy = 0.0
            i = 0
            while i < maxiter:
                zx2 = zx * zx
                zy2 = zy * zy
                # Stop once |z|^2 exceeds 4.
                if zx2 + zy2 > 4.0:
                    break
                # zy must be updated first: it still needs the old zx.
                zy = 2.0 * zx * zy + cy
                zx = zx2 - zy2 + cx
                i += 1

            # The iteration count selects the color, wrapped to 256 entries.
            r = palette[i % 256][0]
            g = palette[i % 256][1]
            b = palette[i % 256][2]
            print("{r} {g} {b}".format(r=r, g=g, b=b))
            cx += 3.0/width
        cy += 3.0/height
```

---

```python
# Cython (pure Python mode) variant of calc_mandelbrot: prints the image as a
# plain-text PPM (P3) via C printf.
# The decorators request C division semantics, mark the function as callable
# without the GIL, and compile it as a C function.
@cython.cdivision(True)
@cython.nogil
@cython.cfunc
def calc_mandelbrot(width: cython.int, height: cython.int, maxiter: cython.int, palette: cython.p_uchar) -> cython.int:
    # Explicit C types for all locals used in the hot loop.
    zx: cython.double
    zy: cython.double
    zx2: cython.double
    zy2: cython.double
    cx: cython.double
    cy: cython.double
    r: cython.uchar
    g: cython.uchar
    b: cython.uchar
    i: cython.int
    index: cython.int

    # PPM header: magic number, resolution, maximum color value.
    printf("P3\n%d %d\n255\n", width, height)

    cy = -1.5
    for y in range(0, height):
        cx = -2.0
        for x in range(0, width):
            zx = 0.0
            zy = 0.0
            i = 0
            while i < maxiter:
                zx2 = zx * zx
                zy2 = zy * zy
                # Stop once |z|^2 exceeds 4.
                if zx2 + zy2 > 4.0:
                    break
                # zy must be updated first: it still needs the old zx.
                zy = 2.0 * zx * zy + cy
                zx = zx2 - zy2 + cx
                i += 1

            # The palette is read as a flat byte buffer, 3 bytes per entry.
            # NOTE(review): unlike the pure-Python variant (i % 256), the
            # index is not wrapped here; presumably the buffer must hold at
            # least maxiter + 1 entries - confirm against the caller.
            index = i * 3
            r = palette[index]
            g = palette[index+1]
            b = palette[index+2]
            printf("%d %d %d\n", r, g, b)
            cx += 3.0/width
        cy += 3.0/height
```

---

```C
/*
 * C function emitted by Cython for calc_mandelbrot.
 * __pyx_v_* variables correspond to the named arguments and locals of the
 * source function; __pyx_t_* are compiler-introduced temporaries and
 * __pyx_r holds the return value.
 * Prints a plain-text PPM (P3) image to stdout and always returns 0.
 */
static int __pyx_f_17mandelbrot_cython_calc_mandelbrot(int __pyx_v_width, int __pyx_v_height, int __pyx_v_maxiter, unsigned char *__pyx_v_palette) {
double __pyx_v_zx;
double __pyx_v_zy;
double __pyx_v_zx2;
double __pyx_v_zy2;
double __pyx_v_cx;
double __pyx_v_cy;
unsigned char __pyx_v_r;
unsigned char __pyx_v_g;
unsigned char __pyx_v_b;
int __pyx_v_i;
int __pyx_v_index;
/* Loop counters y and x: assigned on every iteration but never read. */
CYTHON_UNUSED long __pyx_v_y;
CYTHON_UNUSED long __pyx_v_x;
int __pyx_r;
int __pyx_t_1;
int __pyx_t_2;
long __pyx_t_3;
int __pyx_t_4;
int __pyx_t_5;
long __pyx_t_6;
int __pyx_t_7;

/* PPM header: magic number, resolution, maximum color value. */
(void)(printf(((char const *)"P3\n%d %d\n255\n"), __pyx_v_width, __pyx_v_height));

__pyx_v_cy = -1.5;
/* Outer loop over rows; the bound (height) is copied into temporaries first. */
__pyx_t_1 = __pyx_v_height;
__pyx_t_2 = __pyx_t_1;
for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
__pyx_v_y = __pyx_t_3;
__pyx_v_cx = -2.0;
/* Inner loop over columns; the bound (width) is copied on each row. */
__pyx_t_4 = __pyx_v_width;
__pyx_t_5 = __pyx_t_4;
for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
__pyx_v_x = __pyx_t_6;
__pyx_v_zx = 0.0;
__pyx_v_zy = 0.0;
__pyx_v_i = 0;
/* "while i < maxiter" expressed as an endless loop with an explicit exit test. */
while (1) {
__pyx_t_7 = (__pyx_v_i < __pyx_v_maxiter);
if (!__pyx_t_7) break;
__pyx_v_zx2 = (__pyx_v_zx * __pyx_v_zx);
__pyx_v_zy2 = (__pyx_v_zy * __pyx_v_zy);
/* Leave the loop once zx2 + zy2 exceeds 4.0 (the source-level "break"). */
__pyx_t_7 = ((__pyx_v_zx2 + __pyx_v_zy2) > 4.0);
if (__pyx_t_7) {
goto __pyx_L8_break;
}
/* zy is updated before zx because it still needs the old zx. */
__pyx_v_zy = (((2.0 * __pyx_v_zx) * __pyx_v_zy) + __pyx_v_cy);
__pyx_v_zx = ((__pyx_v_zx2 - __pyx_v_zy2) + __pyx_v_cx);
__pyx_v_i = (__pyx_v_i + 1);
}
/* Jump target of the goto above. */
__pyx_L8_break:;
/* Read three consecutive palette bytes starting at i * 3 and print them. */
__pyx_v_index = (__pyx_v_i * 3);
__pyx_v_r = (__pyx_v_palette[__pyx_v_index]);
__pyx_v_g = (__pyx_v_palette[(__pyx_v_index + 1)]);
__pyx_v_b = (__pyx_v_palette[(__pyx_v_index + 2)]);
(void)(printf(((char const *)"%d %d %d\n"), __pyx_v_r, __pyx_v_g, __pyx_v_b));
/* Plain C floating-point division; no zero-divisor check is emitted. */
__pyx_v_cx = (__pyx_v_cx + (3.0 / ((double)__pyx_v_width)));
}
__pyx_v_cy = (__pyx_v_cy + (3.0 / ((double)__pyx_v_height)));
}
/* Every path reaches this point, so the function always returns 0. */
__pyx_r = 0;
return __pyx_r;
}
```
---
## Benchmarks (1/2)
* native: ANSI C variant (no Python)
Expand Down

0 comments on commit 3c146d9

Please sign in to comment.