incomplete implementation of qr decomposition #371

v923z · 2021-04-23T18:33:36Z

This PR partially implements the request in #363 by adding QR decomposition to ulab.

v923z · 2021-04-23T18:48:51Z

@JamesTev This doesn't work correctly. I am pretty sure it is a trivial sign error somewhere, but I don't find it at the moment. Do you think you could take a look at the code?

micropython-ulab/code/numpy/linalg/linalg.c

Lines 378 to 464 in cba8873

    
           static mp_obj_t linalg_qr(mp_obj_t M) { 
        
               if(!MP_OBJ_IS_TYPE(M, &ulab_ndarray_type)) { 
        
                   mp_raise_TypeError(translate("operation is defined for ndarrays only")); 
        
               } 
        
               ndarray_obj_t *source = MP_OBJ_TO_PTR(M); 
        
               if(source->ndim != 2) { 
        
                   mp_raise_ValueError(translate("operation is defined for 2D arrays only")); 
        
               } 
        
               size_t m = source->shape[ULAB_MAX_DIMS - 2]; // rows 
        
               size_t n = source->shape[ULAB_MAX_DIMS - 1]; // columns 
        
               ndarray_obj_t *Q = ndarray_new_dense_ndarray(2, source->shape, NDARRAY_FLOAT); 
        
               ndarray_obj_t *R = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, n, n), NDARRAY_FLOAT); 
        
               mp_float_t *qarray = (mp_float_t *)Q->array; 
        
               mp_float_t *rarray = (mp_float_t *)R->array; 
        
               // simply copy the entries of source to a float array 
        
               mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype); 
        
               uint8_t *sarray = (uint8_t *)source->array; 
        
               for(size_t i = 0; i < m; i++) { 
        
                   for(size_t j = 0; j < n; j++) { 
        
                       *rarray++ = func(sarray); 
        
                       sarray += source->strides[ULAB_MAX_DIMS - 1]; 
        
                   } 
        
                   sarray -= n * source->strides[ULAB_MAX_DIMS - 1]; 
        
                   sarray += source->strides[ULAB_MAX_DIMS - 2]; 
        
               } 
        
               rarray -= m * n; 
        
               // start with the unit matrix 
        
               for(size_t i = 0; i < m; i++) { 
        
                   qarray[i * (n + 1)] = 1.0; 
        
               } 
        
               for(size_t j = 0; j < n; j++) { 
        
                   for(size_t i = m - 1; i > j; i--) { 
        
                       mp_float_t c, s; 
        
                       // Givens matrix: 
        
                       // ( (c  s), 
        
                       //   (-s c) ) 
        
                       if(rarray[i * n + j] == 0.0) { 
        
                           c = (rarray[(i - 1) * n + j] >= 0.0) ? 1.0 : -1.0; 
        
                           s = 0.0; 
        
                       } else if(rarray[(i - 1) * n + j] == 0.0) { 
        
                           c = 0.0; 
        
                           s = (rarray[i * n + j] >= 0.0) ? -1.0 : 1.0; 
        
                       } else { 
        
                           mp_float_t t, u; 
        
                           if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) > MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j])) { 
        
                               t = rarray[i * n + j] / rarray[(i - 1) * n + j]; 
        
                               u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t); 
        
                               c = 1.0 / u; 
        
                               s = c * t; 
        
                           } else { 
        
                               t = rarray[(i - 1) * n + j] / rarray[i * n + j]; 
        
                               u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t); 
        
                               s = 1.0 / u; 
        
                               c = s * t; 
        
                           } 
        
                       } 
        
                       mp_float_t r1 = 0.0, r2 = 0.0; 
        
                       // update R: multiply with the rotation matrix from the left 
        
                       for(size_t k = 0; k < n; k++) { 
        
                           r1 = rarray[(i - 1) * n + k]; 
        
                           r2 = rarray[i * n + k]; 
        
                           rarray[(i - 1) * n + k] = c * r1 + s * r2; 
        
                           rarray[i * n + k] = - s * r1 + c * r2; 
        
                       } 
        
                       // update Q: multiply with the transpose of the rotation matrix from the right 
        
                       for(size_t k = 0; k < n; k++) { 
        
                           r1 = qarray[k * n + (i - 1)]; 
        
                           r2 = qarray[k * n + i]; 
        
                           qarray[k * n + (i - 1)] = c * r1 - s * r2; 
        
                           qarray[k * n + i] = s * r1 + c * r2; 
        
                       } 
        
                   } 
        
               } 
        
               mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(mp_obj_new_tuple(2, NULL)); 
        
               tuple->items[0] = MP_OBJ_FROM_PTR(Q); 
        
               tuple->items[1] = MP_OBJ_FROM_PTR(R); 
        
               return tuple; 
        
           }

from ulab import numpy as np

a = np.array([[1, 2], [3, 4]])
q, r = np.linalg.qr(a)
print(q)
print(r)

results in

array([[0.3162277660168379, 0.9486832980505138],
       [-0.9486832980505138, 0.3162277660168379]], dtype=float64)
array([[3.162277660168379, 4.427188724235731],
       [-1.110223024625157e-16, -0.632455532033676]], dtype=float64)

while numpy gives

(array([[-0.31622777, -0.9486833 ],
        [-0.9486833 ,  0.31622777]]),
 array([[-3.16227766, -4.42718872],
        [ 0.        , -0.63245553]]))

fix small glitch in optimize

Co-authored-by: Dan Halbert <[email protected]>

add circuitpython compilation section to README

Lowercase MP macros

update ESP32 compile instructions in readme

…th two tests

…lg module

Adding solve_triangular function in numpy.linalg module

change to lowercase macro style, so that `circuitpython` can automatically build

add docs for solve_triangular

update docs

JamesTev · 2021-05-10T10:06:53Z

@v923z Sorry for the late reply - just been swamped with uni work and exams. I will be sure to check this out soon once my exams are done! Thanks a lot.

JamesTev · 2021-06-16T12:08:50Z

EDIT: actually, I think I made a mistake, when testing. I get identical results in numpy, and ulab. I copied the test case incorrectly. numpy result

@v923z Has this issue been fixed or are you still working on the weird sign issue? I pulled this branch and flashed the new firmware and am getting some weird sign differences compared to normal numpy.

I tested with np.linalg.qr(A) with A = np.arange(9).reshape((3,3)). Numpy gives:

(array([[ 0.        ,  0.91287093,  0.40824829],
        [-0.4472136 ,  0.36514837, -0.81649658],
        [-0.89442719, -0.18257419,  0.40824829]]),
 array([[-6.70820393e+00, -8.04984472e+00, -9.39148551e+00],
        [ 0.00000000e+00,  1.09544512e+00,  2.19089023e+00],
        [ 0.00000000e+00,  0.00000000e+00, -8.88178420e-16]]))

uLab gives:

(array([[0.0, -0.4472136, -0.8944272],
       [-0.9128709, -0.3651484, 0.1825742],
       [0.4082483, -0.8164966, 0.4082483]], dtype=float32), array([[-6.708203, -8.049845, -9.391485],
       [0.0, -1.095445, -2.19089], # sign issues in this row
       [0.0, 0.0, 0.0]], dtype=float32))

So, there are sign issues in both Q and R matrices and Q matrix seems to be transposed in the uLab version. Did you find similar?

v923z · 2021-06-16T17:09:11Z

@JamesTev

So, there are sign issues in both Q and R matrices and Q matrix seems to be transposed in the uLab version. Did you find similar?

Thanks for taking the time! I'll look into this.

v923z · 2021-06-16T18:35:58Z

@JamesTev
Apart from the transpose, I think the code is OK. The QR decomposition is unique only, if you demand that the diagonal entries of R be positive.

>>> from ulab import numpy as np

>>> A = np.arange(9).reshape((3,3))
>>> q, r = np.linalg.qr(A)
>>> print(q)
>>> print()
>>> print(r)
>>> print(np.dot(q.transpose(), r))

array([[0.0, -0.447213595499958, -0.8944271909999159],
       [-0.9128709291752768, -0.3651483716701107, 0.1825741858350553],
       [0.408248290463863, -0.8164965809277261, 0.408248290463863]], dtype=float64)

array([[-6.708203932499369, -8.049844718999243, -9.391485505499116],
       [0.0, -1.095445115010332, -2.190890230020665],
       [0.0, 0.0, -4.440892098500626e-16]], dtype=float64)

array([[0.0, 1.0, 2.0],
       [3.0, 4.0, 4.999999999999999],
       [6.0, 6.999999999999999, 7.999999999999998]], dtype=float64)

I pulled this branch and flashed the new firmware and am getting some weird sign differences compared to normal numpy.

By the way, you can test things in the unix port. It's easier than having to flash the firmware.

v923z · 2021-06-17T19:09:26Z

@JamesTev I have fixed the issue with the transpose.

JamesTev · 2021-06-17T19:13:01Z

@v923z great, thank you!

v923z · 2021-06-17T19:15:22Z

@JamesTev If you think it is OK, I would merge it.

v923z · 2021-06-17T19:16:17Z

@JamesTev Do you think you could write a small test script?

JamesTev · 2021-06-17T19:18:19Z

I verified that it works with the eigen solver algorithm that I’m using which is a good start. But yes, I can try think of some edge cases and get back to you.

v923z · 2021-06-18T05:26:04Z

@JamesTev I have clean forgotten, I still have to implement the reduced mode.

JamesTev · 2021-06-18T15:43:43Z

I noticed something a bit strange: as part of another calculation, I computed a matrix A as below:

A = np.array([[-3.090257e+12, -7.008567e+05,  4.739371e+05,  2.532784e+05],
       [-2.618467e+04, -3.090258e+12,  4.707228e+05,  2.515600e+05],
       [-2.622055e+04, -6.970566e+05, -3.090256e+12,  2.519048e+05],
       [-2.613640e+04, -6.948194e+05,  4.698551e+05, -3.090257e+12]])

The numpy and ulab QR functions gave different results. In particular, the ulab version gives an incorrect R matrix (not upper triangular). Do you think this could be due to some small numerical error that is very noticeable in this case because of the magnitude of the entries in A? I tried computing again with (A/1000) and the ulab-computed R matrix was much closer to the expected upper triangular form. This issue arises with different matrices (different A) too; it seems increasingly so as the magnitude of entries in A increases.

Numpy result for np.linalg.qr(A):

# formatted to list to visualise more easily
Q = [array([-1.00000000e+00,  8.47330231e-09,  8.48490599e-09, -8.45767782e-09]),
 array([-8.47329850e-09, -1.00000000e+00,  2.25565790e-07, -2.24841906e-07]),
 array([-8.48490918e-09, -2.25565825e-07, -1.00000000e+00,  1.52044035e-07]),
 array([-8.45767844e-09, -2.24841872e-07,  1.52044085e-07,  1.00000000e+00])]

R = [array([ 3.09025700e+12,  7.27041390e+05, -4.47716566e+05, -2.27142004e+05]),
 array([0.00000000e+00, 3.09025800e+12, 2.26333241e+05, 4.43259114e+05]),
 array([ 0.0000000e+00,  0.0000000e+00,  3.0902560e+12, -7.2176004e+05]),
 array([ 0.000000e+00,  0.000000e+00,  0.000000e+00, -3.090257e+12])]

ulab.np.linalg.qr(A) result:

Q = array([[-1.0, 8.473302e-09, -8.484906e-09, -8.45768e-09],
       [-8.4733e-09, -1.0, -2.110726e-07, -2.103953e-07],
       [-8.484911e-09, -2.086163e-07, 1.0, 2.086163e-07],
       [-8.457682e-09, -2.384186e-07, -1.490116e-07, 1.0]], dtype=float32)

R = array([[3.090257e+12, 727041.3, -447716.6, -227142.0],
       [0.001594594, 3.090258e+12, 131072.0, 393216.0],
       [-0.000580081, -0.0, -3.090256e+12, 524288.0],
       [0.002179488, 0.0, -131072.0, -3.090257e+12]], dtype=float32) # notice non-zero terms below main diagonal

v923z · 2021-06-18T20:48:31Z

@JamesTev

This seems to me to be a round-off error. With 64 bits, I get the following:

>>> from ulab import numpy as np

>>> A = np.array([[-3.090257e+12, -7.008567e+05,  4.739371e+05,  2.532784e+05],
       [-2.618467e+04, -3.090258e+12,  4.707228e+05,  2.515600e+05],
       [-2.622055e+04, -6.970566e+05, -3.090256e+12,  2.519048e+05],
       [-2.613640e+04, -6.948194e+05,  4.698551e+05, -3.090257e+12]])

>>> q, r = np.linalg.qr(A)
>>> print(q)
>>> print()
>>> print(r)

array([[-1.0, 8.473302314666956e-09, -8.484905986702145e-09, -8.457677823969375e-09],
       [-8.473298499121594e-09, -1.0, -2.255657904819399e-07, -2.248419064713072e-07],
       [-8.484909183928712e-09, -2.255658246297365e-07, 1.0, 1.520440346958196e-07],
       [-8.457678439042448e-09, -2.248418721206846e-07, -1.520440853219895e-07, 1.0]], dtype=float64)

array([[3090257000000.001, 727041.3902643195, -447716.5664473673, -227142.0042689324],
       [-1.04984705117826e-25, 3090258000000.152, 226333.2414550781, 443259.1143798828],
       [2.568302040251067e-12, -0.0, -3090256000000.068, 721760.0402832033],
       [-2.576570284484911e-12, 0.0, -0.000244140625, -3090256999999.907]], dtype=float64)

I don't know what the proper solution would be. I believe, the culprit is this:

micropython-ulab/code/numpy/linalg/linalg.c

Lines 429 to 438 in a84db8f

    
           if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) > MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j])) { // r[i-1, j], r[i, j] 
        
               t = rarray[i * n + j] / rarray[(i - 1) * n + j]; // r[i, j]/r[i-1, j] 
        
               u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t); 
        
               c = -1.0 / u; 
        
               s = c * t; 
        
           } else { 
        
               t = rarray[(i - 1) * n + j] / rarray[i * n + j]; // r[i-1, j]/r[i, j] 
        
               u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t); 
        
               s = -1.0 / u; 
        
               c = s * t;

You have values that differ by 8 orders of magnitude (e.g., in the first column, 3.090257e+12, and -2.618467e+04), and the Givens rotations actually compare these values, and I think that trips the condition in the second line of the snippet above. I guess, what happens is that compared to A[0,0], A[1,0] is zero, so for these values no actual rotation takes place. In other words, you just don't have enough depth to compare such wildly different values. https://en.wikipedia.org/wiki/Single-precision_floating-point_format

I think, a fair comparison with numpy would be, if you could force numpy to carry out the computation with 32-bit floats, and see what that results in.

v923z · 2021-06-19T06:47:17Z

@JamesTev Ha! This tripped me. I think you copied the wrong output. numpy's results are different with your input matrix.

Output:

QR mode=complete:

Q: [[ 0.00000000e+00  1.00000000e+00 -5.55111512e-17]
 [-4.47213595e-01 -1.66533454e-16 -8.94427191e-01]
 [-8.94427191e-01  0.00000000e+00  4.47213595e-01]], 
R: [[-4.47213595 -6.70820393]
 [ 0.          1.        ]
 [ 0.          0.        ]]

I have also implemented the mode keyword argument. Could you, please, check that it works as it should?

As for the sign errors that you found earlier, I think that is not a fundamental issue, and it shouldn't change too much. But if you want to, we could do a sweep before we return the matrices, and multiply the columns, if the diagonal entry is negative. I believe that should fix it.

Next up is linalg.svd;)

JamesTev · 2021-06-23T13:58:36Z

@JamesTev Ha! This tripped me. I think you copied the wrong output. numpy's results are different with your input matrix.

@v923z Sorry, I don't quite follow? I've repeated the experiment and checked the input matrix to both versions is the same and I get the same result (with both reduced and complete as expected).

JamesTev · 2021-06-23T13:59:16Z

I have also implemented the mode keyword argument. Could you, please, check that it works as it should?

Great! Thanks a lot - I will have a look

v923z · 2021-06-23T19:48:55Z

@JamesTev Ha! This tripped me. I think you copied the wrong output. numpy's results are different with your input matrix.

@v923z Sorry, I don't quite follow? I've repeated the experiment and checked the input matrix to both versions is the same and I get the same result (with both reduced and complete as expected).

@JamesTev
You are right. I am pretty sure I messed up something. Sorry for the noise!

JamesTev · 2021-06-29T09:43:18Z

@JamesTev
You are right. I am pretty sure I messed up something. Sorry for the noise!

No problem! Any ideas on what the issue may be? Do you think it is indeed an issue with single precision?

v923z · 2021-06-29T12:13:46Z

Do you think it is indeed an issue with single precision?

Yes, I am pretty confident. As far as I remember, on some platforms you can switch on double precision, even if the MCU doesn't have a proper FPU. Double precision will then be implemented in firmware. I don't know, whether your MCU falls in this category, and one also has to pay a speed penalty.

JamesTev · 2021-06-30T09:17:50Z

I see. Forgive my ignorance but I'm using an ESP32 and believe it has a FPU. Do I need to set a flag somewhere in the ulab firmware and rebuild to enable double precision?

v923z · 2021-06-30T09:45:26Z

@JamesTev

I'm using an ESP32 and believe it has a FPU. Do I need to set a flag somewhere in the ulab firmware and rebuild to enable double precision?

No, that actually happens automatically here:

micropython-ulab/code/ndarray.h

Lines 30 to 34 in 764d5b9

    
           #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT 
        
           #define FLOAT_TYPECODE 'f' 
        
           #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE 
        
           #define FLOAT_TYPECODE 'd' 
        
           #endif

So, you need to set the appropriate flag (MICROPY_FLOAT_IMPL) in micropython itself. I have virtually zero experience with the ESP32, so I am not too much of a help here.

v923z · 2021-06-30T09:47:50Z

@JamesTev James, I think this is what you need: micropython/micropython#4380 (comment)

JamesTev · 2021-06-30T10:02:35Z

@JamesTev James, I think this is what you need: micropython/micropython#4380 (comment)

Yeah, found it. Thanks! I've just tested again with double precision enabled and QR is working as expected 👍

v923z · 2021-06-30T10:36:54Z

Yeah, found it. Thanks! I've just tested again with double precision enabled and QR is working as expected 👍

Thanks! If you can supply a small test script, I would like to merge this.

JamesTev · 2021-07-05T09:28:23Z

Thanks! If you can supply a small test script, I would like to merge this.

Sure. Would a basic (micro)python script be okay?

v923z · 2021-07-05T10:22:16Z

Thanks! If you can supply a small test script, I would like to merge this.

Sure. Would a basic (micro)python script be okay?

Yes. We can always extend it later, if needed. I just want something that, first, demonstrates the usage, second, provides a rudimentary safeguard against introducing error, if we change the code.

v923z · 2021-07-22T18:38:43Z

Obsolete, fixed in #427.

tannewt and others added 3 commits April 22, 2021 17:54

Lowercase MP macros

e3bf07c

incomplete implementation of qr decomposition

9a3c32a

revert to upper case macro, so that the circuitpython test passes

cba8873

v923z and others added 22 commits April 26, 2021 21:52

fix small glitch in optimize

f375359

Merge pull request #374 from v923z/optimize-fix

64d53aa

fix small glitch in optimize

add circuitpython compilation section to README

bd3b030

Update README.md

c79a993

Co-authored-by: Dan Halbert <[email protected]>

Update README.md

0924daa

Co-authored-by: Dan Halbert <[email protected]>

Merge pull request #375 from v923z/readme

ecae9e8

add circuitpython compilation section to README

Merge pull request #370 from adafruit/lowercase_macros

3b2d7dd

Lowercase MP macros

add ESP32 cmake instructions to readme

e01f805

add build script for ESP32 cmake

16af741

refer to build script in readme

9464cb7

Merge pull request #376 from v923z/readme

6e0b9bb

update ESP32 compile instructions in readme

implemented solve_triangular function in numpy.linalg module along wi…

ab60c5c

…th two tests

implemented solve_triangular and syncing with original repo

3e63c8e

corrections to solve_triangular, moved solve_triangular to scipy.lina…

f9b0e43

…lg module

corrected date in ulab-change-log

ba6409a

removed redundant function pointers, added copyright note

e52fa96

Merge pull request #377 from RoboticExplorationLab/master

a61cd88

Adding solve_triangular function in numpy.linalg module

fix circuitpython CI

1c02726

change to lowercase macro style, so that `circuitpython` can automatically build

add docs for solve_triangular

df2c994

Merge pull request #380 from v923z/solve-docs

3c39995

add docs for solve_triangular

update docs

43d7de1

Merge pull request #381 from v923z/doc-fix

5f1d4f5

update docs

jepler and others added 3 commits May 10, 2021 09:03

build-cp.sh: Drop unneeded workaround.

ebdff02

qr works now with square matrices

24f0001

requirements_cp_dev: Update with requirements from circuitpython

1d05843

fix transposition glitch in linalg.qr

a84db8f

implement mode keyword argument

47a0b3c

v923z mentioned this pull request Jul 22, 2021

add qr implementation #427

Merged

v923z closed this Jul 22, 2021

v923z deleted the qr branch July 22, 2021 18:38

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

incomplete implementation of qr decomposition #371

incomplete implementation of qr decomposition #371

v923z commented Apr 23, 2021 •

edited

Loading

v923z commented Apr 23, 2021

JamesTev commented May 10, 2021

JamesTev commented Jun 16, 2021

v923z commented Jun 16, 2021

v923z commented Jun 16, 2021

v923z commented Jun 17, 2021

JamesTev commented Jun 17, 2021

v923z commented Jun 17, 2021

v923z commented Jun 17, 2021

JamesTev commented Jun 17, 2021

v923z commented Jun 18, 2021

JamesTev commented Jun 18, 2021

v923z commented Jun 18, 2021 •

edited

Loading

v923z commented Jun 19, 2021

JamesTev commented Jun 23, 2021

JamesTev commented Jun 23, 2021

v923z commented Jun 23, 2021

JamesTev commented Jun 29, 2021

v923z commented Jun 29, 2021

JamesTev commented Jun 30, 2021

v923z commented Jun 30, 2021

v923z commented Jun 30, 2021

JamesTev commented Jun 30, 2021

v923z commented Jun 30, 2021

JamesTev commented Jul 5, 2021

v923z commented Jul 5, 2021

v923z commented Jul 22, 2021

incomplete implementation of qr decomposition #371

incomplete implementation of qr decomposition #371

Conversation

v923z commented Apr 23, 2021 • edited Loading

v923z commented Apr 23, 2021

JamesTev commented May 10, 2021

JamesTev commented Jun 16, 2021

v923z commented Jun 16, 2021

v923z commented Jun 16, 2021

v923z commented Jun 17, 2021

JamesTev commented Jun 17, 2021

v923z commented Jun 17, 2021

v923z commented Jun 17, 2021

JamesTev commented Jun 17, 2021

v923z commented Jun 18, 2021

JamesTev commented Jun 18, 2021

v923z commented Jun 18, 2021 • edited Loading

v923z commented Jun 19, 2021

JamesTev commented Jun 23, 2021

JamesTev commented Jun 23, 2021

v923z commented Jun 23, 2021

JamesTev commented Jun 29, 2021

v923z commented Jun 29, 2021

JamesTev commented Jun 30, 2021

v923z commented Jun 30, 2021

v923z commented Jun 30, 2021

JamesTev commented Jun 30, 2021

v923z commented Jun 30, 2021

JamesTev commented Jul 5, 2021

v923z commented Jul 5, 2021

v923z commented Jul 22, 2021

v923z commented Apr 23, 2021 •

edited

Loading

v923z commented Jun 18, 2021 •

edited

Loading