From 8b36253bbef759105ef563d432f15966323a33c9 Mon Sep 17 00:00:00 2001 From: Markus Paaso Date: Wed, 7 Sep 2016 21:35:30 +0300 Subject: [PATCH 1/2] Optimized computing of non-edge forces by removing the need to calc sqrt(D) by squaring and inversing theta --- sptree.cpp | 6 +++--- sptree.h | 2 +- tsne.cpp | 11 ++++++----- tsne.h | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sptree.cpp b/sptree.cpp index 10aa4ef8..12e6ae23 100644 --- a/sptree.cpp +++ b/sptree.cpp @@ -338,7 +338,7 @@ unsigned int SPTree::getDepth() { // Compute non-edge forces using Barnes-Hut algorithm -void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) +void SPTree::computeNonEdgeForces(unsigned int point_index, double squared_inv_theta, double neg_f[], double* sum_Q) { // Make sure that we spend no time on empty nodes or self-interactions @@ -357,7 +357,7 @@ void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double cur_width = boundary->getWidth(d); max_width = (max_width > cur_width) ? max_width : cur_width; } - if(is_leaf || max_width / sqrt(D) < theta) { + if(is_leaf || D > max_width * max_width * squared_inv_theta) { // Compute and add t-SNE force between point and current node D = 1.0 / (1.0 + D); @@ -369,7 +369,7 @@ void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double else { // Recursively apply Barnes-Hut to children - for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); + for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, squared_inv_theta, neg_f, sum_Q); } } diff --git a/sptree.h b/sptree.h index b138a9a1..10e4a27e 100644 --- a/sptree.h +++ b/sptree.h @@ -101,7 +101,7 @@ class SPTree void rebuildTree(); void getAllIndices(unsigned int* indices); unsigned int getDepth(); - void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q); + void computeNonEdgeForces(unsigned int point_index, double squared_inv_theta, double neg_f[], double* sum_Q); void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f); void print(); diff --git a/tsne.cpp b/tsne.cpp index 3ead7712..95b15762 100644 --- a/tsne.cpp +++ b/tsne.cpp @@ -143,12 +143,13 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit if(exact) printf("Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); else printf("Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); start = clock(); + double squared_inv_theta = 1.0 / (theta * theta); for(int iter = 0; iter < max_iter; iter++) { // Compute (approximate) gradient if(exact) computeExactGradient(P, Y, N, no_dims, dY); - else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta); + else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, squared_inv_theta); // Update gains for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); @@ -200,7 +201,7 @@ void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexit // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm) -void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta) +void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double squared_inv_theta) { // Construct space-partitioning tree on current map @@ -212,7 +213,7 @@ void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp double* neg_f = (double*) calloc(N * D, sizeof(double)); if(pos_f == NULL || neg_f == NULL) { printf("Memory allocation failed!\n"); exit(1); } tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); - for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * D, &sum_Q); + for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, squared_inv_theta, neg_f + n * D, &sum_Q); // Compute final t-SNE gradient for(int i = 0; i < N * D; i++) { @@ -310,14 +311,14 @@ double TSNE::evaluateError(double* P, double* Y, int N, int D) { } // Evaluate t-SNE cost function (approximately) -double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double theta) +double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double squared_inv_theta) { // Get estimate of normalization term SPTree* tree = new SPTree(D, Y, N); double* buff = (double*) calloc(D, sizeof(double)); double sum_Q = .0; - for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, buff, &sum_Q); + for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, squared_inv_theta, buff, &sum_Q); // Loop over all edges to compute t-SNE error int ind1, ind2; diff --git a/tsne.h b/tsne.h index 26008559..80f37331 100644 --- a/tsne.h +++ b/tsne.h @@ -49,10 +49,10 @@ class TSNE private: - void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta); + void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double squared_inv_theta); void computeExactGradient(double* P, double* Y, int N, int D, double* dC); double evaluateError(double* P, double* Y, int N, int D); - double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double theta); + double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double squared_inv_theta); void zeroMean(double* X, int N, int D); void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity); void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K); From e599275e082b2d70ef7f49b52714fe4b46097869 Mon Sep 17 00:00:00 2001 From: Markus Paaso Date: Mon, 5 Sep 2016 17:33:06 +0300 Subject: [PATCH 2/2] Slightly improved SPTree constructor performance --- sptree.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sptree.cpp b/sptree.cpp index 12e6ae23..9507a569 100644 --- a/sptree.cpp +++ b/sptree.cpp @@ -92,30 +92,30 @@ SPTree::SPTree(unsigned int D, double* inp_data, unsigned int N) { // Compute mean, width, and height of current map (boundaries of SPTree) - int nD = 0; double* mean_Y = (double*) calloc(D, sizeof(double)); - double* min_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) min_Y[d] = DBL_MAX; - double* max_Y = (double*) malloc(D * sizeof(double)); for(unsigned int d = 0; d < D; d++) max_Y[d] = -DBL_MAX; - for(unsigned int n = 0; n < N; n++) { - for(unsigned int d = 0; d < D; d++) { - mean_Y[d] += inp_data[n * D + d]; - if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d]; - if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d]; + double* width = (double*) malloc(D * sizeof(double)); + double min_Y, max_Y; + int nD; + for(unsigned int d = 0; d < D; d++) { + nD = 0; + min_Y = DBL_MAX; + max_Y = -DBL_MAX; + for(unsigned int n = 0; n < N; n++) { + mean_Y[d] += inp_data[nD + d]; + if(inp_data[nD + d] < min_Y) min_Y = inp_data[nD + d]; + if(inp_data[nD + d] > max_Y) max_Y = inp_data[nD + d]; + nD += D; } - nD += D; + mean_Y[d] /= (double) N; + width[d] = fmax(max_Y - mean_Y[d], mean_Y[d] - min_Y) + 1e-5; } - for(int d = 0; d < D; d++) mean_Y[d] /= (double) N; // Construct SPTree - double* width = (double*) malloc(D * sizeof(double)); - for(int d = 0; d < D; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; init(NULL, D, inp_data, mean_Y, width); fill(N); // Clean up memory free(mean_Y); - free(max_Y); - free(min_Y); free(width); }