From 86098d474e101b9ee074b7e364f88f0a7e17c1f0 Mon Sep 17 00:00:00 2001 From: Lance Hepler Date: Tue, 26 Feb 2019 16:44:37 -0800 Subject: [PATCH 01/17] Add algorithm stability test, turn off -ffast-math Turning off -ffast-math is because otherwise any little compile flags change will change the output. Also remove unwanted library_dirs and include_dirs. --- .gitignore | 2 +- Makefile | 3 ++- setup.py | 10 ++++------ tsne/tests/test_stability.npy | Bin 0 -> 2528 bytes tsne/tests/test_stability.py | 16 ++++++++++++++++ 5 files changed, 23 insertions(+), 8 deletions(-) create mode 100644 tsne/tests/test_stability.npy create mode 100644 tsne/tests/test_stability.py diff --git a/.gitignore b/.gitignore index 3da6175..43bc1b8 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,7 @@ nosetests.xml # Project specific tsne/bh_sne.cpp -tsne/bh_sne_3d.cpp tsne/bh_sne_src/bh_tsne *.pkl.gz .cache +.pytest_cache/ diff --git a/Makefile b/Makefile index be317d5..96a7c82 100644 --- a/Makefile +++ b/Makefile @@ -10,4 +10,5 @@ sdist: clean: rm -rf *.pyc *.so build/ bh_sne.cpp - rm -rf tsne/*.pyc tsne/*.so tsne/build/ tsne/bh_sne.cpp + rm -rf tsne/*.pyc tsne/*.so tsne/*.o tsne/build/ tsne/bh_sne.cpp + rm -rf .pytest_cache/ diff --git a/setup.py b/setup.py index 38cd82b..c38a833 100644 --- a/setup.py +++ b/setup.py @@ -44,17 +44,15 @@ ext_modules = [Extension(name='bh_sne', sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], - include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'], - library_dirs=['/usr/local/lib', '/usr/lib64/atlas'], - extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-ffast-math'], + include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], + extra_compile_args=['-msse3', '-O3', '-fPIC', '-w'], extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s'], language='c++'), Extension(name='bh_sne_3d', sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'], - include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'], - library_dirs=['/usr/local/lib', '/usr/lib64/atlas'], - extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-ffast-math', '-DTSNE3D'], + include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], + extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-DTSNE3D'], extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s'], language='c++')] diff --git a/tsne/tests/test_stability.npy b/tsne/tests/test_stability.npy new file mode 100644 index 0000000000000000000000000000000000000000..25cd4decd3a927ac6bf69d99821b25329e803efc GIT binary patch literal 2528 zcmbW2`8U*y8^;-2rUheW%#1O%`OGjg#!SA}+H&m$H z6x~EfqJ^k=M7wMiipX-aM^p;s>;4Ph=eO7Uyx-?M=Q+nA4}sm7~^`EZymQ( zB=FF0eP~!d56_LpW!Y7m!;Z3u#;9k;ScAKi19MGaQwODBS*sC_dgW>x1e<~DO-IEh zo-tNy%=?`ASO_J?#Z?XiMmV4*UA%lm0JoM@dE|TWQQ>YlrF)$jc=}F#&X~Pl!i6S> zBr|gu|7BdT+lRp2Uwms1MTz0k+n0$XQyy}wt!#}f1d!_7G68qFsH=6Q{j#?J+5==7f8n*r z&;R(B1ib#pOW?y?@v%lYS7CsdGJEt_Vj*Iur4u6`m62rS}&)>)$MkwjFvXQKlK=6%^ z%&!?n$azw+Ei}*+&MP>~?P!?YXZyY5zK#-bA6L_v$Tr5x5tlS8k}2%0f4Ah8Hi3nk ztm7PK^JJrvy#;+-Y`B#1`9_=w{1^?by(AvKib#;Oll8DoaDsGH#)e_nWb1fG20DdF z*cJK=xY}&E#W9|P4I4M~GJSQy?)~}o3(kC#<4LnGDU0b~`n{nr9hqoxB6ZX1Q3gb8 zP5Uv?Hy3T=z1~Q=w4vIryw6{mi7iD(ax*+=aESJj`z`5-Jdg5(eN}-0ejA?lm6b5D zrakZLR3r-^^4WCf`3ZRu@3>F;J}MkrHTL8F#YVYLoaIV|4LYE?d9Z%)$O8P`TeG-; zLx(k`ZN^8=k&v2LQ{+ftLzunu;Swu4nz^TxFB+l3%B!{}iXM~lm3xo*PL?sDVOrua zKY@uX#ceSK2X!ILC&ulK(|okmmn_xsr+{`U@8wX7GWw3l7sM1WK&k0sao=7F4s1^C z(ci8IgX5tkTCG#^rbY75CF%@lK3;ZvM3sqOp8n2diUvX(Y&V!{y1 zKN#s842<02SGI%-yKBM@{W(p=SIvg&&#Ka4P?*wlG@6FbRenw`YiB~;2bKK7BnJN3 z>*D_4ISV4HP9+k9Y~0{h8&+7sf-InaPnGqvnu z*R;Ip%)!jd0xb(iqyR@4$5U71{lQdEgSJ?=*+c3b=mc%9Am&)h$c@-%gU;ws1l0uXURyuPQ zT#ur|0J%Szqi2GiG33W=Hzstq@^@_1=itV6+f=U%btta3EkE03ihOEFL%EO&J`XPU zo;l0K#YrDwcZqf$sg+#_=st84+r-#bpXSq1h`e(QzX09Z! zwx?>q8YA){-J3$B7wZhLUr>RQrt!3CP1TILrGO_MdA7wo8qRaa8DTqiZ`3!X#yF~G8I&^bl|eBm%Ofti-EF=;WJ*G&LGReeg2r0{%vi3mqQ8_1qUXv#71P;xsi~1#?n;4Et!o7{X}Wm4 z#Gy+%tO@q;!ojzcKwqP``wv-A!1+x-Cm~OOx$@ojPFB<421UIoKVFEDuTP{r@1ugW zIH5Uxp9t?MZwwiJsR@(hq*Ixb1p0&wMoD5RKr)rq90}3K0{6qcTOVt~3%50Hp8Jik zWFj^vA!I&8$5EbK>lLEw>;IZpSZYJR!qGb?)%e&LY4>cAD-+HLTQz7!T;vzCTdSrh zFkZZM+)TzoqdRxohYxB(hVbs+=CeI7lctw>C(t0E;Qf!uI6g)f?i-K(PJ>L3Vf}mI z1d?_gxN`Tt4s4Z=k@w90<7#pJ^Tc!Na3}X;Ld{bX%&5E*q1iez51Rc9VPIH)|Kav?$aSL=(l^$p#~^P@wK-a)`AzZX3N9#xL6+`y#C0I2Gg;% g^N`BJ4V7*?S3h9F`e0#Ka{wDxt;qXbwPzmu56_or_W%F@ literal 0 HcmV?d00001 diff --git a/tsne/tests/test_stability.py b/tsne/tests/test_stability.py new file mode 100644 index 0000000..68a2eac --- /dev/null +++ b/tsne/tests/test_stability.py @@ -0,0 +1,16 @@ + +def test_seed(): + from tsne import bh_sne + from sklearn.datasets import load_iris + import numpy as np + import os.path + + iris = load_iris() + + X = iris.data + y = iris.target + + t1 = bh_sne(X, random_state=np.random.RandomState(0), copy_data=True) + t2 = np.load(os.path.join(os.path.dirname(__file__), "test_stability.npy")) + + assert np.all(t1 == t2) From 68d9b110896673166ca5c0911e6ea94827787b65 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 11:23:06 -0700 Subject: [PATCH 02/17] Turn on LTO. --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index c38a833..bb99e24 100644 --- a/setup.py +++ b/setup.py @@ -45,15 +45,15 @@ ext_modules = [Extension(name='bh_sne', sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], - extra_compile_args=['-msse3', '-O3', '-fPIC', '-w'], - extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s'], + extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-flto'], + extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s', '-flto'], language='c++'), Extension(name='bh_sne_3d', sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'], include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], - extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-DTSNE3D'], - extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s'], + extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-DTSNE3D', '-flto'], + extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s', '-flto'], language='c++')] ext_modules = cythonize(ext_modules) From fe44b4ae1004717dea0fd351b7ca059e852cd40b Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Mon, 25 Feb 2019 18:09:02 -0800 Subject: [PATCH 03/17] Major build cleanup. Fix the Makefile dependencies so things get rebuilt when they should be. Add a `test` target. Only produce one .so file, which knows how to run both 2d and 3d. Break out main and the I/O methods into a separate cpp file, which is not used for the python extension module. Do not create pointless objects. The methods are all static. TSNE is now a namespace, not a class, and the exported `run` method is wrapped in `extern "C"` to make linkage easier. Minor cleanup of includes / using statements. Because I just couldn't help myself, remove a couple places where new/delete were being used for no good reason. Get rid of a few places where NDIMS (the build time constant) and a run-time variable, which are supposed to have the same value, were both being used in a method. This means flowing the template a bit further up the tree. There's lots more places this could be done - this only addresses the cases where not doing so could lead to errors if the template variable was inconsistent with the run-time variable. Kill off the last pointless vestiges of the cblas dependency. Use hidden visibility where appropriate - only export the run() method. This cuts the size of the .so and allows the compiler to inline more aggressively, especially since we've also turned on LTO. Together, all of these changes result in a build with a single 901k extension module, instead of two 1.9MB extension modules. Set -std=c++14 in the build. This is required by some of the cleanups because of the use of the `nullptr` keyword. Older gcc will not default to c++11 or higher, so will fail to build nullptr. And c++17 doesn't work because ``` include/python2.7/stringobject.h:175:5: error: ISO C++17 does not allow 'register' storage class specifier [-Wregister] register Py_ssize_t *len /* pointer to length variable or NULL ^~~~~~~~~ ``` Update travis build to Xenial container. This updates gcc to 5.4, which has the required c++ standards support. --- .travis.yml | 2 +- Makefile | 23 +- README.md | 1 - setup.py | 44 +-- tsne/__init__.py | 16 +- tsne/bh_sne.pyx | 41 ++- tsne/bh_sne_3d.pyx | 28 -- tsne/bh_sne_src/Makefile | 23 +- tsne/bh_sne_src/main.cpp | 147 ++++++++ tsne/bh_sne_src/sptree.cpp | 13 +- tsne/bh_sne_src/sptree.h | 5 +- tsne/bh_sne_src/tsne.cpp | 690 +++++++++++++++++-------------------- tsne/bh_sne_src/tsne.h | 45 ++- tsne/bh_sne_src/vptree.h | 20 +- 14 files changed, 597 insertions(+), 501 deletions(-) delete mode 100644 tsne/bh_sne_3d.pyx create mode 100644 tsne/bh_sne_src/main.cpp diff --git a/.travis.yml b/.travis.yml index 3c978da..0ac713a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,11 @@ python: - "2.7" # - "3.4" +dist: xenial addons: apt: packages: - build-essential - - libatlas-base-dev sudo: false diff --git a/Makefile b/Makefile index 96a7c82..166b268 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,23 @@ -build: - python setup.py build_ext --inplace +.PHONY: build -install: - python setup.py build_ext --inplace - python setup.py install +build: setup.py tsne/bh_sne.pyx \ + $(wildcard tsne/bh_sne_src/*.h) \ + $(wildcard tsne/bh_sne_src/*.cpp) + python $< build_ext --inplace -sdist: - python setup.py sdist +install: setup.py build + python $< install + +sdist: setup.py tsne/bh_sne.pyx \ + $(wildcard tsne/bh_sne_src/*.h) \ + $(wildcard tsne/bh_sne_src/*.cpp) + python $< sdist + +test: build + cd tsne/tests && py.test -s -vv clean: + make -C tsne/bh_sne_src clean rm -rf *.pyc *.so build/ bh_sne.cpp rm -rf tsne/*.pyc tsne/*.so tsne/*.o tsne/build/ tsne/bh_sne.cpp rm -rf .pytest_cache/ diff --git a/README.md b/README.md index c0d577e..169dad0 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ Requirements * [numpy](numpy.scipy.org) > =1.7.1 * [scipy](http://www.scipy.org/) >= 0.12.0 * [cython](cython.org) >= 0.19.1 -* [cblas](http://www.netlib.org/blas/) or [openblas](https://github.com/xianyi/OpenBLAS). Tested version is v0.2.5 and v0.2.6 (not necessary for OSX). [Anaconda](http://continuum.io/downloads) is recommended. diff --git a/setup.py b/setup.py index bb99e24..d8de271 100644 --- a/setup.py +++ b/setup.py @@ -28,33 +28,37 @@ if v2 >= 10: # More than 10.10 - extra_compile_args=['-I/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers'] + extra_compile_args = [ + '-I/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers'] else: - extra_compile_args=['-I/System/Library/Frameworks/vecLib.framework/Headers'] + extra_compile_args = [ + '-I/System/Library/Frameworks/vecLib.framework/Headers'] ext_modules = [Extension(name='bh_sne', - sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], - include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], - extra_compile_args=extra_compile_args + ['-ffast-math', '-O3'], - extra_link_args=['-Wl,-framework', '-Wl,Accelerate', '-lcblas'], - language='c++')] + sources=['tsne/bh_sne_src/sptree.cpp', + 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], + include_dirs=[ + numpy.get_include(), 'tsne/bh_sne_src/'], + extra_compile_args=extra_compile_args + + ['-ffast-math', '-O3', '-std=c++14'], + extra_link_args=['-Wl,-framework', + '-Wl,Accelerate'], + language='c++')] else: # LINUX ext_modules = [Extension(name='bh_sne', - sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], - include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], - extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-flto'], - extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s', '-flto'], - language='c++'), - - Extension(name='bh_sne_3d', - sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'], - include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], - extra_compile_args=['-msse3', '-O3', '-fPIC', '-w', '-DTSNE3D', '-flto'], - extra_link_args=['-Wl,-Bdynamic,--as-needed', '-lgcc_s', '-flto'], - language='c++')] + sources=['tsne/bh_sne_src/sptree.cpp', + 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], + include_dirs=[ + numpy.get_include(), 'tsne/bh_sne_src/'], + extra_compile_args=['-msse3', '-O3', '-Wall', '-flto', + '-fPIC', '-w', '-std=c++14'], + extra_link_args=['-flto', + '-Wl,-Bdynamic,--as-needed', '-lgcc_s'], + language='c++'), + ] ext_modules = cythonize(ext_modules) @@ -75,4 +79,4 @@ packages=find_packages(), ext_modules=ext_modules, install_requires=required -) + ) diff --git a/tsne/__init__.py b/tsne/__init__.py index c5b9d3c..a6fcb2b 100644 --- a/tsne/__init__.py +++ b/tsne/__init__.py @@ -1,10 +1,11 @@ # encoding: utf-8 from __future__ import division + import numpy as np import scipy.linalg as la import sys from bh_sne import BH_SNE -from bh_sne_3d import BH_SNE_3D + def bh_sne(data, pca_d=None, d=2, perplexity=30., theta=0.5, random_state=None, copy_data=False, init=None, @@ -79,17 +80,16 @@ def bh_sne(data, pca_d=None, d=2, perplexity=30., theta=0.5, if mom_switch_iter is None: mom_switch_iter = 250 - if d == 2: - tsne = BH_SNE() - elif d == 3: - tsne = BH_SNE_3D() - else: + if d != 2 and d != 3: raise Exception("TSNE dimensions must be 2 or 3") - Y = tsne.run(X, N, X.shape[1], d, perplexity, theta, seed, init=init, use_init=use_init, - max_iter=max_iter, stop_lying_iter=stop_lying_iter, mom_switch_iter=mom_switch_iter) + Y = BH_SNE.run(X, d, + perplexity, theta, + seed, init=init, use_init=use_init, + max_iter=max_iter, stop_lying_iter=stop_lying_iter, mom_switch_iter=mom_switch_iter) return Y + from ._version import get_versions __version__ = get_versions()['version'] del get_versions diff --git a/tsne/bh_sne.pyx b/tsne/bh_sne.pyx index e45dd13..6dada3d 100644 --- a/tsne/bh_sne.pyx +++ b/tsne/bh_sne.pyx @@ -5,24 +5,33 @@ cimport cython from libcpp cimport bool cdef extern from "tsne.h": - cdef cppclass TSNE: - TSNE() - void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, bool skip_random_init, double *init, bool use_init, int max_iter, int stop_lying_iter, int mom_switch_iter) + void _c_run "run" ( + double* X, int N, int D, double* Y, + int no_dims, double perplexity, + double theta, int rand_seed, + bool skip_random_init, double *init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter) nogil cdef class BH_SNE: - cdef TSNE* thisptr # hold a C++ instance - - def __cinit__(self): - self.thisptr = new TSNE() - - def __dealloc__(self): - del self.thisptr - @cython.boundscheck(False) @cython.wraparound(False) - def run(self, X, N, D, d, perplexity, theta, seed, init, use_init, max_iter, stop_lying_iter, mom_switch_iter): - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _X = np.ascontiguousarray(X) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _init = np.ascontiguousarray(init) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Y = np.zeros((N, d), dtype=np.float64) - self.thisptr.run(&_X[0,0], N, D, &Y[0,0], d, perplexity, theta, seed, False, &_init[0,0], use_init, max_iter, stop_lying_iter, mom_switch_iter) + @staticmethod + def run(X, int d, + double perplexity, double theta, + int seed, init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter): + cdef int N = X.shape[0] + cdef int D = X.shape[1] + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _X = np.ascontiguousarray( + X, dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _init = np.ascontiguousarray( + init, dtype=np.float64) + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Y = np.zeros( + (N, d), dtype=np.float64, order='C') + with nogil: + _c_run(&_X[0,0], N, D, &Y[0,0], + d, perplexity, + theta, seed, + False, &_init[0,0], use_init, + max_iter, stop_lying_iter, mom_switch_iter) return Y diff --git a/tsne/bh_sne_3d.pyx b/tsne/bh_sne_3d.pyx deleted file mode 100644 index 315e107..0000000 --- a/tsne/bh_sne_3d.pyx +++ /dev/null @@ -1,28 +0,0 @@ -# distutils: language = c++ -import numpy as np -cimport numpy as np -cimport cython -from libcpp cimport bool - -cdef extern from "tsne.h": - cdef cppclass TSNE: - TSNE() - void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, bool skip_random_init, double *init, bool use_init, int max_iter, int stop_lying_iter, int mom_switch_iter) - -cdef class BH_SNE_3D: - cdef TSNE* thisptr # hold a C++ instance - - def __cinit__(self): - self.thisptr = new TSNE() - - def __dealloc__(self): - del self.thisptr - - @cython.boundscheck(False) - @cython.wraparound(False) - def run(self, X, N, D, d, perplexity, theta, seed, init, use_init, max_iter, stop_lying_iter, mom_switch_iter): - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _X = np.ascontiguousarray(X) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _init = np.ascontiguousarray(init) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Y = np.zeros((N, d), dtype=np.float64) - self.thisptr.run(&_X[0,0], N, D, &Y[0,0], d, perplexity, theta, seed, False, &_init[0,0], use_init, max_iter, stop_lying_iter, mom_switch_iter) - return Y diff --git a/tsne/bh_sne_src/Makefile b/tsne/bh_sne_src/Makefile index 8c8a1c8..90d0f7f 100644 --- a/tsne/bh_sne_src/Makefile +++ b/tsne/bh_sne_src/Makefile @@ -2,25 +2,22 @@ #CFLAGS = -march=haswell -ffast-math -O3 -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize #CFLAGS = -march=haswell -ffast-math -O3 -CXX = g++ -CFLAGS = -ffast-math -O3 +CXX?=g++ +CFLAGS?=-ffast-math -O3 -Wall -all: bh_tsne bh_tsne_3d +all: bh_tsne -bh_tsne: tsne.o sptree.o - $(CXX) $(CFLAGS) tsne.o sptree.o -o bh_tsne - -bh_tsne_3d: tsne_3d.o sptree.o - $(CXX) $(CFLAGS) tsne_3d.o sptree.o -o bh_tsne_3d +bh_tsne: main.o tsne.o sptree.o + $(CXX) $(CFLAGS) $(LDFLAGS) $^ -o $@ sptree.o: sptree.cpp sptree.h - $(CXX) $(CFLAGS) -c sptree.cpp + $(CXX) $(CFLAGS) -c $< tsne.o: tsne.cpp tsne.h sptree.h vptree.h - $(CXX) $(CFLAGS) -c tsne.cpp + $(CXX) $(CFLAGS) -c $< -tsne_3d.o: tsne.cpp tsne.h sptree.h vptree.h - $(CXX) $(CFLAGS) -DTSNE3D -c tsne.cpp +main.o: main.cpp tsne.h + $(CXX) $(CFLAGS) -c $< clean: - rm -Rf *.o bh_tsne bh_tsne_3d + rm -Rf *.o bh_tsne diff --git a/tsne/bh_sne_src/main.cpp b/tsne/bh_sne_src/main.cpp new file mode 100644 index 0000000..3f1fec5 --- /dev/null +++ b/tsne/bh_sne_src/main.cpp @@ -0,0 +1,147 @@ +/* + * + * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Delft University of Technology. + * 4. Neither the name of the Delft University of Technology nor the names of + * its contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + */ + +#include +#include +#include + +#include "tsne.h" + +using namespace std; + + +// Function that loads data from a t-SNE file +// Note: this function does a malloc that should be freed elsewhere +bool load_data(const char* dat_file, double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) { + + // Open file, read first 2 integers, allocate memory, and read the data + FILE *h; + if((h = fopen(dat_file, "r+b")) == nullptr) { + fprintf(stderr,"Error: could not open data file.\n"); + return false; + } + fread(n, sizeof(int), 1, h); // number of datapoints + fread(d, sizeof(int), 1, h); // original dimensionality + fread(theta, sizeof(double), 1, h); // gradient accuracy + fread(perplexity, sizeof(double), 1, h); // perplexity + fread(no_dims, sizeof(int), 1, h); // output dimensionality + fread(max_iter, sizeof(int),1,h); // maximum number of iterations + *data = (double*) malloc(*d * *n * sizeof(double)); + if(*data == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + fread(*data, sizeof(double), *n * *d, h); // the data + if(!feof(h)) fread(rand_seed, sizeof(int), 1, h); // random seed + fclose(h); + fprintf(stderr,"Read the %i x %i data matrix successfully!\n", *n, *d); + return true; +} + +// Function that saves map to a t-SNE file +void save_data(const char* res_file, double* data, int* landmarks, double* costs, int n, int d) { + + // Open file, write first 2 integers and then the data + FILE *h; + if((h = fopen(res_file, "w+b")) == nullptr) { + fprintf(stderr,"Error: could not open data file.\n"); + return; + } + fwrite(&n, sizeof(int), 1, h); + fwrite(&d, sizeof(int), 1, h); + fwrite(data, sizeof(double), n * d, h); + fwrite(landmarks, sizeof(int), n, h); + fwrite(costs, sizeof(double), n, h); + fclose(h); + fprintf(stderr,"Wrote the %i x %i data matrix successfully!\n", n, d); +} + +void save_csv(const char* csv_file, double* Y, int N, int D) { + std::ofstream csv(csv_file); + + for (int d = 0; d < D; d++) { + csv << "TSNE" << d+1 << ","; + } + csv << "\n"; + + for (int n = 0; n < N; n++) { + int row_offset = n * D; + for (int d = 0; d < D; d++) { + csv << Y[row_offset + d] << ","; + } + csv << "\n"; + } + + csv.close(); +} + +// Function that runs the Barnes-Hut implementation of t-SNE +int main(int argc, char *argv[]) { + + // load input and output + std::string dat_file = "data.dat"; + std::string res_file = "result.dat"; + if (argc > 1) { + dat_file = argv[1]; + res_file = argv[2]; + } + + const char *dat_file_c = dat_file.c_str(); + const char *res_file_c = res_file.c_str(); + + // Define some variables + int origN, N, D, no_dims, max_iter; + double perplexity, theta, *data; + int rand_seed = -1; + + // Read the parameters and the dataset + if(load_data(dat_file_c, &data, &origN, &D, &no_dims, &theta, &perplexity, &rand_seed, &max_iter)) { + + // Make dummy landmarks + N = origN; + int* landmarks = (int*) malloc(N * sizeof(int)); + if(landmarks == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + for(int n = 0; n < N; n++) landmarks[n] = n; + + // Now fire up the SNE implementation + double* Y = (double*) malloc(N * no_dims * sizeof(double)); + double* costs = (double*) calloc(N, sizeof(double)); + if(Y == nullptr || costs == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, nullptr, false, max_iter); + + // Save the results + save_data(res_file_c, Y, landmarks, costs, N, no_dims); + + // Clean up the memory + free(data); data = nullptr; + free(Y); Y = nullptr; + free(costs); costs = nullptr; + free(landmarks); landmarks = nullptr; + } +} diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 2d187c6..46391b1 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -30,14 +30,15 @@ * */ -#include -#include -#include -#include + +#include +#include +#include #include -#include "sptree.h" +#include "sptree.h" +#pragma GCC visibility push(hidden) // Constructs cell template @@ -448,3 +449,5 @@ void SPTree::print() // declare templates explicitly template class SPTree<2>; template class SPTree<3>; + +#pragma GCC visibility pop diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index be42686..51c4303 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -30,12 +30,10 @@ * */ - #ifndef SPTREE_H #define SPTREE_H -using namespace std; - +#pragma GCC visibility push(hidden) template class Cell { @@ -119,4 +117,5 @@ struct SPTree<0> enum { no_children = 1 }; }; +#pragma GCC visibility pop #endif diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index 5fac463..327560a 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -30,194 +30,89 @@ * */ -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include + #include "vptree.h" #include "sptree.h" #include "tsne.h" -using namespace std; - -#ifdef TSNE3D -#define NDIMS 3 -#else -#define NDIMS 2 -#endif - -// Perform t-SNE -void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, - int max_iter, int stop_lying_iter, int mom_switch_iter - ) { - - // Set random seed - if (skip_random_init != true) { - if(rand_seed >= 0) { - fprintf(stderr,"Using random seed: %d\n", rand_seed); - srand((unsigned int) rand_seed); - } else { - fprintf(stderr,"Using current time as random seed...\n"); - srand(time(NULL)); - } - } - - // Determine whether we are using an exact algorithm - if(N - 1 < 3 * perplexity) { fprintf(stderr,"Perplexity too large for the number of data points!\n"); exit(1); } - fprintf(stderr,"Using no_dims = %d, perplexity = %f, and theta = %f\n", no_dims, perplexity, theta); - bool exact = (theta == .0) ? true : false; - - fprintf(stderr,"Using max_iter = %d, stop_lying_iter = %d, mom_switch_iter = %d\n", max_iter, stop_lying_iter, mom_switch_iter); - - // Set learning parameters - float total_time = .0; - clock_t start, end; - double momentum = .5, final_momentum = .8; - double eta = 200.0; - - // Allocate some memory - double* dY = (double*) malloc(N * no_dims * sizeof(double)); - double* uY = (double*) malloc(N * no_dims * sizeof(double)); - double* gains = (double*) malloc(N * no_dims * sizeof(double)); - if(dY == NULL || uY == NULL || gains == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - for(int i = 0; i < N * no_dims; i++) uY[i] = .0; - for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0; - - // Normalize input data (to prevent numerical problems) - fprintf(stderr,"Computing input similarities...\n"); - start = clock(); - zeroMean(X, N, D); - double max_X = .0; - for(int i = 0; i < N * D; i++) { - if(fabs(X[i]) > max_X) max_X = fabs(X[i]); - } - for(int i = 0; i < N * D; i++) X[i] /= max_X; - - // Compute input similarities for exact t-SNE - double* P; unsigned int* row_P; unsigned int* col_P; double* val_P; - if(exact) { - - // Compute similarities - fprintf(stderr,"Exact?"); - P = (double*) malloc(N * N * sizeof(double)); - if(P == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - computeGaussianPerplexity(X, N, D, P, perplexity); - - // Symmetrize input similarities - fprintf(stderr,"Symmetrizing...\n"); - int nN = 0; - for(int n = 0; n < N; n++) { - int mN = (n + 1) * N; - for(int m = n + 1; m < N; m++) { - P[nN + m] += P[mN + n]; - P[mN + n] = P[nN + m]; - mN += N; - } - nN += N; - } - double sum_P = .0; - for(int i = 0; i < N * N; i++) sum_P += P[i]; - for(int i = 0; i < N * N; i++) P[i] /= sum_P; - } - - // Compute input similarities for approximate t-SNE - else { - - // Compute asymmetric pairwise input similarities - computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int) (3 * perplexity)); - - // Symmetrize input similarities - symmetrizeMatrix(&row_P, &col_P, &val_P, N); - double sum_P = .0; - for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; - for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; - } - end = clock(); - - // Lie about the P-values - if(exact) { for(int i = 0; i < N * N; i++) P[i] *= 12.0; } - else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; } - - // Initialize solution (randomly or with given coordinates) - if (!use_init && !skip_random_init) { - for(int i = 0; i < N * no_dims; i++) { - Y[i] = randn() * .0001; - } - } else if (use_init) { - for(int i = 0; i < N * no_dims; i++) { - Y[i] = init[i]; - } - } - - // Perform main training loop - if(exact) fprintf(stderr,"Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); - else fprintf(stderr,"Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); - start = clock(); - - for(int iter = 0; iter < max_iter; iter++) { - - // Compute (approximate) gradient - if(exact) computeExactGradient(P, Y, N, no_dims, dY); - else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta); - - // Update gains - for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); - for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01; +#pragma GCC visibility push(hidden) + +using std::array; +using std::vector; + +namespace TSNE { + +template +void run(double* X, int N, int D, double* Y, + double perplexity, + double theta, int rand_seed, + bool skip_random_init, double *init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter); + +} // namespace TSNE + + +extern "C" { + +void DLL_PUBLIC run( + double* X, int N, int D, double* Y, + int no_dims, double perplexity, + double theta, int rand_seed, + bool skip_random_init, double *init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter) { + assert(no_dims == 2 || no_dims == 3); + switch (no_dims) { + case 2: + TSNE::run<2>(X, N, D, Y, + perplexity, + theta, rand_seed, + skip_random_init, init, use_init, + max_iter, stop_lying_iter, mom_switch_iter); + break; + case 3: + TSNE::run<3>(X, N, D, Y, + perplexity, + theta, rand_seed, + skip_random_init, init, use_init, + max_iter, stop_lying_iter, mom_switch_iter); + break; + default: + throw "invalid dimension"; + } +} - // Perform gradient update (with momentum and gains) - for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; - for(int i = 0; i < N * no_dims; i++) Y[i] = Y[i] + uY[i]; +} // extern "C" - // Make solution zero-mean - zeroMean(Y, N, no_dims); - // Stop lying about the P-values after a while, and switch momentum - if(iter == stop_lying_iter) { - if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; } - else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; } - } - if(iter == mom_switch_iter) momentum = final_momentum; - - // Print out progress - if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) { - end = clock(); - double C = .0; - if(exact) C = evaluateError(P, Y, N, no_dims); - else C = evaluateError(row_P, col_P, val_P, Y, N, no_dims, theta); // doing approximate computation here! - if(iter == 0) - fprintf(stderr,"Iteration %d: error is %f\n", iter + 1, C); - else { - total_time += (float) (end - start) / CLOCKS_PER_SEC; - fprintf(stderr,"Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC); - } - start = clock(); - } - } - end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC; +namespace TSNE { - // Clean up memory - free(dY); - free(uY); - free(gains); - if(exact) free(P); - else { - free(row_P); row_P = NULL; - free(col_P); col_P = NULL; - free(val_P); val_P = NULL; - } - fprintf(stderr,"Fitting performed in %4.2f seconds.\n", total_time); -} +namespace { +template +void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, double* dC, double theta); +void computeExactGradient(double* P, double* Y, int N, int D, double* dC); +double evaluateError(double* P, double* Y, int N, int D); +template +double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, double theta); +void zeroMean(double* X, int N, int D); +void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity); +void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K); +void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD); +double randn(); +void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, double** val_P, int N); +static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); } // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm) -void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta) +template +void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, double* dC, double theta) { // Construct space-partitioning tree on current map @@ -225,14 +120,14 @@ void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp // Compute all terms required for t-SNE gradient double sum_Q = .0; - double* pos_f = (double*) calloc(N * D, sizeof(double)); - double* neg_f = (double*) calloc(N * D, sizeof(double)); - if(pos_f == NULL || neg_f == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + double* pos_f = (double*) calloc(N * NDIMS, sizeof(double)); + double* neg_f = (double*) calloc(N * NDIMS, sizeof(double)); + if(pos_f == nullptr || neg_f == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); - for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * D, &sum_Q); + for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * NDIMS, &sum_Q); // Compute final t-SNE gradient - for(int i = 0; i < N * D; i++) { + for(int i = 0; i < N * NDIMS; i++) { dC[i] = pos_f[i] - (neg_f[i] / sum_Q); } free(pos_f); @@ -241,19 +136,19 @@ void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp } // Compute gradient of the t-SNE cost function (exact) -void TSNE::computeExactGradient(double* P, double* Y, int N, int D, double* dC) { +void computeExactGradient(double* P, double* Y, int N, int D, double* dC) { // Make sure the current gradient contains zeros for(int i = 0; i < N * D; i++) dC[i] = 0.0; // Compute the squared Euclidean distance matrix double* DD = (double*) malloc(N * N * sizeof(double)); - if(DD == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(DD == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } computeSquaredEuclideanDistance(Y, N, D, DD); // Compute Q-matrix and normalization sum double* Q = (double*) malloc(N * N * sizeof(double)); - if(Q == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(Q == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } double sum_Q = .0; int nN = 0; for(int n = 0; n < N; n++) { @@ -285,18 +180,18 @@ void TSNE::computeExactGradient(double* P, double* Y, int N, int D, double* dC) } // Free memory - free(DD); DD = NULL; - free(Q); Q = NULL; + free(DD); DD = nullptr; + free(Q); Q = nullptr; } // Evaluate t-SNE cost function (exactly) -double TSNE::evaluateError(double* P, double* Y, int N, int D) { +double evaluateError(double* P, double* Y, int N, int D) { // Compute the squared Euclidean distance matrix double* DD = (double*) malloc(N * N * sizeof(double)); double* Q = (double*) malloc(N * N * sizeof(double)); - if(DD == NULL || Q == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(DD == nullptr || Q == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } computeSquaredEuclideanDistance(Y, N, D, DD); // Compute Q-matrix and normalization sum @@ -327,111 +222,113 @@ double TSNE::evaluateError(double* P, double* Y, int N, int D) { } // Evaluate t-SNE cost function (approximately) -double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double theta) +template +double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, double theta) { // Get estimate of normalization term - SPTree* tree = new SPTree(Y, N); - double* buff = (double*) calloc(D, sizeof(double)); + array buff; + buff.fill(0); double sum_Q = .0; - for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, buff, &sum_Q); + { + SPTree tree(Y, N); + for(int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, buff.data(), &sum_Q); + } // Loop over all edges to compute t-SNE error int ind1, ind2; double C = .0, Q; for(int n = 0; n < N; n++) { - ind1 = n * D; + ind1 = n * NDIMS; for(int i = row_P[n]; i < row_P[n + 1]; i++) { Q = .0; - ind2 = col_P[i] * D; - for(int d = 0; d < D; d++) buff[d] = Y[ind1 + d]; - for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d]; - for(int d = 0; d < D; d++) Q += buff[d] * buff[d]; + ind2 = col_P[i] * NDIMS; + for(int d = 0; d < NDIMS; d++) buff[d] = Y[ind1 + d]; + for(int d = 0; d < NDIMS; d++) buff[d] -= Y[ind2 + d]; + for(int d = 0; d < NDIMS; d++) Q += buff[d] * buff[d]; Q = (1.0 / (1.0 + Q)) / sum_Q; C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); } } // Clean up memory - free(buff); - delete tree; return C; } // Compute input similarities with a fixed perplexity -void TSNE::computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { +void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { - // Compute the squared Euclidean distance matrix - double* DD = (double*) malloc(N * N * sizeof(double)); - if(DD == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - computeSquaredEuclideanDistance(X, N, D, DD); + // Compute the squared Euclidean distance matrix + double* DD = (double*) malloc(N * N * sizeof(double)); + if(DD == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + computeSquaredEuclideanDistance(X, N, D, DD); - // Compute the Gaussian kernel row by row + // Compute the Gaussian kernel row by row int nN = 0; - for(int n = 0; n < N; n++) { + for(int n = 0; n < N; n++) { - // Initialize some variables - bool found = false; - double beta = 1.0; - double min_beta = -DBL_MAX; - double max_beta = DBL_MAX; - double tol = 1e-5; + // Initialize some variables + bool found = false; + double beta = 1.0; + double min_beta = -DBL_MAX; + double max_beta = DBL_MAX; + double tol = 1e-5; double sum_P; - // Iterate until we found a good perplexity - int iter = 0; - while(!found && iter < 200) { - - // Compute Gaussian kernel row - for(int m = 0; m < N; m++) P[nN + m] = exp(-beta * DD[nN + m]); - P[nN + n] = DBL_MIN; - - // Compute entropy of current row - sum_P = DBL_MIN; - for(int m = 0; m < N; m++) sum_P += P[nN + m]; - double H = 0.0; - for(int m = 0; m < N; m++) H += beta * (DD[nN + m] * P[nN + m]); - H = (H / sum_P) + log(sum_P); - - // Evaluate whether the entropy is within the tolerance level - double Hdiff = H - log(perplexity); - if(Hdiff < tol && -Hdiff < tol) { - found = true; - } - else { - if(Hdiff > 0) { - min_beta = beta; - if(max_beta == DBL_MAX || max_beta == -DBL_MAX) - beta *= 2.0; - else - beta = (beta + max_beta) / 2.0; - } - else { - max_beta = beta; - if(min_beta == -DBL_MAX || min_beta == DBL_MAX) - beta /= 2.0; - else - beta = (beta + min_beta) / 2.0; - } - } - - // Update iteration counter - iter++; - } + // Iterate until we found a good perplexity + int iter = 0; + while(!found && iter < 200) { + + // Compute Gaussian kernel row + for(int m = 0; m < N; m++) P[nN + m] = exp(-beta * DD[nN + m]); + P[nN + n] = DBL_MIN; + + // Compute entropy of current row + sum_P = DBL_MIN; + for(int m = 0; m < N; m++) sum_P += P[nN + m]; + double H = 0.0; + for(int m = 0; m < N; m++) H += beta * (DD[nN + m] * P[nN + m]); + H = (H / sum_P) + log(sum_P); - // Row normalize P - for(int m = 0; m < N; m++) P[nN + m] /= sum_P; + // Evaluate whether the entropy is within the tolerance level + double Hdiff = H - log(perplexity); + if(Hdiff < tol && -Hdiff < tol) { + found = true; + } + else { + if(Hdiff > 0) { + min_beta = beta; + if(max_beta == DBL_MAX || max_beta == -DBL_MAX) + beta *= 2.0; + else + beta = (beta + max_beta) / 2.0; + } + else { + max_beta = beta; + if(min_beta == -DBL_MAX || min_beta == DBL_MAX) + beta /= 2.0; + else + beta = (beta + min_beta) / 2.0; + } + } + + // Update iteration counter + iter++; + } + + // Row normalize P + for(int m = 0; m < N; m++) P[nN + m] /= sum_P; nN += N; - } + } - // Clean up memory - free(DD); DD = NULL; + // Clean up memory + free(DD); DD = nullptr; } // Compute input similarities with a fixed perplexity using ball trees (this function allocates memory another function should free) -void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K) { +void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K) { if(perplexity > K) fprintf(stderr,"Perplexity should be lower than K!\n"); @@ -439,12 +336,12 @@ void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _ro *_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int)); *_col_P = (unsigned int*) calloc(N * K, sizeof(unsigned int)); *_val_P = (double*) calloc(N * K, sizeof(double)); - if(*_row_P == NULL || *_col_P == NULL || *_val_P == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(*_row_P == nullptr || *_col_P == nullptr || *_val_P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } unsigned int* row_P = *_row_P; unsigned int* col_P = *_col_P; double* val_P = *_val_P; double* cur_P = (double*) malloc((N - 1) * sizeof(double)); - if(cur_P == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(cur_P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } row_P[0] = 0; for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + (unsigned int) K; @@ -517,8 +414,8 @@ void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _ro // Row-normalize current row of P and store in matrix for(unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P; for(unsigned int m = 0; m < K; m++) { - col_P[row_P[n] + m] = (unsigned int) indices[m + 1].index(); - val_P[row_P[n] + m] = cur_P[m]; + col_P[row_P[n] + m] = (unsigned int) indices[m + 1].index(); + val_P[row_P[n] + m] = cur_P[m]; } } @@ -530,7 +427,7 @@ void TSNE::computeGaussianPerplexity(double* X, int N, int D, unsigned int** _ro // Symmetrizes a sparse matrix -void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double** _val_P, int N) { +void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double** _val_P, int N) { // Get sparse matrix unsigned int* row_P = *_row_P; @@ -539,7 +436,7 @@ void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double // Count number of elements and row counts of symmetric matrix int* row_counts = (int*) calloc(N, sizeof(int)); - if(row_counts == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(row_counts == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } for(int n = 0; n < N; n++) { for(int i = row_P[n]; i < row_P[n + 1]; i++) { @@ -562,7 +459,7 @@ void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double unsigned int* sym_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int)); unsigned int* sym_col_P = (unsigned int*) malloc(no_elem * sizeof(unsigned int)); double* sym_val_P = (double*) malloc(no_elem * sizeof(double)); - if(sym_row_P == NULL || sym_col_P == NULL || sym_val_P == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(sym_row_P == nullptr || sym_col_P == nullptr || sym_val_P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } // Construct new row indices for symmetric matrix sym_row_P[0] = 0; @@ -570,7 +467,7 @@ void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double // Fill the result matrix int* offset = (int*) calloc(N, sizeof(int)); - if(offset == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(offset == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } for(int n = 0; n < N; n++) { for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i]) @@ -613,12 +510,12 @@ void TSNE::symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double free(*_val_P); *_val_P = sym_val_P; // Free up some memery - free(offset); offset = NULL; - free(row_counts); row_counts = NULL; + free(offset); offset = nullptr; + free(row_counts); row_counts = nullptr; } // Compute squared Euclidean distance matrix -void TSNE::computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { +void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { const double* XnD = X; for(int n = 0; n < N; ++n, XnD += D) { const double* XmD = XnD + D; @@ -637,11 +534,11 @@ void TSNE::computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) // Makes data zero-mean -void TSNE::zeroMean(double* X, int N, int D) { +void zeroMean(double* X, int N, int D) { // Compute data mean double* mean = (double*) calloc(D, sizeof(double)); - if(mean == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + if(mean == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } int nD = 0; for(int n = 0; n < N; n++) { for(int d = 0; d < D; d++) { @@ -661,12 +558,12 @@ void TSNE::zeroMean(double* X, int N, int D) { } nD += D; } - free(mean); mean = NULL; + free(mean); mean = nullptr; } // Generates a Gaussian random number -double TSNE::randn() { +double randn() { double x, y, radius; do { x = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; @@ -679,112 +576,173 @@ double TSNE::randn() { return x; } -// Function that loads data from a t-SNE file -// Note: this function does a malloc that should be freed elsewhere -bool TSNE::load_data(const char* dat_file, double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) { +} // namespace - // Open file, read first 2 integers, allocate memory, and read the data - FILE *h; - if((h = fopen(dat_file, "r+b")) == NULL) { - fprintf(stderr,"Error: could not open data file.\n"); - return false; - } - fread(n, sizeof(int), 1, h); // number of datapoints - fread(d, sizeof(int), 1, h); // original dimensionality - fread(theta, sizeof(double), 1, h); // gradient accuracy - fread(perplexity, sizeof(double), 1, h); // perplexity - fread(no_dims, sizeof(int), 1, h); // output dimensionality - fread(max_iter, sizeof(int),1,h); // maximum number of iterations - *data = (double*) malloc(*d * *n * sizeof(double)); - if(*data == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - fread(*data, sizeof(double), *n * *d, h); // the data - if(!feof(h)) fread(rand_seed, sizeof(int), 1, h); // random seed - fclose(h); - fprintf(stderr,"Read the %i x %i data matrix successfully!\n", *n, *d); - return true; -} +// Perform t-SNE +template +void run(double* X, int N, int D, double* Y, + double perplexity, + double theta, int rand_seed, + bool skip_random_init, double *init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter) { + + // Set random seed + if (skip_random_init != true) { + if(rand_seed >= 0) { + fprintf(stderr,"Using random seed: %d\n", rand_seed); + srand((unsigned int) rand_seed); + } else { + fprintf(stderr,"Using current time as random seed...\n"); + srand(time(nullptr)); + } + } + + // Determine whether we are using an exact algorithm + if(N - 1 < 3 * perplexity) { fprintf(stderr,"Perplexity too large for the number of data points!\n"); exit(1); } + fprintf(stderr,"Using no_dims = %d, perplexity = %f, and theta = %f\n", NDIMS, perplexity, theta); + bool exact = (theta == .0) ? true : false; -// Function that saves map to a t-SNE file -void TSNE::save_data(const char* res_file, double* data, int* landmarks, double* costs, int n, int d) { + fprintf(stderr,"Using max_iter = %d, stop_lying_iter = %d, mom_switch_iter = %d\n", max_iter, stop_lying_iter, mom_switch_iter); - // Open file, write first 2 integers and then the data - FILE *h; - if((h = fopen(res_file, "w+b")) == NULL) { - fprintf(stderr,"Error: could not open data file.\n"); - return; - } - fwrite(&n, sizeof(int), 1, h); - fwrite(&d, sizeof(int), 1, h); - fwrite(data, sizeof(double), n * d, h); - fwrite(landmarks, sizeof(int), n, h); - fwrite(costs, sizeof(double), n, h); - fclose(h); - fprintf(stderr,"Wrote the %i x %i data matrix successfully!\n", n, d); -} + // Set learning parameters + float total_time = .0; + clock_t start, end; + double momentum = .5, final_momentum = .8; + double eta = 200.0; -void TSNE::save_csv(const char* csv_file, double* Y, int N, int D) { - std::ofstream csv(csv_file); + // Allocate some memory + double* dY = (double*) malloc(N * NDIMS * sizeof(double)); + double* uY = (double*) malloc(N * NDIMS * sizeof(double)); + double* gains = (double*) malloc(N * NDIMS * sizeof(double)); + if(dY == nullptr || uY == nullptr || gains == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + for(int i = 0; i < N * NDIMS; i++) uY[i] = .0; + for(int i = 0; i < N * NDIMS; i++) gains[i] = 1.0; - for (int d = 0; d < D; d++) { - csv << "TSNE" << d+1 << ","; + // Normalize input data (to prevent numerical problems) + fprintf(stderr,"Computing input similarities...\n"); + start = clock(); + zeroMean(X, N, D); + double max_X = .0; + for(int i = 0; i < N * D; i++) { + if(fabs(X[i]) > max_X) max_X = fabs(X[i]); } - csv << "\n"; + for(int i = 0; i < N * D; i++) X[i] /= max_X; + + // Compute input similarities for exact t-SNE + double* P; unsigned int* row_P; unsigned int* col_P; double* val_P; + if(exact) { - for (int n = 0; n < N; n++) { - int row_offset = n * D; - for (int d = 0; d < D; d++) { - csv << Y[row_offset + d] << ","; + // Compute similarities + fprintf(stderr,"Exact?"); + P = (double*) malloc(N * N * sizeof(double)); + if(P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } + computeGaussianPerplexity(X, N, D, P, perplexity); + + // Symmetrize input similarities + fprintf(stderr,"Symmetrizing...\n"); + int nN = 0; + for(int n = 0; n < N; n++) { + int mN = (n + 1) * N; + for(int m = n + 1; m < N; m++) { + P[nN + m] += P[mN + n]; + P[mN + n] = P[nN + m]; + mN += N; + } + nN += N; } - csv << "\n"; + double sum_P = .0; + for(int i = 0; i < N * N; i++) sum_P += P[i]; + for(int i = 0; i < N * N; i++) P[i] /= sum_P; } - csv.close(); -} + // Compute input similarities for approximate t-SNE + else { + + // Compute asymmetric pairwise input similarities + computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int) (3 * perplexity)); + + // Symmetrize input similarities + symmetrizeMatrix(&row_P, &col_P, &val_P, N); + double sum_P = .0; + for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; + for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; + } + end = clock(); + + // Lie about the P-values + if(exact) { for(int i = 0; i < N * N; i++) P[i] *= 12.0; } + else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; } + + // Initialize solution (randomly or with given coordinates) + if (!use_init && !skip_random_init) { + for(int i = 0; i < N * NDIMS; i++) { + Y[i] = randn() * .0001; + } + } else if (use_init) { + for(int i = 0; i < N * NDIMS; i++) { + Y[i] = init[i]; + } + } + + // Perform main training loop + if(exact) fprintf(stderr,"Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); + else fprintf(stderr,"Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); + start = clock(); + + for(int iter = 0; iter < max_iter; iter++) { + + // Compute (approximate) gradient + if(exact) computeExactGradient(P, Y, N, NDIMS, dY); + else computeGradient(P, row_P, col_P, val_P, Y, N, dY, theta); + + // Update gains + for(int i = 0; i < N * NDIMS; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); + for(int i = 0; i < N * NDIMS; i++) if(gains[i] < .01) gains[i] = .01; + + // Perform gradient update (with momentum and gains) + for(int i = 0; i < N * NDIMS; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; + for(int i = 0; i < N * NDIMS; i++) Y[i] = Y[i] + uY[i]; + + // Make solution zero-mean + zeroMean(Y, N, NDIMS); -// Function that runs the Barnes-Hut implementation of t-SNE -int main(int argc, char *argv[]) { + // Stop lying about the P-values after a while, and switch momentum + if(iter == stop_lying_iter) { + if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; } + else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; } + } + if(iter == mom_switch_iter) momentum = final_momentum; - // load input and output - std::string dat_file = "data.dat"; - std::string res_file = "result.dat"; - if (argc > 1) { - dat_file = argv[1]; - res_file = argv[2]; + // Print out progress + if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) { + end = clock(); + double C = .0; + if(exact) C = evaluateError(P, Y, N, NDIMS); + else C = evaluateError(row_P, col_P, val_P, Y, N, theta); // doing approximate computation here! + if(iter == 0) + fprintf(stderr,"Iteration %d: error is %f\n", iter + 1, C); + else { + total_time += (float) (end - start) / CLOCKS_PER_SEC; + fprintf(stderr,"Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC); + } + start = clock(); + } } + end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC; - const char *dat_file_c = dat_file.c_str(); - const char *res_file_c = res_file.c_str(); - - // Define some variables - int origN, N, D, no_dims, max_iter, *landmarks; - double perc_landmarks; - double perplexity, theta, *data; - int rand_seed = -1; - TSNE* tsne = new TSNE(); - - // Read the parameters and the dataset - if(tsne->load_data(dat_file_c, &data, &origN, &D, &no_dims, &theta, &perplexity, &rand_seed, &max_iter)) { - - // Make dummy landmarks - N = origN; - int* landmarks = (int*) malloc(N * sizeof(int)); - if(landmarks == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - for(int n = 0; n < N; n++) landmarks[n] = n; - - // Now fire up the SNE implementation - double* Y = (double*) malloc(N * no_dims * sizeof(double)); - double* costs = (double*) calloc(N, sizeof(double)); - if(Y == NULL || costs == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - tsne->run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, NULL, false, max_iter); - - // Save the results - tsne->save_data(res_file_c, Y, landmarks, costs, N, no_dims); - - // Clean up the memory - free(data); data = NULL; - free(Y); Y = NULL; - free(costs); costs = NULL; - free(landmarks); landmarks = NULL; + // Clean up memory + free(dY); + free(uY); + free(gains); + if(exact) free(P); + else { + free(row_P); row_P = nullptr; + free(col_P); col_P = nullptr; + free(val_P); val_P = nullptr; } - delete(tsne); + fprintf(stderr,"Fitting performed in %4.2f seconds.\n", total_time); } + +} // namespace TSNE + +#pragma GCC visibility pop diff --git a/tsne/bh_sne_src/tsne.h b/tsne/bh_sne_src/tsne.h index 93e203e..ceffd1b 100644 --- a/tsne/bh_sne_src/tsne.h +++ b/tsne/bh_sne_src/tsne.h @@ -34,31 +34,28 @@ #ifndef TSNE_H #define TSNE_H +#if defined _WIN32 || defined __CYGWIN__ + #ifdef __GNUC__ + #define DLL_PUBLIC __attribute__ ((dllexport)) + #else + #define DLL_PUBLIC __declspec(dllexport) + #endif +#else + #if __GNUC__ >= 4 + #define DLL_PUBLIC __attribute__ ((visibility ("default"))) + #else + #define DLL_PUBLIC + #endif +#endif -static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); } - - -class TSNE -{ -public: - void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, int max_iter=1000, int stop_lying_iter=250, int mom_switch_iter=250 - ); - bool load_data(const char* dat_file, double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter); - void save_data(const char* res_file, double* data, int* landmarks, double* costs, int n, int d); - void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, double** val_P, int N); // should be static! - void save_csv(const char* csv_file, double* Y, int N, int D); +extern "C" { -private: - void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta); - void computeExactGradient(double* P, double* Y, int N, int D, double* dC); - double evaluateError(double* P, double* Y, int N, int D); - double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, int D, double theta); - void zeroMean(double* X, int N, int D); - void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity); - void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K); - void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD); - double randn(); -}; +void DLL_PUBLIC run( + double* X, int N, int D, + double* Y, int no_dims, + double perplexity, double theta, int rand_seed, + bool skip_random_init, double *init, bool use_init, + int max_iter=1000, int stop_lying_iter=250, int mom_switch_iter=250); +} #endif diff --git a/tsne/bh_sne_src/vptree.h b/tsne/bh_sne_src/vptree.h index 9e301d6..50bb6ad 100644 --- a/tsne/bh_sne_src/vptree.h +++ b/tsne/bh_sne_src/vptree.h @@ -33,17 +33,18 @@ /* This code was adopted with minor modifications from Steve Hanov's great tutorial at http://stevehanov.ca/blog/index.php?id=130 */ -#include +#ifndef VPTREE_H +#define VPTREE_H + #include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include - -#ifndef VPTREE_H -#define VPTREE_H +#pragma GCC visibility push(hidden) class DataPoint { @@ -269,4 +270,5 @@ class VpTree } }; -#endif +#pragma GCC visibility pop +#endif // !defined(VPTREE_H) From 69eb8ba1977bdbb307a31e7f674e3e97b64d1bc6 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Tue, 26 Feb 2019 10:58:24 -0800 Subject: [PATCH 04/17] Remove TSNE namespace, leave anonymous namespace. --- tsne/bh_sne_src/Makefile | 2 +- tsne/bh_sne_src/tsne.cpp | 75 ++++++++++++++++++---------------------- 2 files changed, 34 insertions(+), 43 deletions(-) diff --git a/tsne/bh_sne_src/Makefile b/tsne/bh_sne_src/Makefile index 90d0f7f..95534dc 100644 --- a/tsne/bh_sne_src/Makefile +++ b/tsne/bh_sne_src/Makefile @@ -3,7 +3,7 @@ #CFLAGS = -march=haswell -ffast-math -O3 CXX?=g++ -CFLAGS?=-ffast-math -O3 -Wall +CFLAGS?=-ffast-math -O3 -Wall -std=c++14 all: bh_tsne diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index 327560a..c677229 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -48,7 +48,7 @@ using std::array; using std::vector; -namespace TSNE { +namespace { template void run(double* X, int N, int D, double* Y, @@ -57,44 +57,6 @@ void run(double* X, int N, int D, double* Y, bool skip_random_init, double *init, bool use_init, int max_iter, int stop_lying_iter, int mom_switch_iter); -} // namespace TSNE - - -extern "C" { - -void DLL_PUBLIC run( - double* X, int N, int D, double* Y, - int no_dims, double perplexity, - double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, - int max_iter, int stop_lying_iter, int mom_switch_iter) { - assert(no_dims == 2 || no_dims == 3); - switch (no_dims) { - case 2: - TSNE::run<2>(X, N, D, Y, - perplexity, - theta, rand_seed, - skip_random_init, init, use_init, - max_iter, stop_lying_iter, mom_switch_iter); - break; - case 3: - TSNE::run<3>(X, N, D, Y, - perplexity, - theta, rand_seed, - skip_random_init, init, use_init, - max_iter, stop_lying_iter, mom_switch_iter); - break; - default: - throw "invalid dimension"; - } -} - -} // extern "C" - - -namespace TSNE { - -namespace { template void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, double* dC, double theta); void computeExactGradient(double* P, double* Y, int N, int D, double* dC); @@ -576,8 +538,6 @@ double randn() { return x; } -} // namespace - // Perform t-SNE template void run(double* X, int N, int D, double* Y, @@ -743,6 +703,37 @@ void run(double* X, int N, int D, double* Y, fprintf(stderr,"Fitting performed in %4.2f seconds.\n", total_time); } -} // namespace TSNE +} // namespace + +extern "C" { + +void DLL_PUBLIC run( + double* X, int N, int D, double* Y, + int no_dims, double perplexity, + double theta, int rand_seed, + bool skip_random_init, double *init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter) { + assert(no_dims == 2 || no_dims == 3); + switch (no_dims) { + case 2: + run<2>(X, N, D, Y, + perplexity, + theta, rand_seed, + skip_random_init, init, use_init, + max_iter, stop_lying_iter, mom_switch_iter); + break; + case 3: + run<3>(X, N, D, Y, + perplexity, + theta, rand_seed, + skip_random_init, init, use_init, + max_iter, stop_lying_iter, mom_switch_iter); + break; + default: + throw "invalid dimension"; + } +} + +} // extern "C" #pragma GCC visibility pop From c33068fbf5d7d20879a5353d6c72be26d6a5cd9e Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Tue, 26 Feb 2019 11:25:27 -0800 Subject: [PATCH 05/17] clang-format c++ code. --- .clang-format | 6 + tsne/bh_sne_src/main.cpp | 210 +++--- tsne/bh_sne_src/sptree.cpp | 648 +++++++++--------- tsne/bh_sne_src/sptree.h | 170 ++--- tsne/bh_sne_src/tsne.cpp | 1288 ++++++++++++++++++++---------------- tsne/bh_sne_src/tsne.h | 49 +- tsne/bh_sne_src/vptree.h | 473 ++++++------- 7 files changed, 1506 insertions(+), 1338 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..1c96eda --- /dev/null +++ b/.clang-format @@ -0,0 +1,6 @@ +Language: Cpp +BasedOnStyle: Google +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +PenaltyBreakComment: 100 diff --git a/tsne/bh_sne_src/main.cpp b/tsne/bh_sne_src/main.cpp index 3f1fec5..5951436 100644 --- a/tsne/bh_sne_src/main.cpp +++ b/tsne/bh_sne_src/main.cpp @@ -12,7 +12,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the Delft University of Technology. + * This product includes software developed by the Delft University of + * Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. @@ -20,128 +21,143 @@ * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ -#include #include +#include #include #include "tsne.h" using namespace std; - // Function that loads data from a t-SNE file // Note: this function does a malloc that should be freed elsewhere -bool load_data(const char* dat_file, double** data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) { - - // Open file, read first 2 integers, allocate memory, and read the data - FILE *h; - if((h = fopen(dat_file, "r+b")) == nullptr) { - fprintf(stderr,"Error: could not open data file.\n"); - return false; - } - fread(n, sizeof(int), 1, h); // number of datapoints - fread(d, sizeof(int), 1, h); // original dimensionality - fread(theta, sizeof(double), 1, h); // gradient accuracy - fread(perplexity, sizeof(double), 1, h); // perplexity - fread(no_dims, sizeof(int), 1, h); // output dimensionality - fread(max_iter, sizeof(int),1,h); // maximum number of iterations - *data = (double*) malloc(*d * *n * sizeof(double)); - if(*data == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - fread(*data, sizeof(double), *n * *d, h); // the data - if(!feof(h)) fread(rand_seed, sizeof(int), 1, h); // random seed - fclose(h); - fprintf(stderr,"Read the %i x %i data matrix successfully!\n", *n, *d); - return true; +bool load_data(const char* dat_file, double** data, int* n, int* d, + int* no_dims, double* theta, double* perplexity, int* rand_seed, + int* max_iter) { + // Open file, read first 2 integers, allocate memory, and read the data + FILE* h; + if ((h = fopen(dat_file, "r+b")) == nullptr) { + fprintf(stderr, "Error: could not open data file.\n"); + return false; + } + fread(n, sizeof(int), 1, h); // number of datapoints + fread(d, sizeof(int), 1, h); // original dimensionality + fread(theta, sizeof(double), 1, h); // gradient accuracy + fread(perplexity, sizeof(double), 1, h); // perplexity + fread(no_dims, sizeof(int), 1, h); // output dimensionality + fread(max_iter, sizeof(int), 1, h); // maximum number of iterations + *data = (double*)malloc(*d * *n * sizeof(double)); + if (*data == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + fread(*data, sizeof(double), *n * *d, h); // the data + if (!feof(h)) + fread(rand_seed, sizeof(int), 1, h); // random seed + fclose(h); + fprintf(stderr, "Read the %i x %i data matrix successfully!\n", *n, *d); + return true; } // Function that saves map to a t-SNE file -void save_data(const char* res_file, double* data, int* landmarks, double* costs, int n, int d) { - - // Open file, write first 2 integers and then the data - FILE *h; - if((h = fopen(res_file, "w+b")) == nullptr) { - fprintf(stderr,"Error: could not open data file.\n"); - return; - } - fwrite(&n, sizeof(int), 1, h); - fwrite(&d, sizeof(int), 1, h); - fwrite(data, sizeof(double), n * d, h); - fwrite(landmarks, sizeof(int), n, h); - fwrite(costs, sizeof(double), n, h); - fclose(h); - fprintf(stderr,"Wrote the %i x %i data matrix successfully!\n", n, d); +void save_data(const char* res_file, double* data, int* landmarks, + double* costs, int n, int d) { + // Open file, write first 2 integers and then the data + FILE* h; + if ((h = fopen(res_file, "w+b")) == nullptr) { + fprintf(stderr, "Error: could not open data file.\n"); + return; + } + fwrite(&n, sizeof(int), 1, h); + fwrite(&d, sizeof(int), 1, h); + fwrite(data, sizeof(double), n * d, h); + fwrite(landmarks, sizeof(int), n, h); + fwrite(costs, sizeof(double), n, h); + fclose(h); + fprintf(stderr, "Wrote the %i x %i data matrix successfully!\n", n, d); } void save_csv(const char* csv_file, double* Y, int N, int D) { - std::ofstream csv(csv_file); + std::ofstream csv(csv_file); + + for (int d = 0; d < D; d++) { + csv << "TSNE" << d + 1 << ","; + } + csv << "\n"; + for (int n = 0; n < N; n++) { + int row_offset = n * D; for (int d = 0; d < D; d++) { - csv << "TSNE" << d+1 << ","; + csv << Y[row_offset + d] << ","; } csv << "\n"; + } - for (int n = 0; n < N; n++) { - int row_offset = n * D; - for (int d = 0; d < D; d++) { - csv << Y[row_offset + d] << ","; - } - csv << "\n"; - } - - csv.close(); + csv.close(); } // Function that runs the Barnes-Hut implementation of t-SNE -int main(int argc, char *argv[]) { - - // load input and output - std::string dat_file = "data.dat"; - std::string res_file = "result.dat"; - if (argc > 1) { - dat_file = argv[1]; - res_file = argv[2]; +int main(int argc, char* argv[]) { + // load input and output + std::string dat_file = "data.dat"; + std::string res_file = "result.dat"; + if (argc > 1) { + dat_file = argv[1]; + res_file = argv[2]; + } + + const char* dat_file_c = dat_file.c_str(); + const char* res_file_c = res_file.c_str(); + + // Define some variables + int origN, N, D, no_dims, max_iter; + double perplexity, theta, *data; + int rand_seed = -1; + + // Read the parameters and the dataset + if (load_data(dat_file_c, &data, &origN, &D, &no_dims, &theta, &perplexity, + &rand_seed, &max_iter)) { + // Make dummy landmarks + N = origN; + int* landmarks = (int*)malloc(N * sizeof(int)); + if (landmarks == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); } - - const char *dat_file_c = dat_file.c_str(); - const char *res_file_c = res_file.c_str(); - - // Define some variables - int origN, N, D, no_dims, max_iter; - double perplexity, theta, *data; - int rand_seed = -1; - - // Read the parameters and the dataset - if(load_data(dat_file_c, &data, &origN, &D, &no_dims, &theta, &perplexity, &rand_seed, &max_iter)) { - - // Make dummy landmarks - N = origN; - int* landmarks = (int*) malloc(N * sizeof(int)); - if(landmarks == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - for(int n = 0; n < N; n++) landmarks[n] = n; - - // Now fire up the SNE implementation - double* Y = (double*) malloc(N * no_dims * sizeof(double)); - double* costs = (double*) calloc(N, sizeof(double)); - if(Y == nullptr || costs == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, nullptr, false, max_iter); - - // Save the results - save_data(res_file_c, Y, landmarks, costs, N, no_dims); - - // Clean up the memory - free(data); data = nullptr; - free(Y); Y = nullptr; - free(costs); costs = nullptr; - free(landmarks); landmarks = nullptr; + for (int n = 0; n < N; n++) + landmarks[n] = n; + + // Now fire up the SNE implementation + double* Y = (double*)malloc(N * no_dims * sizeof(double)); + double* costs = (double*)calloc(N, sizeof(double)); + if (Y == nullptr || costs == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); } + run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, nullptr, + false, max_iter); + + // Save the results + save_data(res_file_c, Y, landmarks, costs, N, no_dims); + + // Clean up the memory + free(data); + data = nullptr; + free(Y); + Y = nullptr; + free(costs); + costs = nullptr; + free(landmarks); + landmarks = nullptr; + } } diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 46391b1..070278e 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -12,7 +12,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the Delft University of Technology. + * This product includes software developed by the Delft University of + * Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. @@ -20,430 +21,431 @@ * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ - #include -#include -#include #include +#include +#include #include "sptree.h" #pragma GCC visibility push(hidden) // Constructs cell -template +template Cell::Cell() { } -template +template Cell::Cell(double* inp_corner, double* inp_width) { - for(int d = 0; d < NDims; d++) setCorner(d, inp_corner[d]); - for(int d = 0; d < NDims; d++) setWidth( d, inp_width[d]); + for (int d = 0; d < NDims; d++) + setCorner(d, inp_corner[d]); + for (int d = 0; d < NDims; d++) + setWidth(d, inp_width[d]); } // Destructs cell -template +template Cell::~Cell() { } -template +template double Cell::getCorner(unsigned int d) { - return corner[d]; + return corner[d]; } -template +template double Cell::getWidth(unsigned int d) { - return width[d]; + return width[d]; } -template +template void Cell::setCorner(unsigned int d, double val) { - corner[d] = val; + corner[d] = val; } -template +template void Cell::setWidth(unsigned int d, double val) { - width[d] = val; + width[d] = val; } // Checks whether a point lies in a cell -template -bool Cell::containsPoint(double point[]) -{ - for(int d = 0; d < NDims; d++) { - if(corner[d] - width[d] > point[d]) return false; - if(corner[d] + width[d] < point[d]) return false; - } - return true; +template +bool Cell::containsPoint(double point[]) { + for (int d = 0; d < NDims; d++) { + if (corner[d] - width[d] > point[d]) + return false; + if (corner[d] + width[d] < point[d]) + return false; + } + return true; } - // Default constructor for SPTree -- build tree, too! -template -SPTree::SPTree(double* inp_data, unsigned int N) -{ - - // Compute mean, width, and height of current map (boundaries of SPTree) - int nD = 0; - double* mean_Y = (double*) calloc(NDims, sizeof(double)); - double* min_Y = (double*) malloc(NDims * sizeof(double)); - double* max_Y = (double*) malloc(NDims * sizeof(double)); - - for(unsigned int d = 0; d < NDims; d++) { - min_Y[d] = DBL_MAX; - max_Y[d] = -DBL_MAX; +template +SPTree::SPTree(double* inp_data, unsigned int N) { + // Compute mean, width, and height of current map (boundaries of SPTree) + int nD = 0; + double* mean_Y = (double*)calloc(NDims, sizeof(double)); + double* min_Y = (double*)malloc(NDims * sizeof(double)); + double* max_Y = (double*)malloc(NDims * sizeof(double)); + + for (unsigned int d = 0; d < NDims; d++) { + min_Y[d] = DBL_MAX; + max_Y[d] = -DBL_MAX; + } + + for (unsigned int n = 0; n < N; n++) { + for (unsigned int d = 0; d < NDims; d++) { + mean_Y[d] += inp_data[n * NDims + d]; + if (inp_data[nD + d] < min_Y[d]) + min_Y[d] = inp_data[nD + d]; + if (inp_data[nD + d] > max_Y[d]) + max_Y[d] = inp_data[nD + d]; } - - for(unsigned int n = 0; n < N; n++) { - for(unsigned int d = 0; d < NDims; d++) { - mean_Y[d] += inp_data[n * NDims + d]; - if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d]; - if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d]; - } - nD += NDims; - } - - for(int d = 0; d < NDims; d++) mean_Y[d] /= (double) N; - - // Construct SPTree - double* width = (double*) malloc(NDims * sizeof(double)); - for(int d = 0; d < NDims; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; - init(NULL, inp_data, mean_Y, width); - fill(N); - - // Clean up memory - free(mean_Y); - free(max_Y); - free(min_Y); - free(width); + nD += NDims; + } + + for (int d = 0; d < NDims; d++) + mean_Y[d] /= (double)N; + + // Construct SPTree + double* width = (double*)malloc(NDims * sizeof(double)); + for (int d = 0; d < NDims; d++) + width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; + init(NULL, inp_data, mean_Y, width); + fill(N); + + // Clean up memory + free(mean_Y); + free(max_Y); + free(min_Y); + free(width); } - -// Constructor for SPTree with particular size and parent -- build the tree, too! -template -SPTree::SPTree(double* inp_data, unsigned int N, double* inp_corner, double* inp_width) -{ - init(NULL, inp_data, inp_corner, inp_width); - fill(N); +// Constructor for SPTree with particular size and parent -- build the tree, +// too! +template +SPTree::SPTree(double* inp_data, unsigned int N, double* inp_corner, + double* inp_width) { + init(NULL, inp_data, inp_corner, inp_width); + fill(N); } - // Constructor for SPTree with particular size (do not fill the tree) -template -SPTree::SPTree(double* inp_data, double* inp_corner, double* inp_width) -{ - init(NULL, inp_data, inp_corner, inp_width); +template +SPTree::SPTree(double* inp_data, double* inp_corner, double* inp_width) { + init(NULL, inp_data, inp_corner, inp_width); } - // Constructor for SPTree with particular size and parent (do not fill tree) -template -SPTree::SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width) { - init(inp_parent, inp_data, inp_corner, inp_width); +template +SPTree::SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, + double* inp_width) { + init(inp_parent, inp_data, inp_corner, inp_width); } - -// Constructor for SPTree with particular size and parent -- build the tree, too! -template -SPTree::SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, double* inp_corner, double* inp_width) -{ - init(inp_parent, inp_data, inp_corner, inp_width); - fill(N); +// Constructor for SPTree with particular size and parent -- build the tree, +// too! +template +SPTree::SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, + double* inp_corner, double* inp_width) { + init(inp_parent, inp_data, inp_corner, inp_width); + fill(N); } - // Main initialization function -template -void SPTree::init(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width) -{ - parent = inp_parent; - data = inp_data; - is_leaf = true; - size = 0; - cum_size = 0; - - for(unsigned int d = 0; d < NDims; d++) boundary.setCorner(d, inp_corner[d]); - for(unsigned int d = 0; d < NDims; d++) boundary.setWidth( d, inp_width[d]); - - for(unsigned int i = 0; i < no_children; i++) children[i] = NULL; - for(unsigned int d = 0; d < NDims; d++) center_of_mass[d] = .0; +template +void SPTree::init(SPTree* inp_parent, double* inp_data, + double* inp_corner, double* inp_width) { + parent = inp_parent; + data = inp_data; + is_leaf = true; + size = 0; + cum_size = 0; + + for (unsigned int d = 0; d < NDims; d++) + boundary.setCorner(d, inp_corner[d]); + for (unsigned int d = 0; d < NDims; d++) + boundary.setWidth(d, inp_width[d]); + + for (unsigned int i = 0; i < no_children; i++) + children[i] = NULL; + for (unsigned int d = 0; d < NDims; d++) + center_of_mass[d] = .0; } - // Destructor for SPTree -template -SPTree::~SPTree() -{ - for(unsigned int i = 0; i < no_children; i++) { - if(children[i] != NULL) delete children[i]; - } +template +SPTree::~SPTree() { + for (unsigned int i = 0; i < no_children; i++) { + if (children[i] != NULL) + delete children[i]; + } } - // Update the data underlying this tree -template -void SPTree::setData(double* inp_data) -{ - data = inp_data; +template +void SPTree::setData(double* inp_data) { + data = inp_data; } - // Get the parent of the current tree -template -SPTree* SPTree::getParent() -{ - return parent; +template +SPTree* SPTree::getParent() { + return parent; } - // Insert a point into the SPTree -template -bool SPTree::insert(unsigned int new_index) -{ - // Ignore objects which do not belong in this quad tree - double* point = data + new_index * NDims; - if(!boundary.containsPoint(point)) - return false; - - // Online update of cumulative size and center-of-mass - cum_size++; - double mult1 = (double) (cum_size - 1) / (double) cum_size; - double mult2 = 1.0 / (double) cum_size; - - for(unsigned int d = 0; d < NDims; d++) { - center_of_mass[d] = center_of_mass[d] * mult1 + mult2 * point[d]; - } +template +bool SPTree::insert(unsigned int new_index) { + // Ignore objects which do not belong in this quad tree + double* point = data + new_index * NDims; + if (!boundary.containsPoint(point)) + return false; - // If there is space in this quad tree and it is a leaf, add the object here - if(is_leaf && size < QT_NODE_CAPACITY) { - index[size] = new_index; - size++; - return true; - } + // Online update of cumulative size and center-of-mass + cum_size++; + double mult1 = (double)(cum_size - 1) / (double)cum_size; + double mult2 = 1.0 / (double)cum_size; - // Don't add duplicates for now (this is not very nice) - bool any_duplicate = false; - for(unsigned int n = 0; n < size; n++) { - bool duplicate = true; - for(unsigned int d = 0; d < NDims; d++) { - if(point[d] != data[index[n] * NDims + d]) { duplicate = false; break; } - } - any_duplicate = any_duplicate | duplicate; + for (unsigned int d = 0; d < NDims; d++) { + center_of_mass[d] = center_of_mass[d] * mult1 + mult2 * point[d]; + } + + // If there is space in this quad tree and it is a leaf, add the object here + if (is_leaf && size < QT_NODE_CAPACITY) { + index[size] = new_index; + size++; + return true; + } + + // Don't add duplicates for now (this is not very nice) + bool any_duplicate = false; + for (unsigned int n = 0; n < size; n++) { + bool duplicate = true; + for (unsigned int d = 0; d < NDims; d++) { + if (point[d] != data[index[n] * NDims + d]) { + duplicate = false; + break; + } } - if(any_duplicate) return true; + any_duplicate = any_duplicate | duplicate; + } + if (any_duplicate) + return true; - // Otherwise, we need to subdivide the current cell - if(is_leaf) subdivide(); + // Otherwise, we need to subdivide the current cell + if (is_leaf) + subdivide(); - // Find out where the point can be inserted - for(unsigned int i = 0; i < no_children; i++) { - if(children[i]->insert(new_index)) return true; - } + // Find out where the point can be inserted + for (unsigned int i = 0; i < no_children; i++) { + if (children[i]->insert(new_index)) + return true; + } - // Otherwise, the point cannot be inserted (this should never happen) - return false; + // Otherwise, the point cannot be inserted (this should never happen) + return false; } - -// Create four children which fully divide this cell into four quads of equal area -template +// Create four children which fully divide this cell into four quads of equal +// area +template void SPTree::subdivide() { - - // Create new children - double new_corner[NDims]; - double new_width[NDims]; - for(unsigned int i = 0; i < no_children; i++) { - unsigned int div = 1; - for(unsigned int d = 0; d < NDims; d++) { - new_width[d] = .5 * boundary.getWidth(d); - if((i / div) % 2 == 1) new_corner[d] = boundary.getCorner(d) - .5 * boundary.getWidth(d); - else new_corner[d] = boundary.getCorner(d) + .5 * boundary.getWidth(d); - div *= 2; - } - children[i] = new SPTree(this, data, new_corner, new_width); + // Create new children + double new_corner[NDims]; + double new_width[NDims]; + for (unsigned int i = 0; i < no_children; i++) { + unsigned int div = 1; + for (unsigned int d = 0; d < NDims; d++) { + new_width[d] = .5 * boundary.getWidth(d); + if ((i / div) % 2 == 1) + new_corner[d] = boundary.getCorner(d) - .5 * boundary.getWidth(d); + else + new_corner[d] = boundary.getCorner(d) + .5 * boundary.getWidth(d); + div *= 2; } - - // Move existing points to correct children - for(unsigned int i = 0; i < size; i++) { - bool success = false; - for(unsigned int j = 0; j < no_children; j++) { - if(!success) success = children[j]->insert(index[i]); - } - index[i] = -1; + children[i] = new SPTree(this, data, new_corner, new_width); + } + + // Move existing points to correct children + for (unsigned int i = 0; i < size; i++) { + bool success = false; + for (unsigned int j = 0; j < no_children; j++) { + if (!success) + success = children[j]->insert(index[i]); } + index[i] = -1; + } - // Empty parent node - size = 0; - is_leaf = false; + // Empty parent node + size = 0; + is_leaf = false; } - // Build SPTree on dataset -template -void SPTree::fill(unsigned int N) -{ - for(unsigned int i = 0; i < N; i++) insert(i); +template +void SPTree::fill(unsigned int N) { + for (unsigned int i = 0; i < N; i++) + insert(i); } - // Checks whether the specified tree is correct -template -bool SPTree::isCorrect() -{ - for(unsigned int n = 0; n < size; n++) { - double* point = data + index[n] * NDims; - if(!boundary.containsPoint(point)) return false; - } - if(!is_leaf) { - bool correct = true; - for(int i = 0; i < no_children; i++) correct = correct && children[i]->isCorrect(); - return correct; - } - else return true; +template +bool SPTree::isCorrect() { + for (unsigned int n = 0; n < size; n++) { + double* point = data + index[n] * NDims; + if (!boundary.containsPoint(point)) + return false; + } + if (!is_leaf) { + bool correct = true; + for (int i = 0; i < no_children; i++) + correct = correct && children[i]->isCorrect(); + return correct; + } else + return true; } - - // Build a list of all indices in SPTree -template -void SPTree::getAllIndices(unsigned int* indices) -{ - getAllIndices(indices, 0); +template +void SPTree::getAllIndices(unsigned int* indices) { + getAllIndices(indices, 0); } - // Build a list of all indices in SPTree -template -unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc) -{ - - // Gather indices in current quadrant - for(unsigned int i = 0; i < size; i++) indices[loc + i] = index[i]; - loc += size; - - // Gather indices in children - if(!is_leaf) { - for(int i = 0; i < no_children; i++) loc = children[i]->getAllIndices(indices, loc); - } - return loc; +template +unsigned int SPTree::getAllIndices(unsigned int* indices, + unsigned int loc) { + // Gather indices in current quadrant + for (unsigned int i = 0; i < size; i++) + indices[loc + i] = index[i]; + loc += size; + + // Gather indices in children + if (!is_leaf) { + for (int i = 0; i < no_children; i++) + loc = children[i]->getAllIndices(indices, loc); + } + return loc; } -template +template unsigned int SPTree::getDepth() { - if(is_leaf) return 1; - int depth = 0; - for(unsigned int i = 0; i < no_children; i++) depth = fmax(depth, children[i]->getDepth()); - return 1 + depth; + if (is_leaf) + return 1; + int depth = 0; + for (unsigned int i = 0; i < no_children; i++) + depth = fmax(depth, children[i]->getDepth()); + return 1 + depth; } - // Compute non-edge forces using Barnes-Hut algorithm -template -void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) -{ - - // Make sure that we spend no time on empty nodes or self-interactions - if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return; - - // Compute distance between point and center-of-mass - double sqdist = .0; - unsigned int ind = point_index * NDims; +template +void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, + double neg_f[], double* sum_Q) { + // Make sure that we spend no time on empty nodes or self-interactions + if (cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) + return; + + // Compute distance between point and center-of-mass + double sqdist = .0; + unsigned int ind = point_index * NDims; + + for (unsigned int d = 0; d < NDims; d++) { + buff[d] = data[ind + d] - center_of_mass[d]; + sqdist += buff[d] * buff[d]; + } + + // Check whether we can use this node as a "summary" + double max_width = 0.0; + double cur_width; + for (unsigned int d = 0; d < NDims; d++) { + cur_width = boundary.getWidth(d); + max_width = (max_width > cur_width) ? max_width : cur_width; + } + if (is_leaf || max_width / sqrt(sqdist) < theta) { + // Compute and add t-SNE force between point and current node + sqdist = 1.0 / (1.0 + sqdist); + double mult = cum_size * sqdist; + *sum_Q += mult; + mult *= sqdist; + for (unsigned int d = 0; d < NDims; d++) + neg_f[d] += mult * buff[d]; + } else { + // Recursively apply Barnes-Hut to children + for (unsigned int i = 0; i < no_children; i++) + children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); + } +} - for(unsigned int d = 0; d < NDims; d++) { - buff[d] = data[ind + d] - center_of_mass[d]; +// Computes edge forces +template +void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, + double* val_P, int N, double* pos_f) { + // Loop over all edges in the graph + unsigned int ind1 = 0; + unsigned int ind2 = 0; + double sqdist; + for (unsigned int n = 0; n < N; n++) { + for (unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { + // Compute pairwise distance and Q-value + sqdist = 1.0; + ind2 = col_P[i] * NDims; + + for (unsigned int d = 0; d < NDims; d++) { + buff[d] = data[ind1 + d] - data[ind2 + d]; sqdist += buff[d] * buff[d]; - } + } - // Check whether we can use this node as a "summary" - double max_width = 0.0; - double cur_width; - for(unsigned int d = 0; d < NDims; d++) { - cur_width = boundary.getWidth(d); - max_width = (max_width > cur_width) ? max_width : cur_width; - } - if(is_leaf || max_width / sqrt(sqdist) < theta) { - - // Compute and add t-SNE force between point and current node - sqdist = 1.0 / (1.0 + sqdist); - double mult = cum_size * sqdist; - *sum_Q += mult; - mult *= sqdist; - for(unsigned int d = 0; d < NDims; d++) neg_f[d] += mult * buff[d]; - } - else { + sqdist = val_P[i] / sqdist; - // Recursively apply Barnes-Hut to children - for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); + // Sum positive force + for (unsigned int d = 0; d < NDims; d++) + pos_f[ind1 + d] += sqdist * buff[d]; } + ind1 += NDims; + } } - -// Computes edge forces -template -void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f) -{ - - // Loop over all edges in the graph - unsigned int ind1 = 0; - unsigned int ind2 = 0; - double sqdist; - for(unsigned int n = 0; n < N; n++) { - for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { - - // Compute pairwise distance and Q-value - sqdist = 1.0; - ind2 = col_P[i] * NDims; - - for(unsigned int d = 0; d < NDims; d++) { - buff[d] = data[ind1 + d] - data[ind2 + d]; - sqdist += buff[d] * buff[d]; - } - - sqdist = val_P[i] / sqdist; - - // Sum positive force - for(unsigned int d = 0; d < NDims; d++) pos_f[ind1 + d] += sqdist * buff[d]; - } - ind1 += NDims; - } -} - - // Print out tree -template -void SPTree::print() -{ - if(cum_size == 0) { - fprintf(stderr,"Empty node\n"); - return; - } - - if(is_leaf) { - fprintf(stderr,"Leaf node; data = ["); - for(int i = 0; i < size; i++) { - double* point = data + index[i] * NDims; - for(int d = 0; d < NDims; d++) fprintf(stderr,"%f, ", point[d]); - fprintf(stderr," (index = %d)", index[i]); - if(i < size - 1) fprintf(stderr,"\n"); - else fprintf(stderr,"]\n"); - } - } - else { - fprintf(stderr,"Intersection node with center-of-mass = ["); - for(int d = 0; d < NDims; d++) fprintf(stderr,"%f, ", center_of_mass[d]); - fprintf(stderr,"]; children are:\n"); - for(int i = 0; i < no_children; i++) children[i]->print(); +template +void SPTree::print() { + if (cum_size == 0) { + fprintf(stderr, "Empty node\n"); + return; + } + + if (is_leaf) { + fprintf(stderr, "Leaf node; data = ["); + for (int i = 0; i < size; i++) { + double* point = data + index[i] * NDims; + for (int d = 0; d < NDims; d++) + fprintf(stderr, "%f, ", point[d]); + fprintf(stderr, " (index = %d)", index[i]); + if (i < size - 1) + fprintf(stderr, "\n"); + else + fprintf(stderr, "]\n"); } + } else { + fprintf(stderr, "Intersection node with center-of-mass = ["); + for (int d = 0; d < NDims; d++) + fprintf(stderr, "%f, ", center_of_mass[d]); + fprintf(stderr, "]; children are:\n"); + for (int i = 0; i < no_children; i++) + children[i]->print(); + } } // declare templates explicitly diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 51c4303..9a9deae 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -12,7 +12,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the Delft University of Technology. + * This product includes software developed by the Delft University of + * Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. @@ -20,13 +21,13 @@ * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ @@ -35,86 +36,91 @@ #pragma GCC visibility push(hidden) -template +template class Cell { - double corner[NDims]; - double width[NDims]; - - -public: - Cell(); - Cell(double* inp_corner, double* inp_width); - ~Cell(); - - double getCorner(unsigned int d); - double getWidth(unsigned int d); - void setCorner(unsigned int d, double val); - void setWidth(unsigned int d, double val); - bool containsPoint(double point[]); + double corner[NDims]; + double width[NDims]; + + public: + Cell(); + Cell(double* inp_corner, double* inp_width); + ~Cell(); + + double getCorner(unsigned int d); + double getWidth(unsigned int d); + void setCorner(unsigned int d, double val); + void setWidth(unsigned int d, double val); + bool containsPoint(double point[]); }; -template -class SPTree -{ -public: - enum { no_children = 2 * SPTree::no_children }; - -private: - // Fixed constants - static const unsigned int QT_NODE_CAPACITY = 1; - - // A buffer we use when doing force computations - double buff[NDims]; - - // Properties of this node in the tree - SPTree* parent; - unsigned int dimension; - bool is_leaf; - unsigned int size; - unsigned int cum_size; - - // Axis-aligned bounding box stored as a center with half-dimensions to represent the boundaries of this quad tree - Cell boundary; - - // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children - double* data; - double center_of_mass[NDims]; - unsigned int index[QT_NODE_CAPACITY]; - - // Children - SPTree* children[no_children]; - -public: - SPTree(double* inp_data, unsigned int N); - SPTree(double* inp_data, double* inp_corner, double* inp_width); - SPTree(double* inp_data, unsigned int N, double* inp_corner, double* inp_width); - SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, double* inp_corner, double* inp_width); - SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width); - ~SPTree(); - void setData(double* inp_data); - SPTree* getParent(); - void construct(Cell boundary); - bool insert(unsigned int new_index); - void subdivide(); - bool isCorrect(); - void rebuildTree(); - void getAllIndices(unsigned int* indices); - unsigned int getDepth(); - void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q); - void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f); - void print(); - -private: - void init(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width); - void fill(unsigned int N); - unsigned int getAllIndices(unsigned int* indices, unsigned int loc); - bool isChild(unsigned int test_index, unsigned int start, unsigned int end); +template +class SPTree { + public: + enum { no_children = 2 * SPTree::no_children }; + + private: + // Fixed constants + static const unsigned int QT_NODE_CAPACITY = 1; + + // A buffer we use when doing force computations + double buff[NDims]; + + // Properties of this node in the tree + SPTree* parent; + unsigned int dimension; + bool is_leaf; + unsigned int size; + unsigned int cum_size; + + // Axis-aligned bounding box stored as a center with half-dimensions to + // represent the boundaries of this quad tree + Cell boundary; + + // Indices in this space-partitioning tree node, corresponding center-of-mass, + // and list of all children + double* data; + double center_of_mass[NDims]; + unsigned int index[QT_NODE_CAPACITY]; + + // Children + SPTree* children[no_children]; + + public: + SPTree(double* inp_data, unsigned int N); + SPTree(double* inp_data, double* inp_corner, double* inp_width); + SPTree(double* inp_data, unsigned int N, double* inp_corner, + double* inp_width); + SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, + double* inp_corner, double* inp_width); + SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, + double* inp_width); + ~SPTree(); + void setData(double* inp_data); + SPTree* getParent(); + void construct(Cell boundary); + bool insert(unsigned int new_index); + void subdivide(); + bool isCorrect(); + void rebuildTree(); + void getAllIndices(unsigned int* indices); + unsigned int getDepth(); + void computeNonEdgeForces(unsigned int point_index, double theta, + double neg_f[], double* sum_Q); + void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, + double* val_P, int N, double* pos_f); + void print(); + + private: + void init(SPTree* inp_parent, double* inp_data, double* inp_corner, + double* inp_width); + void fill(unsigned int N); + unsigned int getAllIndices(unsigned int* indices, unsigned int loc); + bool isChild(unsigned int test_index, unsigned int start, unsigned int end); }; template <> -struct SPTree<0> -{ - enum { no_children = 1 }; +struct SPTree<0> { + enum { no_children = 1 }; }; #pragma GCC visibility pop diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index c677229..66e064d 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -12,7 +12,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the Delft University of Technology. + * This product includes software developed by the Delft University of + * Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. @@ -20,13 +21,13 @@ * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ @@ -39,9 +40,9 @@ #include #include -#include "vptree.h" #include "sptree.h" #include "tsne.h" +#include "vptree.h" #pragma GCC visibility push(hidden) @@ -51,683 +52,794 @@ using std::vector; namespace { template -void run(double* X, int N, int D, double* Y, - double perplexity, - double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, +void run(double* X, int N, int D, double* Y, double perplexity, double theta, + int rand_seed, bool skip_random_init, double* init, bool use_init, int max_iter, int stop_lying_iter, int mom_switch_iter); template -void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, double* dC, double theta); +void computeGradient(double* P, unsigned int* inp_row_P, + unsigned int* inp_col_P, double* inp_val_P, double* Y, + int N, double* dC, double theta); void computeExactGradient(double* P, double* Y, int N, int D, double* dC); double evaluateError(double* P, double* Y, int N, int D); template -double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, double theta); +double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, + double* Y, int N, double theta); void zeroMean(double* X, int N, int D); -void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity); -void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K); +void computeGaussianPerplexity(double* X, int N, int D, double* P, + double perplexity); +void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, + unsigned int** _col_P, double** _val_P, + double perplexity, int K); void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD); double randn(); -void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, double** val_P, int N); +void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, + double** val_P, int N); -static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); } +static inline double sign(double x) { + return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); +} // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm) template -void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, int N, double* dC, double theta) -{ - - // Construct space-partitioning tree on current map - SPTree* tree = new SPTree(Y, N); - - // Compute all terms required for t-SNE gradient - double sum_Q = .0; - double* pos_f = (double*) calloc(N * NDIMS, sizeof(double)); - double* neg_f = (double*) calloc(N * NDIMS, sizeof(double)); - if(pos_f == nullptr || neg_f == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); - for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * NDIMS, &sum_Q); - - // Compute final t-SNE gradient - for(int i = 0; i < N * NDIMS; i++) { - dC[i] = pos_f[i] - (neg_f[i] / sum_Q); - } - free(pos_f); - free(neg_f); - delete tree; +void computeGradient(double* P, unsigned int* inp_row_P, + unsigned int* inp_col_P, double* inp_val_P, double* Y, + int N, double* dC, double theta) { + // Construct space-partitioning tree on current map + SPTree* tree = new SPTree(Y, N); + + // Compute all terms required for t-SNE gradient + double sum_Q = .0; + double* pos_f = (double*)calloc(N * NDIMS, sizeof(double)); + double* neg_f = (double*)calloc(N * NDIMS, sizeof(double)); + if (pos_f == nullptr || neg_f == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); + for (int n = 0; n < N; n++) + tree->computeNonEdgeForces(n, theta, neg_f + n * NDIMS, &sum_Q); + + // Compute final t-SNE gradient + for (int i = 0; i < N * NDIMS; i++) { + dC[i] = pos_f[i] - (neg_f[i] / sum_Q); + } + free(pos_f); + free(neg_f); + delete tree; } // Compute gradient of the t-SNE cost function (exact) void computeExactGradient(double* P, double* Y, int N, int D, double* dC) { + // Make sure the current gradient contains zeros + for (int i = 0; i < N * D; i++) + dC[i] = 0.0; + + // Compute the squared Euclidean distance matrix + double* DD = (double*)malloc(N * N * sizeof(double)); + if (DD == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + computeSquaredEuclideanDistance(Y, N, D, DD); - // Make sure the current gradient contains zeros - for(int i = 0; i < N * D; i++) dC[i] = 0.0; - - // Compute the squared Euclidean distance matrix - double* DD = (double*) malloc(N * N * sizeof(double)); - if(DD == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - computeSquaredEuclideanDistance(Y, N, D, DD); + // Compute Q-matrix and normalization sum + double* Q = (double*)malloc(N * N * sizeof(double)); + if (Q == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + double sum_Q = .0; + int nN = 0; + for (int n = 0; n < N; n++) { + for (int m = 0; m < N; m++) { + if (n != m) { + Q[nN + m] = 1 / (1 + DD[nN + m]); + sum_Q += Q[nN + m]; + } + } + nN += N; + } - // Compute Q-matrix and normalization sum - double* Q = (double*) malloc(N * N * sizeof(double)); - if(Q == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - double sum_Q = .0; - int nN = 0; - for(int n = 0; n < N; n++) { - for(int m = 0; m < N; m++) { - if(n != m) { - Q[nN + m] = 1 / (1 + DD[nN + m]); - sum_Q += Q[nN + m]; - } + // Perform the computation of the gradient + nN = 0; + int nD = 0; + for (int n = 0; n < N; n++) { + int mD = 0; + for (int m = 0; m < N; m++) { + if (n != m) { + double mult = (P[nN + m] - (Q[nN + m] / sum_Q)) * Q[nN + m]; + for (int d = 0; d < D; d++) { + dC[nD + d] += (Y[nD + d] - Y[mD + d]) * mult; } - nN += N; + } + mD += D; } + nN += N; + nD += D; + } - // Perform the computation of the gradient - nN = 0; - int nD = 0; - for(int n = 0; n < N; n++) { - int mD = 0; - for(int m = 0; m < N; m++) { - if(n != m) { - double mult = (P[nN + m] - (Q[nN + m] / sum_Q)) * Q[nN + m]; - for(int d = 0; d < D; d++) { - dC[nD + d] += (Y[nD + d] - Y[mD + d]) * mult; - } - } - mD += D; - } - nN += N; - nD += D; - } - - // Free memory - free(DD); DD = nullptr; - free(Q); Q = nullptr; + // Free memory + free(DD); + DD = nullptr; + free(Q); + Q = nullptr; } - // Evaluate t-SNE cost function (exactly) double evaluateError(double* P, double* Y, int N, int D) { - - // Compute the squared Euclidean distance matrix - double* DD = (double*) malloc(N * N * sizeof(double)); - double* Q = (double*) malloc(N * N * sizeof(double)); - if(DD == nullptr || Q == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - computeSquaredEuclideanDistance(Y, N, D, DD); - - // Compute Q-matrix and normalization sum - int nN = 0; - double sum_Q = DBL_MIN; - for(int n = 0; n < N; n++) { - for(int m = 0; m < N; m++) { - if(n != m) { - Q[nN + m] = 1 / (1 + DD[nN + m]); - sum_Q += Q[nN + m]; - } - else Q[nN + m] = DBL_MIN; - } - nN += N; + // Compute the squared Euclidean distance matrix + double* DD = (double*)malloc(N * N * sizeof(double)); + double* Q = (double*)malloc(N * N * sizeof(double)); + if (DD == nullptr || Q == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + computeSquaredEuclideanDistance(Y, N, D, DD); + + // Compute Q-matrix and normalization sum + int nN = 0; + double sum_Q = DBL_MIN; + for (int n = 0; n < N; n++) { + for (int m = 0; m < N; m++) { + if (n != m) { + Q[nN + m] = 1 / (1 + DD[nN + m]); + sum_Q += Q[nN + m]; + } else + Q[nN + m] = DBL_MIN; } - for(int i = 0; i < N * N; i++) Q[i] /= sum_Q; + nN += N; + } + for (int i = 0; i < N * N; i++) + Q[i] /= sum_Q; - // Sum t-SNE error - double C = .0; - for(int n = 0; n < N * N; n++) { - C += P[n] * log((P[n] + FLT_MIN) / (Q[n] + FLT_MIN)); - } + // Sum t-SNE error + double C = .0; + for (int n = 0; n < N * N; n++) { + C += P[n] * log((P[n] + FLT_MIN) / (Q[n] + FLT_MIN)); + } - // Clean up memory - free(DD); - free(Q); - return C; + // Clean up memory + free(DD); + free(Q); + return C; } // Evaluate t-SNE cost function (approximately) template -double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, int N, double theta) -{ - - // Get estimate of normalization term - array buff; - buff.fill(0); - double sum_Q = .0; - { - SPTree tree(Y, N); - for(int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, buff.data(), &sum_Q); - } +double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, + double* Y, int N, double theta) { + // Get estimate of normalization term + array buff; + buff.fill(0); + double sum_Q = .0; + { + SPTree tree(Y, N); + for (int n = 0; n < N; n++) + tree.computeNonEdgeForces(n, theta, buff.data(), &sum_Q); + } - // Loop over all edges to compute t-SNE error - int ind1, ind2; - double C = .0, Q; - for(int n = 0; n < N; n++) { - ind1 = n * NDIMS; - for(int i = row_P[n]; i < row_P[n + 1]; i++) { - Q = .0; - ind2 = col_P[i] * NDIMS; - for(int d = 0; d < NDIMS; d++) buff[d] = Y[ind1 + d]; - for(int d = 0; d < NDIMS; d++) buff[d] -= Y[ind2 + d]; - for(int d = 0; d < NDIMS; d++) Q += buff[d] * buff[d]; - Q = (1.0 / (1.0 + Q)) / sum_Q; - C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); - } + // Loop over all edges to compute t-SNE error + int ind1, ind2; + double C = .0, Q; + for (int n = 0; n < N; n++) { + ind1 = n * NDIMS; + for (int i = row_P[n]; i < row_P[n + 1]; i++) { + Q = .0; + ind2 = col_P[i] * NDIMS; + for (int d = 0; d < NDIMS; d++) + buff[d] = Y[ind1 + d]; + for (int d = 0; d < NDIMS; d++) + buff[d] -= Y[ind2 + d]; + for (int d = 0; d < NDIMS; d++) + Q += buff[d] * buff[d]; + Q = (1.0 / (1.0 + Q)) / sum_Q; + C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); } + } - // Clean up memory - return C; + // Clean up memory + return C; } - // Compute input similarities with a fixed perplexity -void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { - - // Compute the squared Euclidean distance matrix - double* DD = (double*) malloc(N * N * sizeof(double)); - if(DD == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - computeSquaredEuclideanDistance(X, N, D, DD); - - // Compute the Gaussian kernel row by row - int nN = 0; - for(int n = 0; n < N; n++) { - - // Initialize some variables - bool found = false; - double beta = 1.0; - double min_beta = -DBL_MAX; - double max_beta = DBL_MAX; - double tol = 1e-5; - double sum_P; - - // Iterate until we found a good perplexity - int iter = 0; - while(!found && iter < 200) { - - // Compute Gaussian kernel row - for(int m = 0; m < N; m++) P[nN + m] = exp(-beta * DD[nN + m]); - P[nN + n] = DBL_MIN; - - // Compute entropy of current row - sum_P = DBL_MIN; - for(int m = 0; m < N; m++) sum_P += P[nN + m]; - double H = 0.0; - for(int m = 0; m < N; m++) H += beta * (DD[nN + m] * P[nN + m]); - H = (H / sum_P) + log(sum_P); - - // Evaluate whether the entropy is within the tolerance level - double Hdiff = H - log(perplexity); - if(Hdiff < tol && -Hdiff < tol) { - found = true; - } - else { - if(Hdiff > 0) { - min_beta = beta; - if(max_beta == DBL_MAX || max_beta == -DBL_MAX) - beta *= 2.0; - else - beta = (beta + max_beta) / 2.0; - } - else { - max_beta = beta; - if(min_beta == -DBL_MAX || min_beta == DBL_MAX) - beta /= 2.0; - else - beta = (beta + min_beta) / 2.0; - } - } - - // Update iteration counter - iter++; +void computeGaussianPerplexity(double* X, int N, int D, double* P, + double perplexity) { + // Compute the squared Euclidean distance matrix + double* DD = (double*)malloc(N * N * sizeof(double)); + if (DD == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + computeSquaredEuclideanDistance(X, N, D, DD); + + // Compute the Gaussian kernel row by row + int nN = 0; + for (int n = 0; n < N; n++) { + // Initialize some variables + bool found = false; + double beta = 1.0; + double min_beta = -DBL_MAX; + double max_beta = DBL_MAX; + double tol = 1e-5; + double sum_P; + + // Iterate until we found a good perplexity + int iter = 0; + while (!found && iter < 200) { + // Compute Gaussian kernel row + for (int m = 0; m < N; m++) + P[nN + m] = exp(-beta * DD[nN + m]); + P[nN + n] = DBL_MIN; + + // Compute entropy of current row + sum_P = DBL_MIN; + for (int m = 0; m < N; m++) + sum_P += P[nN + m]; + double H = 0.0; + for (int m = 0; m < N; m++) + H += beta * (DD[nN + m] * P[nN + m]); + H = (H / sum_P) + log(sum_P); + + // Evaluate whether the entropy is within the tolerance level + double Hdiff = H - log(perplexity); + if (Hdiff < tol && -Hdiff < tol) { + found = true; + } else { + if (Hdiff > 0) { + min_beta = beta; + if (max_beta == DBL_MAX || max_beta == -DBL_MAX) + beta *= 2.0; + else + beta = (beta + max_beta) / 2.0; + } else { + max_beta = beta; + if (min_beta == -DBL_MAX || min_beta == DBL_MAX) + beta /= 2.0; + else + beta = (beta + min_beta) / 2.0; } + } - // Row normalize P - for(int m = 0; m < N; m++) P[nN + m] /= sum_P; - nN += N; + // Update iteration counter + iter++; } - // Clean up memory - free(DD); DD = nullptr; -} + // Row normalize P + for (int m = 0; m < N; m++) + P[nN + m] /= sum_P; + nN += N; + } + // Clean up memory + free(DD); + DD = nullptr; +} -// Compute input similarities with a fixed perplexity using ball trees (this function allocates memory another function should free) -void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, unsigned int** _col_P, double** _val_P, double perplexity, int K) { - - if(perplexity > K) fprintf(stderr,"Perplexity should be lower than K!\n"); - - // Allocate the memory we need - *_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int)); - *_col_P = (unsigned int*) calloc(N * K, sizeof(unsigned int)); - *_val_P = (double*) calloc(N * K, sizeof(double)); - if(*_row_P == nullptr || *_col_P == nullptr || *_val_P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - unsigned int* row_P = *_row_P; - unsigned int* col_P = *_col_P; - double* val_P = *_val_P; - double* cur_P = (double*) malloc((N - 1) * sizeof(double)); - if(cur_P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - row_P[0] = 0; - for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + (unsigned int) K; - - // Build ball tree on data set - VpTree* tree = new VpTree(); - vector obj_X(N, DataPoint(D, -1, X)); - for(int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D); - tree->create(obj_X); - - // Loop over all points to find nearest neighbors - fprintf(stderr,"Building tree...\n"); - vector indices; - vector distances; - for(int n = 0; n < N; n++) { - - if(n % 10000 == 0) fprintf(stderr," - point %d of %d\n", n, N); - - // Find nearest neighbors - indices.clear(); - distances.clear(); - tree->search(obj_X[n], K + 1, &indices, &distances); - - // Initialize some variables for binary search - bool found = false; - double beta = 1.0; - double min_beta = -DBL_MAX; - double max_beta = DBL_MAX; - double tol = 1e-5; - - // Iterate until we found a good perplexity - int iter = 0; double sum_P; - while(!found && iter < 200) { - - // Compute Gaussian kernel row - for(int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m + 1] * distances[m + 1]); - - // Compute entropy of current row - sum_P = DBL_MIN; - for(int m = 0; m < K; m++) sum_P += cur_P[m]; - double H = .0; - for(int m = 0; m < K; m++) H += beta * (distances[m + 1] * distances[m + 1] * cur_P[m]); - H = (H / sum_P) + log(sum_P); - - // Evaluate whether the entropy is within the tolerance level - double Hdiff = H - log(perplexity); - if(Hdiff < tol && -Hdiff < tol) { - found = true; - } - else { - if(Hdiff > 0) { - min_beta = beta; - if(max_beta == DBL_MAX || max_beta == -DBL_MAX) - beta *= 2.0; - else - beta = (beta + max_beta) / 2.0; - } - else { - max_beta = beta; - if(min_beta == -DBL_MAX || min_beta == DBL_MAX) - beta /= 2.0; - else - beta = (beta + min_beta) / 2.0; - } - } - - // Update iteration counter - iter++; +// Compute input similarities with a fixed perplexity using ball trees (this +// function allocates memory another function should free) +void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, + unsigned int** _col_P, double** _val_P, + double perplexity, int K) { + if (perplexity > K) + fprintf(stderr, "Perplexity should be lower than K!\n"); + + // Allocate the memory we need + *_row_P = (unsigned int*)malloc((N + 1) * sizeof(unsigned int)); + *_col_P = (unsigned int*)calloc(N * K, sizeof(unsigned int)); + *_val_P = (double*)calloc(N * K, sizeof(double)); + if (*_row_P == nullptr || *_col_P == nullptr || *_val_P == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + unsigned int* row_P = *_row_P; + unsigned int* col_P = *_col_P; + double* val_P = *_val_P; + double* cur_P = (double*)malloc((N - 1) * sizeof(double)); + if (cur_P == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + row_P[0] = 0; + for (int n = 0; n < N; n++) + row_P[n + 1] = row_P[n] + (unsigned int)K; + + // Build ball tree on data set + VpTree* tree = + new VpTree(); + vector obj_X(N, DataPoint(D, -1, X)); + for (int n = 0; n < N; n++) + obj_X[n] = DataPoint(D, n, X + n * D); + tree->create(obj_X); + + // Loop over all points to find nearest neighbors + fprintf(stderr, "Building tree...\n"); + vector indices; + vector distances; + for (int n = 0; n < N; n++) { + if (n % 10000 == 0) + fprintf(stderr, " - point %d of %d\n", n, N); + + // Find nearest neighbors + indices.clear(); + distances.clear(); + tree->search(obj_X[n], K + 1, &indices, &distances); + + // Initialize some variables for binary search + bool found = false; + double beta = 1.0; + double min_beta = -DBL_MAX; + double max_beta = DBL_MAX; + double tol = 1e-5; + + // Iterate until we found a good perplexity + int iter = 0; + double sum_P; + while (!found && iter < 200) { + // Compute Gaussian kernel row + for (int m = 0; m < K; m++) + cur_P[m] = exp(-beta * distances[m + 1] * distances[m + 1]); + + // Compute entropy of current row + sum_P = DBL_MIN; + for (int m = 0; m < K; m++) + sum_P += cur_P[m]; + double H = .0; + for (int m = 0; m < K; m++) + H += beta * (distances[m + 1] * distances[m + 1] * cur_P[m]); + H = (H / sum_P) + log(sum_P); + + // Evaluate whether the entropy is within the tolerance level + double Hdiff = H - log(perplexity); + if (Hdiff < tol && -Hdiff < tol) { + found = true; + } else { + if (Hdiff > 0) { + min_beta = beta; + if (max_beta == DBL_MAX || max_beta == -DBL_MAX) + beta *= 2.0; + else + beta = (beta + max_beta) / 2.0; + } else { + max_beta = beta; + if (min_beta == -DBL_MAX || min_beta == DBL_MAX) + beta /= 2.0; + else + beta = (beta + min_beta) / 2.0; } + } - // Row-normalize current row of P and store in matrix - for(unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P; - for(unsigned int m = 0; m < K; m++) { - col_P[row_P[n] + m] = (unsigned int) indices[m + 1].index(); - val_P[row_P[n] + m] = cur_P[m]; - } + // Update iteration counter + iter++; } - // Clean up memory - obj_X.clear(); - free(cur_P); - delete tree; -} + // Row-normalize current row of P and store in matrix + for (unsigned int m = 0; m < K; m++) + cur_P[m] /= sum_P; + for (unsigned int m = 0; m < K; m++) { + col_P[row_P[n] + m] = (unsigned int)indices[m + 1].index(); + val_P[row_P[n] + m] = cur_P[m]; + } + } + // Clean up memory + obj_X.clear(); + free(cur_P); + delete tree; +} // Symmetrizes a sparse matrix -void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, double** _val_P, int N) { - - // Get sparse matrix - unsigned int* row_P = *_row_P; - unsigned int* col_P = *_col_P; - double* val_P = *_val_P; - - // Count number of elements and row counts of symmetric matrix - int* row_counts = (int*) calloc(N, sizeof(int)); - if(row_counts == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - for(int n = 0; n < N; n++) { - for(int i = row_P[n]; i < row_P[n + 1]; i++) { - - // Check whether element (col_P[i], n) is present - bool present = false; - for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { - if(col_P[m] == n) present = true; - } - if(present) row_counts[n]++; - else { - row_counts[n]++; - row_counts[col_P[i]]++; - } - } +void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, + double** _val_P, int N) { + // Get sparse matrix + unsigned int* row_P = *_row_P; + unsigned int* col_P = *_col_P; + double* val_P = *_val_P; + + // Count number of elements and row counts of symmetric matrix + int* row_counts = (int*)calloc(N, sizeof(int)); + if (row_counts == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + for (int n = 0; n < N; n++) { + for (int i = row_P[n]; i < row_P[n + 1]; i++) { + // Check whether element (col_P[i], n) is present + bool present = false; + for (int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { + if (col_P[m] == n) + present = true; + } + if (present) + row_counts[n]++; + else { + row_counts[n]++; + row_counts[col_P[i]]++; + } } - int no_elem = 0; - for(int n = 0; n < N; n++) no_elem += row_counts[n]; - - // Allocate memory for symmetrized matrix - unsigned int* sym_row_P = (unsigned int*) malloc((N + 1) * sizeof(unsigned int)); - unsigned int* sym_col_P = (unsigned int*) malloc(no_elem * sizeof(unsigned int)); - double* sym_val_P = (double*) malloc(no_elem * sizeof(double)); - if(sym_row_P == nullptr || sym_col_P == nullptr || sym_val_P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - - // Construct new row indices for symmetric matrix - sym_row_P[0] = 0; - for(int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + (unsigned int) row_counts[n]; - - // Fill the result matrix - int* offset = (int*) calloc(N, sizeof(int)); - if(offset == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - for(int n = 0; n < N; n++) { - for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i]) - - // Check whether element (col_P[i], n) is present - bool present = false; - for(unsigned int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { - if(col_P[m] == n) { - present = true; - if(n <= col_P[i]) { // make sure we do not add elements twice - sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; - sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; - sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m]; - sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m]; - } - } - } - - // If (col_P[i], n) is not present, there is no addition involved - if(!present) { - sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; - sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; - sym_val_P[sym_row_P[n] + offset[n]] = val_P[i]; - sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i]; - } - - // Update offsets - if(!present || (present && n <= col_P[i])) { - offset[n]++; - if(col_P[i] != n) offset[col_P[i]]++; - } + } + int no_elem = 0; + for (int n = 0; n < N; n++) + no_elem += row_counts[n]; + + // Allocate memory for symmetrized matrix + unsigned int* sym_row_P = + (unsigned int*)malloc((N + 1) * sizeof(unsigned int)); + unsigned int* sym_col_P = + (unsigned int*)malloc(no_elem * sizeof(unsigned int)); + double* sym_val_P = (double*)malloc(no_elem * sizeof(double)); + if (sym_row_P == nullptr || sym_col_P == nullptr || sym_val_P == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + + // Construct new row indices for symmetric matrix + sym_row_P[0] = 0; + for (int n = 0; n < N; n++) + sym_row_P[n + 1] = sym_row_P[n] + (unsigned int)row_counts[n]; + + // Fill the result matrix + int* offset = (int*)calloc(N, sizeof(int)); + if (offset == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + for (int n = 0; n < N; n++) { + for (unsigned int i = row_P[n]; i < row_P[n + 1]; + i++) { // considering element(n, col_P[i]) + + // Check whether element (col_P[i], n) is present + bool present = false; + for (unsigned int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { + if (col_P[m] == n) { + present = true; + if (n <= col_P[i]) { // make sure we do not add elements twice + sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; + sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; + sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m]; + sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = + val_P[i] + val_P[m]; + } } - } + } - // Divide the result by two - for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0; + // If (col_P[i], n) is not present, there is no addition involved + if (!present) { + sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; + sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; + sym_val_P[sym_row_P[n] + offset[n]] = val_P[i]; + sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i]; + } - // Return symmetrized matrices - free(*_row_P); *_row_P = sym_row_P; - free(*_col_P); *_col_P = sym_col_P; - free(*_val_P); *_val_P = sym_val_P; + // Update offsets + if (!present || (present && n <= col_P[i])) { + offset[n]++; + if (col_P[i] != n) + offset[col_P[i]]++; + } + } + } - // Free up some memery - free(offset); offset = nullptr; - free(row_counts); row_counts = nullptr; + // Divide the result by two + for (int i = 0; i < no_elem; i++) + sym_val_P[i] /= 2.0; + + // Return symmetrized matrices + free(*_row_P); + *_row_P = sym_row_P; + free(*_col_P); + *_col_P = sym_col_P; + free(*_val_P); + *_val_P = sym_val_P; + + // Free up some memery + free(offset); + offset = nullptr; + free(row_counts); + row_counts = nullptr; } // Compute squared Euclidean distance matrix void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { - const double* XnD = X; - for(int n = 0; n < N; ++n, XnD += D) { - const double* XmD = XnD + D; - double* curr_elem = &DD[n*N + n]; - *curr_elem = 0.0; - double* curr_elem_sym = curr_elem + N; - for(int m = n + 1; m < N; ++m, XmD+=D, curr_elem_sym+=N) { - *(++curr_elem) = 0.0; - for(int d = 0; d < D; ++d) { - *curr_elem += (XnD[d] - XmD[d]) * (XnD[d] - XmD[d]); - } - *curr_elem_sym = *curr_elem; - } + const double* XnD = X; + for (int n = 0; n < N; ++n, XnD += D) { + const double* XmD = XnD + D; + double* curr_elem = &DD[n * N + n]; + *curr_elem = 0.0; + double* curr_elem_sym = curr_elem + N; + for (int m = n + 1; m < N; ++m, XmD += D, curr_elem_sym += N) { + *(++curr_elem) = 0.0; + for (int d = 0; d < D; ++d) { + *curr_elem += (XnD[d] - XmD[d]) * (XnD[d] - XmD[d]); + } + *curr_elem_sym = *curr_elem; } + } } - // Makes data zero-mean void zeroMean(double* X, int N, int D) { + // Compute data mean + double* mean = (double*)calloc(D, sizeof(double)); + if (mean == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + int nD = 0; + for (int n = 0; n < N; n++) { + for (int d = 0; d < D; d++) { + mean[d] += X[nD + d]; + } + nD += D; + } + for (int d = 0; d < D; d++) { + mean[d] /= (double)N; + } - // Compute data mean - double* mean = (double*) calloc(D, sizeof(double)); - if(mean == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - int nD = 0; - for(int n = 0; n < N; n++) { - for(int d = 0; d < D; d++) { - mean[d] += X[nD + d]; - } - nD += D; - } - for(int d = 0; d < D; d++) { - mean[d] /= (double) N; - } - - // Subtract data mean - nD = 0; - for(int n = 0; n < N; n++) { - for(int d = 0; d < D; d++) { - X[nD + d] -= mean[d]; - } - nD += D; - } - free(mean); mean = nullptr; + // Subtract data mean + nD = 0; + for (int n = 0; n < N; n++) { + for (int d = 0; d < D; d++) { + X[nD + d] -= mean[d]; + } + nD += D; + } + free(mean); + mean = nullptr; } - // Generates a Gaussian random number double randn() { - double x, y, radius; - do { - x = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; - y = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; - radius = (x * x) + (y * y); - } while((radius >= 1.0) || (radius == 0.0)); - radius = sqrt(-2 * log(radius) / radius); - x *= radius; - y *= radius; - return x; + double x, y, radius; + do { + x = 2 * (rand() / ((double)RAND_MAX + 1)) - 1; + y = 2 * (rand() / ((double)RAND_MAX + 1)) - 1; + radius = (x * x) + (y * y); + } while ((radius >= 1.0) || (radius == 0.0)); + radius = sqrt(-2 * log(radius) / radius); + x *= radius; + y *= radius; + return x; } // Perform t-SNE template -void run(double* X, int N, int D, double* Y, - double perplexity, - double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, +void run(double* X, int N, int D, double* Y, double perplexity, double theta, + int rand_seed, bool skip_random_init, double* init, bool use_init, int max_iter, int stop_lying_iter, int mom_switch_iter) { - - // Set random seed - if (skip_random_init != true) { - if(rand_seed >= 0) { - fprintf(stderr,"Using random seed: %d\n", rand_seed); - srand((unsigned int) rand_seed); - } else { - fprintf(stderr,"Using current time as random seed...\n"); - srand(time(nullptr)); - } - } - - // Determine whether we are using an exact algorithm - if(N - 1 < 3 * perplexity) { fprintf(stderr,"Perplexity too large for the number of data points!\n"); exit(1); } - fprintf(stderr,"Using no_dims = %d, perplexity = %f, and theta = %f\n", NDIMS, perplexity, theta); - bool exact = (theta == .0) ? true : false; - - fprintf(stderr,"Using max_iter = %d, stop_lying_iter = %d, mom_switch_iter = %d\n", max_iter, stop_lying_iter, mom_switch_iter); - - // Set learning parameters - float total_time = .0; - clock_t start, end; - double momentum = .5, final_momentum = .8; - double eta = 200.0; - - // Allocate some memory - double* dY = (double*) malloc(N * NDIMS * sizeof(double)); - double* uY = (double*) malloc(N * NDIMS * sizeof(double)); - double* gains = (double*) malloc(N * NDIMS * sizeof(double)); - if(dY == nullptr || uY == nullptr || gains == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - for(int i = 0; i < N * NDIMS; i++) uY[i] = .0; - for(int i = 0; i < N * NDIMS; i++) gains[i] = 1.0; - - // Normalize input data (to prevent numerical problems) - fprintf(stderr,"Computing input similarities...\n"); - start = clock(); - zeroMean(X, N, D); - double max_X = .0; - for(int i = 0; i < N * D; i++) { - if(fabs(X[i]) > max_X) max_X = fabs(X[i]); - } - for(int i = 0; i < N * D; i++) X[i] /= max_X; - - // Compute input similarities for exact t-SNE - double* P; unsigned int* row_P; unsigned int* col_P; double* val_P; - if(exact) { - - // Compute similarities - fprintf(stderr,"Exact?"); - P = (double*) malloc(N * N * sizeof(double)); - if(P == nullptr) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); } - computeGaussianPerplexity(X, N, D, P, perplexity); - - // Symmetrize input similarities - fprintf(stderr,"Symmetrizing...\n"); - int nN = 0; - for(int n = 0; n < N; n++) { - int mN = (n + 1) * N; - for(int m = n + 1; m < N; m++) { - P[nN + m] += P[mN + n]; - P[mN + n] = P[nN + m]; - mN += N; - } - nN += N; - } - double sum_P = .0; - for(int i = 0; i < N * N; i++) sum_P += P[i]; - for(int i = 0; i < N * N; i++) P[i] /= sum_P; + // Set random seed + if (skip_random_init != true) { + if (rand_seed >= 0) { + fprintf(stderr, "Using random seed: %d\n", rand_seed); + srand((unsigned int)rand_seed); + } else { + fprintf(stderr, "Using current time as random seed...\n"); + srand(time(nullptr)); } + } - // Compute input similarities for approximate t-SNE - else { - - // Compute asymmetric pairwise input similarities - computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int) (3 * perplexity)); - - // Symmetrize input similarities - symmetrizeMatrix(&row_P, &col_P, &val_P, N); - double sum_P = .0; - for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; - for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; + // Determine whether we are using an exact algorithm + if (N - 1 < 3 * perplexity) { + fprintf(stderr, "Perplexity too large for the number of data points!\n"); + exit(1); + } + fprintf(stderr, "Using no_dims = %d, perplexity = %f, and theta = %f\n", + NDIMS, perplexity, theta); + bool exact = (theta == .0) ? true : false; + + fprintf(stderr, + "Using max_iter = %d, stop_lying_iter = %d, mom_switch_iter = %d\n", + max_iter, stop_lying_iter, mom_switch_iter); + + // Set learning parameters + float total_time = .0; + clock_t start, end; + double momentum = .5, final_momentum = .8; + double eta = 200.0; + + // Allocate some memory + double* dY = (double*)malloc(N * NDIMS * sizeof(double)); + double* uY = (double*)malloc(N * NDIMS * sizeof(double)); + double* gains = (double*)malloc(N * NDIMS * sizeof(double)); + if (dY == nullptr || uY == nullptr || gains == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); + } + for (int i = 0; i < N * NDIMS; i++) + uY[i] = .0; + for (int i = 0; i < N * NDIMS; i++) + gains[i] = 1.0; + + // Normalize input data (to prevent numerical problems) + fprintf(stderr, "Computing input similarities...\n"); + start = clock(); + zeroMean(X, N, D); + double max_X = .0; + for (int i = 0; i < N * D; i++) { + if (fabs(X[i]) > max_X) + max_X = fabs(X[i]); + } + for (int i = 0; i < N * D; i++) + X[i] /= max_X; + + // Compute input similarities for exact t-SNE + double* P; + unsigned int* row_P; + unsigned int* col_P; + double* val_P; + if (exact) { + // Compute similarities + fprintf(stderr, "Exact?"); + P = (double*)malloc(N * N * sizeof(double)); + if (P == nullptr) { + fprintf(stderr, "Memory allocation failed!\n"); + exit(1); } - end = clock(); - - // Lie about the P-values - if(exact) { for(int i = 0; i < N * N; i++) P[i] *= 12.0; } - else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; } + computeGaussianPerplexity(X, N, D, P, perplexity); - // Initialize solution (randomly or with given coordinates) - if (!use_init && !skip_random_init) { - for(int i = 0; i < N * NDIMS; i++) { - Y[i] = randn() * .0001; - } - } else if (use_init) { - for(int i = 0; i < N * NDIMS; i++) { - Y[i] = init[i]; + // Symmetrize input similarities + fprintf(stderr, "Symmetrizing...\n"); + int nN = 0; + for (int n = 0; n < N; n++) { + int mN = (n + 1) * N; + for (int m = n + 1; m < N; m++) { + P[nN + m] += P[mN + n]; + P[mN + n] = P[nN + m]; + mN += N; } + nN += N; } + double sum_P = .0; + for (int i = 0; i < N * N; i++) + sum_P += P[i]; + for (int i = 0; i < N * N; i++) + P[i] /= sum_P; + } - // Perform main training loop - if(exact) fprintf(stderr,"Input similarities computed in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); - else fprintf(stderr,"Input similarities computed in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); - start = clock(); - - for(int iter = 0; iter < max_iter; iter++) { - - // Compute (approximate) gradient - if(exact) computeExactGradient(P, Y, N, NDIMS, dY); - else computeGradient(P, row_P, col_P, val_P, Y, N, dY, theta); - - // Update gains - for(int i = 0; i < N * NDIMS; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); - for(int i = 0; i < N * NDIMS; i++) if(gains[i] < .01) gains[i] = .01; - - // Perform gradient update (with momentum and gains) - for(int i = 0; i < N * NDIMS; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; - for(int i = 0; i < N * NDIMS; i++) Y[i] = Y[i] + uY[i]; + // Compute input similarities for approximate t-SNE + else { + // Compute asymmetric pairwise input similarities + computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, + (int)(3 * perplexity)); + + // Symmetrize input similarities + symmetrizeMatrix(&row_P, &col_P, &val_P, N); + double sum_P = .0; + for (int i = 0; i < row_P[N]; i++) + sum_P += val_P[i]; + for (int i = 0; i < row_P[N]; i++) + val_P[i] /= sum_P; + } + end = clock(); + + // Lie about the P-values + if (exact) { + for (int i = 0; i < N * N; i++) + P[i] *= 12.0; + } else { + for (int i = 0; i < row_P[N]; i++) + val_P[i] *= 12.0; + } - // Make solution zero-mean - zeroMean(Y, N, NDIMS); + // Initialize solution (randomly or with given coordinates) + if (!use_init && !skip_random_init) { + for (int i = 0; i < N * NDIMS; i++) { + Y[i] = randn() * .0001; + } + } else if (use_init) { + for (int i = 0; i < N * NDIMS; i++) { + Y[i] = init[i]; + } + } - // Stop lying about the P-values after a while, and switch momentum - if(iter == stop_lying_iter) { - if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; } - else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; } - } - if(iter == mom_switch_iter) momentum = final_momentum; - - // Print out progress - if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) { - end = clock(); - double C = .0; - if(exact) C = evaluateError(P, Y, N, NDIMS); - else C = evaluateError(row_P, col_P, val_P, Y, N, theta); // doing approximate computation here! - if(iter == 0) - fprintf(stderr,"Iteration %d: error is %f\n", iter + 1, C); - else { - total_time += (float) (end - start) / CLOCKS_PER_SEC; - fprintf(stderr,"Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC); - } - start = clock(); - } + // Perform main training loop + if (exact) + fprintf(stderr, + "Input similarities computed in %4.2f seconds!\nLearning " + "embedding...\n", + (float)(end - start) / CLOCKS_PER_SEC); + else + fprintf(stderr, + "Input similarities computed in %4.2f seconds (sparsity = " + "%f)!\nLearning embedding...\n", + (float)(end - start) / CLOCKS_PER_SEC, + (double)row_P[N] / ((double)N * (double)N)); + start = clock(); + + for (int iter = 0; iter < max_iter; iter++) { + // Compute (approximate) gradient + if (exact) + computeExactGradient(P, Y, N, NDIMS, dY); + else + computeGradient(P, row_P, col_P, val_P, Y, N, dY, theta); + + // Update gains + for (int i = 0; i < N * NDIMS; i++) + gains[i] = + (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); + for (int i = 0; i < N * NDIMS; i++) + if (gains[i] < .01) + gains[i] = .01; + + // Perform gradient update (with momentum and gains) + for (int i = 0; i < N * NDIMS; i++) + uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; + for (int i = 0; i < N * NDIMS; i++) + Y[i] = Y[i] + uY[i]; + + // Make solution zero-mean + zeroMean(Y, N, NDIMS); + + // Stop lying about the P-values after a while, and switch momentum + if (iter == stop_lying_iter) { + if (exact) { + for (int i = 0; i < N * N; i++) + P[i] /= 12.0; + } else { + for (int i = 0; i < row_P[N]; i++) + val_P[i] /= 12.0; + } } - end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC; - - // Clean up memory - free(dY); - free(uY); - free(gains); - if(exact) free(P); - else { - free(row_P); row_P = nullptr; - free(col_P); col_P = nullptr; - free(val_P); val_P = nullptr; + if (iter == mom_switch_iter) + momentum = final_momentum; + + // Print out progress + if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) { + end = clock(); + double C = .0; + if (exact) + C = evaluateError(P, Y, N, NDIMS); + else + C = evaluateError(row_P, col_P, val_P, Y, N, + theta); // doing approximate computation here! + if (iter == 0) + fprintf(stderr, "Iteration %d: error is %f\n", iter + 1, C); + else { + total_time += (float)(end - start) / CLOCKS_PER_SEC; + fprintf(stderr, + "Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", + iter, C, (float)(end - start) / CLOCKS_PER_SEC); + } + start = clock(); } - fprintf(stderr,"Fitting performed in %4.2f seconds.\n", total_time); + } + end = clock(); + total_time += (float)(end - start) / CLOCKS_PER_SEC; + + // Clean up memory + free(dY); + free(uY); + free(gains); + if (exact) + free(P); + else { + free(row_P); + row_P = nullptr; + free(col_P); + col_P = nullptr; + free(val_P); + val_P = nullptr; + } + fprintf(stderr, "Fitting performed in %4.2f seconds.\n", total_time); } } // namespace extern "C" { -void DLL_PUBLIC run( - double* X, int N, int D, double* Y, - int no_dims, double perplexity, - double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, - int max_iter, int stop_lying_iter, int mom_switch_iter) { +void DLL_PUBLIC run(double* X, int N, int D, double* Y, int no_dims, + double perplexity, double theta, int rand_seed, + bool skip_random_init, double* init, bool use_init, + int max_iter, int stop_lying_iter, int mom_switch_iter) { assert(no_dims == 2 || no_dims == 3); switch (no_dims) { case 2: - run<2>(X, N, D, Y, - perplexity, - theta, rand_seed, - skip_random_init, init, use_init, - max_iter, stop_lying_iter, mom_switch_iter); + run<2>(X, N, D, Y, perplexity, theta, rand_seed, skip_random_init, init, + use_init, max_iter, stop_lying_iter, mom_switch_iter); break; case 3: - run<3>(X, N, D, Y, - perplexity, - theta, rand_seed, - skip_random_init, init, use_init, - max_iter, stop_lying_iter, mom_switch_iter); + run<3>(X, N, D, Y, perplexity, theta, rand_seed, skip_random_init, init, + use_init, max_iter, stop_lying_iter, mom_switch_iter); break; default: throw "invalid dimension"; diff --git a/tsne/bh_sne_src/tsne.h b/tsne/bh_sne_src/tsne.h index ceffd1b..6ec8e05 100644 --- a/tsne/bh_sne_src/tsne.h +++ b/tsne/bh_sne_src/tsne.h @@ -12,7 +12,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the Delft University of Technology. + * This product includes software developed by the Delft University of + * Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. @@ -20,42 +21,40 @@ * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ - #ifndef TSNE_H #define TSNE_H #if defined _WIN32 || defined __CYGWIN__ - #ifdef __GNUC__ - #define DLL_PUBLIC __attribute__ ((dllexport)) - #else - #define DLL_PUBLIC __declspec(dllexport) - #endif +#ifdef __GNUC__ +#define DLL_PUBLIC __attribute__((dllexport)) +#else +#define DLL_PUBLIC __declspec(dllexport) +#endif #else - #if __GNUC__ >= 4 - #define DLL_PUBLIC __attribute__ ((visibility ("default"))) - #else - #define DLL_PUBLIC - #endif +#if __GNUC__ >= 4 +#define DLL_PUBLIC __attribute__((visibility("default"))) +#else +#define DLL_PUBLIC +#endif #endif extern "C" { -void DLL_PUBLIC run( - double* X, int N, int D, - double* Y, int no_dims, - double perplexity, double theta, int rand_seed, - bool skip_random_init, double *init, bool use_init, - int max_iter=1000, int stop_lying_iter=250, int mom_switch_iter=250); +void DLL_PUBLIC run(double* X, int N, int D, double* Y, int no_dims, + double perplexity, double theta, int rand_seed, + bool skip_random_init, double* init, bool use_init, + int max_iter = 1000, int stop_lying_iter = 250, + int mom_switch_iter = 250); } #endif diff --git a/tsne/bh_sne_src/vptree.h b/tsne/bh_sne_src/vptree.h index 50bb6ad..302a2fa 100644 --- a/tsne/bh_sne_src/vptree.h +++ b/tsne/bh_sne_src/vptree.h @@ -12,26 +12,27 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the Delft University of Technology. - * 4. Neither the name of the Delft University of Technology nor the names of - * its contributors may be used to endorse or promote products derived from + * This product includes software developed by the Delft University of + * Technology. + * 4. Neither the name of the Delft University of Technology nor the names of + * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ - -/* This code was adopted with minor modifications from Steve Hanov's great tutorial at http://stevehanov.ca/blog/index.php?id=130 */ +/* This code was adopted with minor modifications from Steve Hanov's great + * tutorial at http://stevehanov.ca/blog/index.php?id=130 */ #ifndef VPTREE_H #define VPTREE_H @@ -46,229 +47,255 @@ #pragma GCC visibility push(hidden) -class DataPoint -{ - int _ind; - -public: - double* _x; - int _D; - DataPoint() { - _D = 1; - _ind = -1; - _x = NULL; +class DataPoint { + int _ind; + + public: + double* _x; + int _D; + DataPoint() { + _D = 1; + _ind = -1; + _x = NULL; + } + DataPoint(int D, int ind, double* x) { + _D = D; + _ind = ind; + _x = (double*)malloc(_D * sizeof(double)); + for (int d = 0; d < _D; d++) + _x[d] = x[d]; + } + DataPoint(const DataPoint& other) { // this makes a deep copy -- should not + // free anything + if (this != &other) { + _D = other.dimensionality(); + _ind = other.index(); + _x = (double*)malloc(_D * sizeof(double)); + for (int d = 0; d < _D; d++) + _x[d] = other.x(d); } - DataPoint(int D, int ind, double* x) { - _D = D; - _ind = ind; - _x = (double*) malloc(_D * sizeof(double)); - for(int d = 0; d < _D; d++) _x[d] = x[d]; + } + ~DataPoint() { + if (_x != NULL) + free(_x); + } + DataPoint& operator=(const DataPoint& other) { // asignment should free old + // object + if (this != &other) { + if (_x != NULL) + free(_x); + _D = other.dimensionality(); + _ind = other.index(); + _x = (double*)malloc(_D * sizeof(double)); + for (int d = 0; d < _D; d++) + _x[d] = other.x(d); } - DataPoint(const DataPoint& other) { // this makes a deep copy -- should not free anything - if(this != &other) { - _D = other.dimensionality(); - _ind = other.index(); - _x = (double*) malloc(_D * sizeof(double)); - for(int d = 0; d < _D; d++) _x[d] = other.x(d); - } + return *this; + } + int index() const { + return _ind; + } + int dimensionality() const { + return _D; + } + double x(int d) const { + return _x[d]; + } +}; + +double euclidean_distance(const DataPoint& t1, const DataPoint& t2) { + double dd = .0; + double* x1 = t1._x; + double* x2 = t2._x; + double diff; + for (int d = 0; d < t1._D; d++) { + diff = (x1[d] - x2[d]); + dd += diff * diff; + } + return sqrt(dd); +} + +template +class VpTree { + public: + // Default constructor + VpTree() : _root(0) { + } + + // Destructor + ~VpTree() { + delete _root; + } + + // Function to create a new VpTree from data + void create(const std::vector& items) { + delete _root; + _items = items; + _root = buildFromPoints(0, items.size()); + } + + // Function that uses the tree to find the k nearest neighbors of target + void search(const T& target, int k, std::vector* results, + std::vector* distances) { + // Use a priority queue to store intermediate results on + std::priority_queue heap; + + // Variable that tracks the distance to the farthest point in our results + _tau = DBL_MAX; + + // Perform the search + search(_root, target, k, heap); + + // Gather final results + results->clear(); + distances->clear(); + while (!heap.empty()) { + results->push_back(_items[heap.top().index]); + distances->push_back(heap.top().dist); + heap.pop(); } - ~DataPoint() { if(_x != NULL) free(_x); } - DataPoint& operator= (const DataPoint& other) { // asignment should free old object - if(this != &other) { - if(_x != NULL) free(_x); - _D = other.dimensionality(); - _ind = other.index(); - _x = (double*) malloc(_D * sizeof(double)); - for(int d = 0; d < _D; d++) _x[d] = other.x(d); - } - return *this; + + // Results are in reverse order + std::reverse(results->begin(), results->end()); + std::reverse(distances->begin(), distances->end()); + } + + private: + std::vector _items; + double _tau; + + // Single node of a VP tree (has a point and radius; left children are closer + // to point than the radius) + struct Node { + int index; // index of point in node + double threshold; // radius(?) + Node* left; // points closer by than threshold + Node* right; // points farther away than threshold + + Node() : index(0), threshold(0.), left(0), right(0) { } - int index() const { return _ind; } - int dimensionality() const { return _D; } - double x(int d) const { return _x[d]; } -}; -double euclidean_distance(const DataPoint &t1, const DataPoint &t2) { - double dd = .0; - double* x1 = t1._x; - double* x2 = t2._x; - double diff; - for(int d = 0; d < t1._D; d++) { - diff = (x1[d] - x2[d]); - dd += diff * diff; + ~Node() { // destructor + delete left; + delete right; } - return sqrt(dd); -} + } * _root; + + // An item on the intermediate result queue + struct HeapItem { + HeapItem(int index, double dist) : index(index), dist(dist) { + } + int index; + double dist; + bool operator<(const HeapItem& o) const { + return dist < o.dist; + } + }; + // Distance comparator for use in std::nth_element + struct DistanceComparator { + const T& item; + DistanceComparator(const T& item) : item(item) { + } + bool operator()(const T& a, const T& b) { + return distance(item, a) < distance(item, b); + } + }; -template -class VpTree -{ -public: - - // Default constructor - VpTree() : _root(0) {} - - // Destructor - ~VpTree() { - delete _root; + // Function that (recursively) fills the tree + Node* buildFromPoints(int lower, int upper) { + if (upper == lower) { // indicates that we're done here! + return NULL; } - // Function to create a new VpTree from data - void create(const std::vector& items) { - delete _root; - _items = items; - _root = buildFromPoints(0, items.size()); + // Lower index is center of current node + Node* node = new Node(); + node->index = lower; + + if (upper - lower > 1) { // if we did not arrive at leaf yet + + // Choose an arbitrary point and move it to the start + int i = (int)((double)rand() / RAND_MAX * (upper - lower - 1)) + lower; + std::swap(_items[lower], _items[i]); + + // Partition around the median distance + int median = (upper + lower) / 2; + std::nth_element(_items.begin() + lower + 1, _items.begin() + median, + _items.begin() + upper, + DistanceComparator(_items[lower])); + + // Threshold of the new node will be the distance to the median + node->threshold = distance(_items[lower], _items[median]); + + // Recursively build tree + node->index = lower; + node->left = buildFromPoints(lower + 1, median); + node->right = buildFromPoints(median, upper); } - - // Function that uses the tree to find the k nearest neighbors of target - void search(const T& target, int k, std::vector* results, std::vector* distances) - { - - // Use a priority queue to store intermediate results on - std::priority_queue heap; - - // Variable that tracks the distance to the farthest point in our results - _tau = DBL_MAX; - - // Perform the search - search(_root, target, k, heap); - - // Gather final results - results->clear(); distances->clear(); - while(!heap.empty()) { - results->push_back(_items[heap.top().index]); - distances->push_back(heap.top().dist); - heap.pop(); - } - - // Results are in reverse order - std::reverse(results->begin(), results->end()); - std::reverse(distances->begin(), distances->end()); + + // Return result + return node; + } + + // Helper function that searches the tree + void search(Node* node, const T& target, int k, + std::priority_queue& heap) { + if (node == NULL) + return; // indicates that we're done here + + // Compute distance between target and current node + double dist = distance(_items[node->index], target); + + // If current node within radius tau + if (dist < _tau) { + if (heap.size() == k) { + // remove furthest node from result list (if we already + // have k results) + heap.pop(); + } + // add current node to result list + heap.push(HeapItem(node->index, dist)); + if (heap.size() == k) { + // update value of tau (farthest point in result list) + _tau = heap.top().dist; + } } - -private: - std::vector _items; - double _tau; - - // Single node of a VP tree (has a point and radius; left children are closer to point than the radius) - struct Node - { - int index; // index of point in node - double threshold; // radius(?) - Node* left; // points closer by than threshold - Node* right; // points farther away than threshold - - Node() : - index(0), threshold(0.), left(0), right(0) {} - - ~Node() { // destructor - delete left; - delete right; - } - }* _root; - - - // An item on the intermediate result queue - struct HeapItem { - HeapItem( int index, double dist) : - index(index), dist(dist) {} - int index; - double dist; - bool operator<(const HeapItem& o) const { - return dist < o.dist; - } - }; - - // Distance comparator for use in std::nth_element - struct DistanceComparator - { - const T& item; - DistanceComparator(const T& item) : item(item) {} - bool operator()(const T& a, const T& b) { - return distance(item, a) < distance(item, b); - } - }; - - // Function that (recursively) fills the tree - Node* buildFromPoints( int lower, int upper ) - { - if (upper == lower) { // indicates that we're done here! - return NULL; - } - - // Lower index is center of current node - Node* node = new Node(); - node->index = lower; - - if (upper - lower > 1) { // if we did not arrive at leaf yet - - // Choose an arbitrary point and move it to the start - int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower; - std::swap(_items[lower], _items[i]); - - // Partition around the median distance - int median = (upper + lower) / 2; - std::nth_element(_items.begin() + lower + 1, - _items.begin() + median, - _items.begin() + upper, - DistanceComparator(_items[lower])); - - // Threshold of the new node will be the distance to the median - node->threshold = distance(_items[lower], _items[median]); - - // Recursively build tree - node->index = lower; - node->left = buildFromPoints(lower + 1, median); - node->right = buildFromPoints(median, upper); - } - - // Return result - return node; + + // Return if we arrived at a leaf + if (node->left == NULL && node->right == NULL) { + return; } - - // Helper function that searches the tree - void search(Node* node, const T& target, int k, std::priority_queue& heap) - { - if(node == NULL) return; // indicates that we're done here - - // Compute distance between target and current node - double dist = distance(_items[node->index], target); - - // If current node within radius tau - if(dist < _tau) { - if(heap.size() == k) heap.pop(); // remove furthest node from result list (if we already have k results) - heap.push(HeapItem(node->index, dist)); // add current node to result list - if(heap.size() == k) _tau = heap.top().dist; // update value of tau (farthest point in result list) - } - - // Return if we arrived at a leaf - if(node->left == NULL && node->right == NULL) { - return; - } - - // If the target lies within the radius of ball - if(dist < node->threshold) { - if(dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first - search(node->left, target, k, heap); - } - - if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child - search(node->right, target, k, heap); - } - - // If the target lies outsize the radius of the ball - } else { - if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first - search(node->right, target, k, heap); - } - - if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child - search(node->left, target, k, heap); - } - } + + // If the target lies within the radius of ball + if (dist < node->threshold) { + if (dist - _tau <= node->threshold) { + // if there can still be neighbors inside the + // ball, recursively search left child first + search(node->left, target, k, heap); + } + + if (dist + _tau >= node->threshold) { + // if there can still be neighbors outside the + // ball, recursively search right child + search(node->right, target, k, heap); + } + + // If the target lies outsize the radius of the ball + } else { + if (dist + _tau >= node->threshold) { + // if there can still be neighbors outside the + // ball, recursively search right child first + search(node->right, target, k, heap); + } + + if (dist - _tau <= node->threshold) { + // if there can still be neighbors inside the + // ball, recursively search left child + search(node->left, target, k, heap); + } } + } }; - + #pragma GCC visibility pop #endif // !defined(VPTREE_H) From 9be2fb5077c5cf2bc3add80d109f6c66ceefa844 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Tue, 26 Feb 2019 12:58:36 -0800 Subject: [PATCH 06/17] Replace uses of malloc/free with RAII types. Also flow down templating to a few more places, and use arrays instead of vectors where possible. --- tsne/bh_sne_src/main.cpp | 55 ++---- tsne/bh_sne_src/sptree.cpp | 33 ++-- tsne/bh_sne_src/sptree.h | 4 +- tsne/bh_sne_src/tsne.cpp | 333 ++++++++++++++++--------------------- tsne/bh_sne_src/vptree.h | 46 +++-- 5 files changed, 194 insertions(+), 277 deletions(-) diff --git a/tsne/bh_sne_src/main.cpp b/tsne/bh_sne_src/main.cpp index 5951436..6fe7ec5 100644 --- a/tsne/bh_sne_src/main.cpp +++ b/tsne/bh_sne_src/main.cpp @@ -32,16 +32,15 @@ */ #include -#include #include +#include #include "tsne.h" using namespace std; // Function that loads data from a t-SNE file -// Note: this function does a malloc that should be freed elsewhere -bool load_data(const char* dat_file, double** data, int* n, int* d, +bool load_data(const char* dat_file, vector* data, int* n, int* d, int* no_dims, double* theta, double* perplexity, int* rand_seed, int* max_iter) { // Open file, read first 2 integers, allocate memory, and read the data @@ -56,12 +55,8 @@ bool load_data(const char* dat_file, double** data, int* n, int* d, fread(perplexity, sizeof(double), 1, h); // perplexity fread(no_dims, sizeof(int), 1, h); // output dimensionality fread(max_iter, sizeof(int), 1, h); // maximum number of iterations - *data = (double*)malloc(*d * *n * sizeof(double)); - if (*data == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - fread(*data, sizeof(double), *n * *d, h); // the data + data->resize(*d * *n); + fread(data->data(), sizeof(double), *n * *d, h); // the data if (!feof(h)) fread(rand_seed, sizeof(int), 1, h); // random seed fclose(h); @@ -70,8 +65,9 @@ bool load_data(const char* dat_file, double** data, int* n, int* d, } // Function that saves map to a t-SNE file -void save_data(const char* res_file, double* data, int* landmarks, - double* costs, int n, int d) { +void save_data(const char* res_file, const vector& data, + const vector& landmarks, const vector& costs, int n, + int d) { // Open file, write first 2 integers and then the data FILE* h; if ((h = fopen(res_file, "w+b")) == nullptr) { @@ -80,9 +76,9 @@ void save_data(const char* res_file, double* data, int* landmarks, } fwrite(&n, sizeof(int), 1, h); fwrite(&d, sizeof(int), 1, h); - fwrite(data, sizeof(double), n * d, h); - fwrite(landmarks, sizeof(int), n, h); - fwrite(costs, sizeof(double), n, h); + fwrite(data.data(), sizeof(double), data.size(), h); + fwrite(landmarks.data(), sizeof(int), landmarks.size(), h); + fwrite(costs.data(), sizeof(double), costs.size(), h); fclose(h); fprintf(stderr, "Wrote the %i x %i data matrix successfully!\n", n, d); } @@ -121,7 +117,8 @@ int main(int argc, char* argv[]) { // Define some variables int origN, N, D, no_dims, max_iter; - double perplexity, theta, *data; + double perplexity, theta; + vector data; int rand_seed = -1; // Read the parameters and the dataset @@ -129,35 +126,17 @@ int main(int argc, char* argv[]) { &rand_seed, &max_iter)) { // Make dummy landmarks N = origN; - int* landmarks = (int*)malloc(N * sizeof(int)); - if (landmarks == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } + vector landmarks(N); for (int n = 0; n < N; n++) landmarks[n] = n; // Now fire up the SNE implementation - double* Y = (double*)malloc(N * no_dims * sizeof(double)); - double* costs = (double*)calloc(N, sizeof(double)); - if (Y == nullptr || costs == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - run(data, N, D, Y, no_dims, perplexity, theta, rand_seed, false, nullptr, - false, max_iter); + vector Y(N * no_dims); + vector costs(N); + run(data.data(), N, D, Y.data(), no_dims, perplexity, theta, rand_seed, + false, nullptr, false, max_iter); // Save the results save_data(res_file_c, Y, landmarks, costs, N, no_dims); - - // Clean up the memory - free(data); - data = nullptr; - free(Y); - Y = nullptr; - free(costs); - costs = nullptr; - free(landmarks); - landmarks = nullptr; } } diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 070278e..1cd8ad9 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -35,11 +35,14 @@ #include #include #include +#include #include "sptree.h" #pragma GCC visibility push(hidden) +using std::array; + // Constructs cell template Cell::Cell() { @@ -95,14 +98,12 @@ template SPTree::SPTree(double* inp_data, unsigned int N) { // Compute mean, width, and height of current map (boundaries of SPTree) int nD = 0; - double* mean_Y = (double*)calloc(NDims, sizeof(double)); - double* min_Y = (double*)malloc(NDims * sizeof(double)); - double* max_Y = (double*)malloc(NDims * sizeof(double)); - - for (unsigned int d = 0; d < NDims; d++) { - min_Y[d] = DBL_MAX; - max_Y[d] = -DBL_MAX; - } + array mean_Y; + mean_Y.fill(0.0); + array min_Y; + array max_Y; + min_Y.fill(DBL_MAX); + max_Y.fill(-DBL_MAX); for (unsigned int n = 0; n < N; n++) { for (unsigned int d = 0; d < NDims; d++) { @@ -119,17 +120,11 @@ SPTree::SPTree(double* inp_data, unsigned int N) { mean_Y[d] /= (double)N; // Construct SPTree - double* width = (double*)malloc(NDims * sizeof(double)); + array width; for (int d = 0; d < NDims; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; - init(NULL, inp_data, mean_Y, width); + init(NULL, inp_data, mean_Y.data(), width.data()); fill(N); - - // Clean up memory - free(mean_Y); - free(max_Y); - free(min_Y); - free(width); } // Constructor for SPTree with particular size and parent -- build the tree, @@ -391,8 +386,10 @@ void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, // Computes edge forces template -void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, - double* val_P, int N, double* pos_f) { +void SPTree::computeEdgeForces(const unsigned int* row_P, + const unsigned int* col_P, + const double* val_P, int N, + double* pos_f) { // Loop over all edges in the graph unsigned int ind1 = 0; unsigned int ind2 = 0; diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 9a9deae..8bd9dea 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -106,8 +106,8 @@ class SPTree { unsigned int getDepth(); void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q); - void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, - double* val_P, int N, double* pos_f); + void computeEdgeForces(const unsigned int* row_P, const unsigned int* col_P, + const double* val_P, int N, double* pos_f); void print(); private: diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index 66e064d..d31c63f 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "sptree.h" @@ -46,7 +47,13 @@ #pragma GCC visibility push(hidden) +using std::abs; using std::array; +using std::exp; +using std::fprintf; +using std::log; +using std::move; +using std::sqrt; using std::vector; namespace { @@ -57,24 +64,34 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, int max_iter, int stop_lying_iter, int mom_switch_iter); template -void computeGradient(double* P, unsigned int* inp_row_P, - unsigned int* inp_col_P, double* inp_val_P, double* Y, - int N, double* dC, double theta); -void computeExactGradient(double* P, double* Y, int N, int D, double* dC); -double evaluateError(double* P, double* Y, int N, int D); +void computeGradient(const vector& inp_row_P, + const vector& inp_col_P, + const vector& inp_val_P, double* Y, int N, + vector& dC, double theta); +template +void computeExactGradient(const vector& P, double* Y, int N, + double* dC); +template +double evaluateError(const vector& P, double* Y, int N); template -double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, - double* Y, int N, double theta); +double evaluateError(const vector& row_P, + const vector& col_P, + const vector& val_P, double* Y, int N, + double theta); void zeroMean(double* X, int N, int D); +template +void zeroMean(double* X, int N); void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity); -void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, - unsigned int** _col_P, double** _val_P, - double perplexity, int K); +void computeGaussianPerplexity(double* X, int N, int D, + vector* _row_P, + vector* _col_P, + vector* _val_P, double perplexity, + int K); void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD); double randn(); -void symmetrizeMatrix(unsigned int** row_P, unsigned int** col_P, - double** val_P, int N); +void symmetrizeMatrix(vector* row_P, vector* col_P, + vector* val_P, int N); static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); @@ -82,53 +99,41 @@ static inline double sign(double x) { // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm) template -void computeGradient(double* P, unsigned int* inp_row_P, - unsigned int* inp_col_P, double* inp_val_P, double* Y, - int N, double* dC, double theta) { +void computeGradient(const vector& inp_row_P, + const vector& inp_col_P, + const vector& inp_val_P, double* Y, int N, + vector& dC, double theta) { // Construct space-partitioning tree on current map - SPTree* tree = new SPTree(Y, N); + SPTree tree(Y, N); // Compute all terms required for t-SNE gradient double sum_Q = .0; - double* pos_f = (double*)calloc(N * NDIMS, sizeof(double)); - double* neg_f = (double*)calloc(N * NDIMS, sizeof(double)); - if (pos_f == nullptr || neg_f == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); + vector pos_f(N * NDIMS); + vector neg_f(N * NDIMS); + tree.computeEdgeForces(inp_row_P.data(), inp_col_P.data(), inp_val_P.data(), + N, pos_f.data()); for (int n = 0; n < N; n++) - tree->computeNonEdgeForces(n, theta, neg_f + n * NDIMS, &sum_Q); + tree.computeNonEdgeForces(n, theta, neg_f.data() + n * NDIMS, &sum_Q); // Compute final t-SNE gradient for (int i = 0; i < N * NDIMS; i++) { dC[i] = pos_f[i] - (neg_f[i] / sum_Q); } - free(pos_f); - free(neg_f); - delete tree; } // Compute gradient of the t-SNE cost function (exact) -void computeExactGradient(double* P, double* Y, int N, int D, double* dC) { +template +void computeExactGradient(const vector& P, double* Y, int N, + vector& dC) { // Make sure the current gradient contains zeros - for (int i = 0; i < N * D; i++) - dC[i] = 0.0; + dC.assign(N * D, 0.0); // Compute the squared Euclidean distance matrix - double* DD = (double*)malloc(N * N * sizeof(double)); - if (DD == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - computeSquaredEuclideanDistance(Y, N, D, DD); + vector DD(N * N); + computeSquaredEuclideanDistance(Y, N, D, DD.data()); // Compute Q-matrix and normalization sum - double* Q = (double*)malloc(N * N * sizeof(double)); - if (Q == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } + vector Q(N * N); double sum_Q = .0; int nN = 0; for (int n = 0; n < N; n++) { @@ -158,24 +163,15 @@ void computeExactGradient(double* P, double* Y, int N, int D, double* dC) { nN += N; nD += D; } - - // Free memory - free(DD); - DD = nullptr; - free(Q); - Q = nullptr; } // Evaluate t-SNE cost function (exactly) -double evaluateError(double* P, double* Y, int N, int D) { +template +double evaluateError(const vector& P, double* Y, int N) { // Compute the squared Euclidean distance matrix - double* DD = (double*)malloc(N * N * sizeof(double)); - double* Q = (double*)malloc(N * N * sizeof(double)); - if (DD == nullptr || Q == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - computeSquaredEuclideanDistance(Y, N, D, DD); + vector DD(N * N); + vector Q(N * N); + computeSquaredEuclideanDistance(Y, N, D, DD.data()); // Compute Q-matrix and normalization sum int nN = 0; @@ -198,17 +194,15 @@ double evaluateError(double* P, double* Y, int N, int D) { for (int n = 0; n < N * N; n++) { C += P[n] * log((P[n] + FLT_MIN) / (Q[n] + FLT_MIN)); } - - // Clean up memory - free(DD); - free(Q); return C; } // Evaluate t-SNE cost function (approximately) template -double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, - double* Y, int N, double theta) { +double evaluateError(const vector& row_P, + const vector& col_P, + const vector& val_P, double* Y, int N, + double theta) { // Get estimate of normalization term array buff; buff.fill(0); @@ -246,12 +240,8 @@ double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { // Compute the squared Euclidean distance matrix - double* DD = (double*)malloc(N * N * sizeof(double)); - if (DD == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - computeSquaredEuclideanDistance(X, N, D, DD); + vector DD(N * N); + computeSquaredEuclideanDistance(X, N, D, DD.data()); // Compute the Gaussian kernel row by row int nN = 0; @@ -310,47 +300,35 @@ void computeGaussianPerplexity(double* X, int N, int D, double* P, P[nN + m] /= sum_P; nN += N; } - - // Clean up memory - free(DD); - DD = nullptr; } -// Compute input similarities with a fixed perplexity using ball trees (this -// function allocates memory another function should free) -void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, - unsigned int** _col_P, double** _val_P, - double perplexity, int K) { +// Compute input similarities with a fixed perplexity using ball trees. +void computeGaussianPerplexity(double* X, int N, int D, + vector* _row_P, + vector* _col_P, + vector* _val_P, double perplexity, + int K) { if (perplexity > K) fprintf(stderr, "Perplexity should be lower than K!\n"); // Allocate the memory we need - *_row_P = (unsigned int*)malloc((N + 1) * sizeof(unsigned int)); - *_col_P = (unsigned int*)calloc(N * K, sizeof(unsigned int)); - *_val_P = (double*)calloc(N * K, sizeof(double)); - if (*_row_P == nullptr || *_col_P == nullptr || *_val_P == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - unsigned int* row_P = *_row_P; - unsigned int* col_P = *_col_P; - double* val_P = *_val_P; - double* cur_P = (double*)malloc((N - 1) * sizeof(double)); - if (cur_P == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } + _row_P->resize(N + 1); + _col_P->resize(N * K); + _val_P->resize(N * K); + vector& row_P = *_row_P; + vector& col_P = *_col_P; + vector& val_P = *_val_P; + vector cur_P(N - 1); row_P[0] = 0; for (int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + (unsigned int)K; // Build ball tree on data set - VpTree* tree = - new VpTree(); + VpTree tree; vector obj_X(N, DataPoint(D, -1, X)); for (int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D); - tree->create(obj_X); + tree.create(obj_X); // Loop over all points to find nearest neighbors fprintf(stderr, "Building tree...\n"); @@ -363,7 +341,7 @@ void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, // Find nearest neighbors indices.clear(); distances.clear(); - tree->search(obj_X[n], K + 1, &indices, &distances); + tree.search(obj_X[n], K + 1, &indices, &distances); // Initialize some variables for binary search bool found = false; @@ -421,27 +399,19 @@ void computeGaussianPerplexity(double* X, int N, int D, unsigned int** _row_P, val_P[row_P[n] + m] = cur_P[m]; } } - - // Clean up memory - obj_X.clear(); - free(cur_P); - delete tree; } // Symmetrizes a sparse matrix -void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, - double** _val_P, int N) { +void symmetrizeMatrix(vector* _row_P, + vector* _col_P, vector* _val_P, + int N) { // Get sparse matrix - unsigned int* row_P = *_row_P; - unsigned int* col_P = *_col_P; - double* val_P = *_val_P; + vector& row_P = *_row_P; + vector& col_P = *_col_P; + vector& val_P = *_val_P; // Count number of elements and row counts of symmetric matrix - int* row_counts = (int*)calloc(N, sizeof(int)); - if (row_counts == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } + vector row_counts(N); for (int n = 0; n < N; n++) { for (int i = row_P[n]; i < row_P[n + 1]; i++) { // Check whether element (col_P[i], n) is present @@ -463,15 +433,9 @@ void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, no_elem += row_counts[n]; // Allocate memory for symmetrized matrix - unsigned int* sym_row_P = - (unsigned int*)malloc((N + 1) * sizeof(unsigned int)); - unsigned int* sym_col_P = - (unsigned int*)malloc(no_elem * sizeof(unsigned int)); - double* sym_val_P = (double*)malloc(no_elem * sizeof(double)); - if (sym_row_P == nullptr || sym_col_P == nullptr || sym_val_P == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } + vector sym_row_P(N + 1); + vector sym_col_P(no_elem); + vector sym_val_P(no_elem); // Construct new row indices for symmetric matrix sym_row_P[0] = 0; @@ -479,14 +443,10 @@ void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, sym_row_P[n + 1] = sym_row_P[n] + (unsigned int)row_counts[n]; // Fill the result matrix - int* offset = (int*)calloc(N, sizeof(int)); - if (offset == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } + vector offset(N); for (int n = 0; n < N; n++) { - for (unsigned int i = row_P[n]; i < row_P[n + 1]; - i++) { // considering element(n, col_P[i]) + for (unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { + // considering element(n, col_P[i]) // Check whether element (col_P[i], n) is present bool present = false; @@ -525,18 +485,9 @@ void symmetrizeMatrix(unsigned int** _row_P, unsigned int** _col_P, sym_val_P[i] /= 2.0; // Return symmetrized matrices - free(*_row_P); - *_row_P = sym_row_P; - free(*_col_P); - *_col_P = sym_col_P; - free(*_val_P); - *_val_P = sym_val_P; - - // Free up some memery - free(offset); - offset = nullptr; - free(row_counts); - row_counts = nullptr; + *_row_P = move(sym_row_P); + *_col_P = move(sym_col_P); + *_val_P = move(sym_val_P); } // Compute squared Euclidean distance matrix @@ -558,13 +509,36 @@ void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { } // Makes data zero-mean -void zeroMean(double* X, int N, int D) { +template +void zeroMean(double* X, int N) { // Compute data mean - double* mean = (double*)calloc(D, sizeof(double)); - if (mean == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); + array mean; + mean.fill(0); + int nD = 0; + for (int n = 0; n < N; n++) { + for (int d = 0; d < D; d++) { + mean[d] += X[nD + d]; + } + nD += D; + } + for (int d = 0; d < D; d++) { + mean[d] /= (double)N; + } + + // Subtract data mean + nD = 0; + for (int n = 0; n < N; n++) { + for (int d = 0; d < D; d++) { + X[nD + d] -= mean[d]; + } + nD += D; } +} + +// Makes data zero-mean +void zeroMean(double* X, int N, int D) { + // Compute data mean + vector mean(D); int nD = 0; for (int n = 0; n < N; n++) { for (int d = 0; d < D; d++) { @@ -584,8 +558,6 @@ void zeroMean(double* X, int N, int D) { } nD += D; } - free(mean); - mean = nullptr; } // Generates a Gaussian random number @@ -638,17 +610,9 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, double eta = 200.0; // Allocate some memory - double* dY = (double*)malloc(N * NDIMS * sizeof(double)); - double* uY = (double*)malloc(N * NDIMS * sizeof(double)); - double* gains = (double*)malloc(N * NDIMS * sizeof(double)); - if (dY == nullptr || uY == nullptr || gains == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - for (int i = 0; i < N * NDIMS; i++) - uY[i] = .0; - for (int i = 0; i < N * NDIMS; i++) - gains[i] = 1.0; + vector dY(N * NDIMS); + vector uY(N * NDIMS); + vector gains(N * NDIMS, 1.0); // Normalize input data (to prevent numerical problems) fprintf(stderr, "Computing input similarities...\n"); @@ -656,26 +620,23 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, zeroMean(X, N, D); double max_X = .0; for (int i = 0; i < N * D; i++) { - if (fabs(X[i]) > max_X) - max_X = fabs(X[i]); + auto ax = abs(X[i]); + if (ax > max_X) + max_X = ax; } for (int i = 0; i < N * D; i++) X[i] /= max_X; // Compute input similarities for exact t-SNE - double* P; - unsigned int* row_P; - unsigned int* col_P; - double* val_P; + vector P; + vector row_P; + vector col_P; + vector val_P; if (exact) { // Compute similarities fprintf(stderr, "Exact?"); - P = (double*)malloc(N * N * sizeof(double)); - if (P == nullptr) { - fprintf(stderr, "Memory allocation failed!\n"); - exit(1); - } - computeGaussianPerplexity(X, N, D, P, perplexity); + P.resize(N * N); + computeGaussianPerplexity(X, N, D, P.data(), perplexity); // Symmetrize input similarities fprintf(stderr, "Symmetrizing...\n"); @@ -694,10 +655,9 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, sum_P += P[i]; for (int i = 0; i < N * N; i++) P[i] /= sum_P; - } + } else { + // Compute input similarities for approximate t-SNE - // Compute input similarities for approximate t-SNE - else { // Compute asymmetric pairwise input similarities computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int)(3 * perplexity)); @@ -749,9 +709,9 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, for (int iter = 0; iter < max_iter; iter++) { // Compute (approximate) gradient if (exact) - computeExactGradient(P, Y, N, NDIMS, dY); + computeExactGradient(P, Y, N, dY); else - computeGradient(P, row_P, col_P, val_P, Y, N, dY, theta); + computeGradient(row_P, col_P, val_P, Y, N, dY, theta); // Update gains for (int i = 0; i < N * NDIMS; i++) @@ -768,7 +728,7 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, Y[i] = Y[i] + uY[i]; // Make solution zero-mean - zeroMean(Y, N, NDIMS); + zeroMean(Y, N); // Stop lying about the P-values after a while, and switch momentum if (iter == stop_lying_iter) { @@ -787,11 +747,12 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, if (iter > 0 && (iter % 50 == 0 || iter == max_iter - 1)) { end = clock(); double C = .0; - if (exact) - C = evaluateError(P, Y, N, NDIMS); - else - C = evaluateError(row_P, col_P, val_P, Y, N, - theta); // doing approximate computation here! + if (exact) { + C = evaluateError(P, Y, N); + } else { + // doing approximate computation here! + C = evaluateError(row_P, col_P, val_P, Y, N, theta); + } if (iter == 0) fprintf(stderr, "Iteration %d: error is %f\n", iter + 1, C); else { @@ -806,20 +767,6 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, end = clock(); total_time += (float)(end - start) / CLOCKS_PER_SEC; - // Clean up memory - free(dY); - free(uY); - free(gains); - if (exact) - free(P); - else { - free(row_P); - row_P = nullptr; - free(col_P); - col_P = nullptr; - free(val_P); - val_P = nullptr; - } fprintf(stderr, "Fitting performed in %4.2f seconds.\n", total_time); } diff --git a/tsne/bh_sne_src/vptree.h b/tsne/bh_sne_src/vptree.h index 302a2fa..2092ed5 100644 --- a/tsne/bh_sne_src/vptree.h +++ b/tsne/bh_sne_src/vptree.h @@ -58,7 +58,7 @@ class DataPoint { _ind = -1; _x = NULL; } - DataPoint(int D, int ind, double* x) { + DataPoint(int D, int ind, const double* x) { _D = D; _ind = ind; _x = (double*)malloc(_D * sizeof(double)); @@ -119,17 +119,13 @@ template class VpTree { public: // Default constructor - VpTree() : _root(0) { - } + VpTree() = default; // Destructor - ~VpTree() { - delete _root; - } + ~VpTree() = default; // Function to create a new VpTree from data void create(const std::vector& items) { - delete _root; _items = items; _root = buildFromPoints(0, items.size()); } @@ -144,7 +140,7 @@ class VpTree { _tau = DBL_MAX; // Perform the search - search(_root, target, k, heap); + search(_root.get(), target, k, heap); // Gather final results results->clear(); @@ -167,19 +163,17 @@ class VpTree { // Single node of a VP tree (has a point and radius; left children are closer // to point than the radius) struct Node { - int index; // index of point in node - double threshold; // radius(?) - Node* left; // points closer by than threshold - Node* right; // points farther away than threshold + int index; // index of point in node + double threshold; // radius(?) + std::unique_ptr left; // points closer by than threshold + std::unique_ptr right; // points farther away than threshold - Node() : index(0), threshold(0.), left(0), right(0) { + Node() : index(0), threshold(0.) { } - ~Node() { // destructor - delete left; - delete right; - } - } * _root; + ~Node() = default; + }; + std::unique_ptr _root; // An item on the intermediate result queue struct HeapItem { @@ -203,13 +197,13 @@ class VpTree { }; // Function that (recursively) fills the tree - Node* buildFromPoints(int lower, int upper) { + std::unique_ptr buildFromPoints(int lower, int upper) { if (upper == lower) { // indicates that we're done here! return NULL; } // Lower index is center of current node - Node* node = new Node(); + std::unique_ptr node = std::make_unique(); node->index = lower; if (upper - lower > 1) { // if we did not arrive at leaf yet @@ -238,7 +232,7 @@ class VpTree { } // Helper function that searches the tree - void search(Node* node, const T& target, int k, + void search(const Node* node, const T& target, int k, std::priority_queue& heap) { if (node == NULL) return; // indicates that we're done here @@ -262,7 +256,7 @@ class VpTree { } // Return if we arrived at a leaf - if (node->left == NULL && node->right == NULL) { + if (!node->left && !node->right) { return; } @@ -271,13 +265,13 @@ class VpTree { if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the // ball, recursively search left child first - search(node->left, target, k, heap); + search(node->left.get(), target, k, heap); } if (dist + _tau >= node->threshold) { // if there can still be neighbors outside the // ball, recursively search right child - search(node->right, target, k, heap); + search(node->right.get(), target, k, heap); } // If the target lies outsize the radius of the ball @@ -285,13 +279,13 @@ class VpTree { if (dist + _tau >= node->threshold) { // if there can still be neighbors outside the // ball, recursively search right child first - search(node->right, target, k, heap); + search(node->right.get(), target, k, heap); } if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the // ball, recursively search left child - search(node->left, target, k, heap); + search(node->left.get(), target, k, heap); } } } From 4fea1277f8dab79de64db5a234d50ae7ceca06e8 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 11:58:03 -0700 Subject: [PATCH 07/17] Specialize for QT_NODE_CAPACITY=1. --- tsne/bh_sne_src/sptree.cpp | 37 +++++++++++++++++-------------------- tsne/bh_sne_src/sptree.h | 5 +---- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 1cd8ad9..5b44ab1 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -31,11 +31,11 @@ * */ +#include #include #include #include #include -#include #include "sptree.h" @@ -218,18 +218,18 @@ bool SPTree::insert(unsigned int new_index) { } // If there is space in this quad tree and it is a leaf, add the object here - if (is_leaf && size < QT_NODE_CAPACITY) { - index[size] = new_index; + if (is_leaf && size < 1) { + index = new_index; size++; return true; } // Don't add duplicates for now (this is not very nice) bool any_duplicate = false; - for (unsigned int n = 0; n < size; n++) { + if (size > 0) { bool duplicate = true; for (unsigned int d = 0; d < NDims; d++) { - if (point[d] != data[index[n] * NDims + d]) { + if (point[d] != data[index * NDims + d]) { duplicate = false; break; } @@ -274,13 +274,13 @@ void SPTree::subdivide() { } // Move existing points to correct children - for (unsigned int i = 0; i < size; i++) { + if (size > 0) { bool success = false; for (unsigned int j = 0; j < no_children; j++) { if (!success) - success = children[j]->insert(index[i]); + success = children[j]->insert(index); } - index[i] = -1; + index = -1; } // Empty parent node @@ -298,8 +298,8 @@ void SPTree::fill(unsigned int N) { // Checks whether the specified tree is correct template bool SPTree::isCorrect() { - for (unsigned int n = 0; n < size; n++) { - double* point = data + index[n] * NDims; + if (size > 0) { + double* point = data + index * NDims; if (!boundary.containsPoint(point)) return false; } @@ -323,8 +323,8 @@ template unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc) { // Gather indices in current quadrant - for (unsigned int i = 0; i < size; i++) - indices[loc + i] = index[i]; + if (size > 0) + indices[loc] = index; loc += size; // Gather indices in children @@ -350,7 +350,7 @@ template void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) { // Make sure that we spend no time on empty nodes or self-interactions - if (cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) + if (cum_size == 0 || (is_leaf && size == 1 && index == point_index)) return; // Compute distance between point and center-of-mass @@ -425,15 +425,12 @@ void SPTree::print() { if (is_leaf) { fprintf(stderr, "Leaf node; data = ["); - for (int i = 0; i < size; i++) { - double* point = data + index[i] * NDims; + if (size > 0) { + double* point = data + index * NDims; for (int d = 0; d < NDims; d++) fprintf(stderr, "%f, ", point[d]); - fprintf(stderr, " (index = %d)", index[i]); - if (i < size - 1) - fprintf(stderr, "\n"); - else - fprintf(stderr, "]\n"); + fprintf(stderr, " (index = %d)", index); + fprintf(stderr, "]\n"); } } else { fprintf(stderr, "Intersection node with center-of-mass = ["); diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 8bd9dea..59ff230 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -59,9 +59,6 @@ class SPTree { enum { no_children = 2 * SPTree::no_children }; private: - // Fixed constants - static const unsigned int QT_NODE_CAPACITY = 1; - // A buffer we use when doing force computations double buff[NDims]; @@ -80,7 +77,7 @@ class SPTree { // and list of all children double* data; double center_of_mass[NDims]; - unsigned int index[QT_NODE_CAPACITY]; + unsigned int index; // Children SPTree* children[no_children]; From 4c3309f364777aa94bc1b99babde501a4c841743 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 12:19:14 -0700 Subject: [PATCH 08/17] Remove some useless members from sptree nodes. --- tsne/bh_sne_src/sptree.cpp | 66 ++++++++++++-------------------------- tsne/bh_sne_src/sptree.h | 14 +------- 2 files changed, 21 insertions(+), 59 deletions(-) diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 5b44ab1..0033d43 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -123,49 +123,31 @@ SPTree::SPTree(double* inp_data, unsigned int N) { array width; for (int d = 0; d < NDims; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; - init(NULL, inp_data, mean_Y.data(), width.data()); + init(inp_data, mean_Y.data(), width.data()); fill(N); } -// Constructor for SPTree with particular size and parent -- build the tree, +// Constructor for SPTree with particular size -- build the tree, // too! template SPTree::SPTree(double* inp_data, unsigned int N, double* inp_corner, double* inp_width) { - init(NULL, inp_data, inp_corner, inp_width); + init(inp_data, inp_corner, inp_width); fill(N); } // Constructor for SPTree with particular size (do not fill the tree) template SPTree::SPTree(double* inp_data, double* inp_corner, double* inp_width) { - init(NULL, inp_data, inp_corner, inp_width); -} - -// Constructor for SPTree with particular size and parent (do not fill tree) -template -SPTree::SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, - double* inp_width) { - init(inp_parent, inp_data, inp_corner, inp_width); -} - -// Constructor for SPTree with particular size and parent -- build the tree, -// too! -template -SPTree::SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, - double* inp_corner, double* inp_width) { - init(inp_parent, inp_data, inp_corner, inp_width); - fill(N); + init(inp_data, inp_corner, inp_width); } // Main initialization function template -void SPTree::init(SPTree* inp_parent, double* inp_data, - double* inp_corner, double* inp_width) { - parent = inp_parent; +void SPTree::init(double* inp_data, double* inp_corner, + double* inp_width) { data = inp_data; is_leaf = true; - size = 0; cum_size = 0; for (unsigned int d = 0; d < NDims; d++) @@ -194,12 +176,6 @@ void SPTree::setData(double* inp_data) { data = inp_data; } -// Get the parent of the current tree -template -SPTree* SPTree::getParent() { - return parent; -} - // Insert a point into the SPTree template bool SPTree::insert(unsigned int new_index) { @@ -218,15 +194,13 @@ bool SPTree::insert(unsigned int new_index) { } // If there is space in this quad tree and it is a leaf, add the object here - if (is_leaf && size < 1) { + if (is_leaf && cum_size == 1) { index = new_index; - size++; return true; } // Don't add duplicates for now (this is not very nice) - bool any_duplicate = false; - if (size > 0) { + if (is_leaf) { bool duplicate = true; for (unsigned int d = 0; d < NDims; d++) { if (point[d] != data[index * NDims + d]) { @@ -234,10 +208,9 @@ bool SPTree::insert(unsigned int new_index) { break; } } - any_duplicate = any_duplicate | duplicate; + if (duplicate) + return true; } - if (any_duplicate) - return true; // Otherwise, we need to subdivide the current cell if (is_leaf) @@ -270,11 +243,11 @@ void SPTree::subdivide() { new_corner[d] = boundary.getCorner(d) + .5 * boundary.getWidth(d); div *= 2; } - children[i] = new SPTree(this, data, new_corner, new_width); + children[i] = new SPTree(data, new_corner, new_width); } // Move existing points to correct children - if (size > 0) { + if (is_leaf && cum_size > 0) { bool success = false; for (unsigned int j = 0; j < no_children; j++) { if (!success) @@ -284,7 +257,6 @@ void SPTree::subdivide() { } // Empty parent node - size = 0; is_leaf = false; } @@ -298,7 +270,7 @@ void SPTree::fill(unsigned int N) { // Checks whether the specified tree is correct template bool SPTree::isCorrect() { - if (size > 0) { + if (is_leaf && cum_size > 0) { double* point = data + index * NDims; if (!boundary.containsPoint(point)) return false; @@ -323,9 +295,9 @@ template unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc) { // Gather indices in current quadrant - if (size > 0) - indices[loc] = index; - loc += size; + if (is_leaf && cum_size > 0) { + indices[++loc] = index; + } // Gather indices in children if (!is_leaf) { @@ -350,13 +322,14 @@ template void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) { // Make sure that we spend no time on empty nodes or self-interactions - if (cum_size == 0 || (is_leaf && size == 1 && index == point_index)) + if (cum_size == 0 || (is_leaf && cum_size >= 1 && index == point_index)) return; // Compute distance between point and center-of-mass double sqdist = .0; unsigned int ind = point_index * NDims; + array buff; for (unsigned int d = 0; d < NDims; d++) { buff[d] = data[ind + d] - center_of_mass[d]; sqdist += buff[d] * buff[d]; @@ -400,6 +373,7 @@ void SPTree::computeEdgeForces(const unsigned int* row_P, sqdist = 1.0; ind2 = col_P[i] * NDims; + array buff; for (unsigned int d = 0; d < NDims; d++) { buff[d] = data[ind1 + d] - data[ind2 + d]; sqdist += buff[d] * buff[d]; @@ -425,7 +399,7 @@ void SPTree::print() { if (is_leaf) { fprintf(stderr, "Leaf node; data = ["); - if (size > 0) { + if (is_leaf && cum_size > 0) { double* point = data + index * NDims; for (int d = 0; d < NDims; d++) fprintf(stderr, "%f, ", point[d]); diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 59ff230..d061b87 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -59,14 +59,8 @@ class SPTree { enum { no_children = 2 * SPTree::no_children }; private: - // A buffer we use when doing force computations - double buff[NDims]; - // Properties of this node in the tree - SPTree* parent; - unsigned int dimension; bool is_leaf; - unsigned int size; unsigned int cum_size; // Axis-aligned bounding box stored as a center with half-dimensions to @@ -87,13 +81,8 @@ class SPTree { SPTree(double* inp_data, double* inp_corner, double* inp_width); SPTree(double* inp_data, unsigned int N, double* inp_corner, double* inp_width); - SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, - double* inp_corner, double* inp_width); - SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, - double* inp_width); ~SPTree(); void setData(double* inp_data); - SPTree* getParent(); void construct(Cell boundary); bool insert(unsigned int new_index); void subdivide(); @@ -108,8 +97,7 @@ class SPTree { void print(); private: - void init(SPTree* inp_parent, double* inp_data, double* inp_corner, - double* inp_width); + void init(double* inp_data, double* inp_corner, double* inp_width); void fill(unsigned int N); unsigned int getAllIndices(unsigned int* indices, unsigned int loc); bool isChild(unsigned int test_index, unsigned int start, unsigned int end); From 0f7ef1e40128608792465c9967b71abf9bc3a5f9 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 12:29:51 -0700 Subject: [PATCH 09/17] Flatten sptree child memory. Allocate an array of children, rather than having an array of child pointers. Cleanup useless methods, constness. --- tsne/bh_sne_src/sptree.cpp | 157 ++++++++++++++----------------------- tsne/bh_sne_src/sptree.h | 70 +++++++++-------- 2 files changed, 98 insertions(+), 129 deletions(-) diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 0033d43..5548183 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -32,42 +32,33 @@ */ #include +#include #include #include #include -#include #include "sptree.h" #pragma GCC visibility push(hidden) using std::array; +using std::make_unique; -// Constructs cell template -Cell::Cell() { -} - -template -Cell::Cell(double* inp_corner, double* inp_width) { +Cell::Cell(const double* inp_corner, const double* inp_width) { for (int d = 0; d < NDims; d++) setCorner(d, inp_corner[d]); for (int d = 0; d < NDims; d++) setWidth(d, inp_width[d]); } -// Destructs cell -template -Cell::~Cell() { -} - template -double Cell::getCorner(unsigned int d) { +double Cell::getCorner(unsigned int d) const { return corner[d]; } template -double Cell::getWidth(unsigned int d) { +double Cell::getWidth(unsigned int d) const { return width[d]; } @@ -83,7 +74,8 @@ void Cell::setWidth(unsigned int d, double val) { // Checks whether a point lies in a cell template -bool Cell::containsPoint(double point[]) { +bool Cell::containsPoint(const double point[]) const { + assert(point); for (int d = 0; d < NDims; d++) { if (corner[d] - width[d] > point[d]) return false; @@ -95,7 +87,7 @@ bool Cell::containsPoint(double point[]) { // Default constructor for SPTree -- build tree, too! template -SPTree::SPTree(double* inp_data, unsigned int N) { +SPTree::SPTree(const double* inp_data, unsigned int N) { // Compute mean, width, and height of current map (boundaries of SPTree) int nD = 0; array mean_Y; @@ -127,27 +119,11 @@ SPTree::SPTree(double* inp_data, unsigned int N) { fill(N); } -// Constructor for SPTree with particular size -- build the tree, -// too! -template -SPTree::SPTree(double* inp_data, unsigned int N, double* inp_corner, - double* inp_width) { - init(inp_data, inp_corner, inp_width); - fill(N); -} - -// Constructor for SPTree with particular size (do not fill the tree) -template -SPTree::SPTree(double* inp_data, double* inp_corner, double* inp_width) { - init(inp_data, inp_corner, inp_width); -} - // Main initialization function template -void SPTree::init(double* inp_data, double* inp_corner, - double* inp_width) { +void SPTree::init(const double* inp_data, const double* inp_corner, + const double* inp_width) { data = inp_data; - is_leaf = true; cum_size = 0; for (unsigned int d = 0; d < NDims; d++) @@ -155,21 +131,10 @@ void SPTree::init(double* inp_data, double* inp_corner, for (unsigned int d = 0; d < NDims; d++) boundary.setWidth(d, inp_width[d]); - for (unsigned int i = 0; i < no_children; i++) - children[i] = NULL; for (unsigned int d = 0; d < NDims; d++) center_of_mass[d] = .0; } -// Destructor for SPTree -template -SPTree::~SPTree() { - for (unsigned int i = 0; i < no_children; i++) { - if (children[i] != NULL) - delete children[i]; - } -} - // Update the data underlying this tree template void SPTree::setData(double* inp_data) { @@ -180,12 +145,12 @@ void SPTree::setData(double* inp_data) { template bool SPTree::insert(unsigned int new_index) { // Ignore objects which do not belong in this quad tree - double* point = data + new_index * NDims; + const double* point = data + new_index * NDims; if (!boundary.containsPoint(point)) return false; // Online update of cumulative size and center-of-mass - cum_size++; + ++cum_size; double mult1 = (double)(cum_size - 1) / (double)cum_size; double mult2 = 1.0 / (double)cum_size; @@ -194,13 +159,13 @@ bool SPTree::insert(unsigned int new_index) { } // If there is space in this quad tree and it is a leaf, add the object here - if (is_leaf && cum_size == 1) { + if (cum_size == 1) { index = new_index; return true; } // Don't add duplicates for now (this is not very nice) - if (is_leaf) { + if (!children) { bool duplicate = true; for (unsigned int d = 0; d < NDims; d++) { if (point[d] != data[index * NDims + d]) { @@ -213,15 +178,13 @@ bool SPTree::insert(unsigned int new_index) { } // Otherwise, we need to subdivide the current cell - if (is_leaf) + if (!children) subdivide(); // Find out where the point can be inserted - for (unsigned int i = 0; i < no_children; i++) { - if (children[i]->insert(new_index)) + for (auto& child : *children) + if (child.insert(new_index)) return true; - } - // Otherwise, the point cannot be inserted (this should never happen) return false; } @@ -230,34 +193,33 @@ bool SPTree::insert(unsigned int new_index) { // area template void SPTree::subdivide() { + assert(!children); // Create new children double new_corner[NDims]; double new_width[NDims]; + for (unsigned int d = 0; d < NDims; d++) { + new_width[d] = .5 * boundary.getWidth(d); + } + children = make_unique, no_children>>(); for (unsigned int i = 0; i < no_children; i++) { unsigned int div = 1; for (unsigned int d = 0; d < NDims; d++) { - new_width[d] = .5 * boundary.getWidth(d); if ((i / div) % 2 == 1) new_corner[d] = boundary.getCorner(d) - .5 * boundary.getWidth(d); else new_corner[d] = boundary.getCorner(d) + .5 * boundary.getWidth(d); div *= 2; } - children[i] = new SPTree(data, new_corner, new_width); + (*children)[i].init(data, new_corner, new_width); } // Move existing points to correct children - if (is_leaf && cum_size > 0) { - bool success = false; - for (unsigned int j = 0; j < no_children; j++) { - if (!success) - success = children[j]->insert(index); + for (auto& child : *children) { + if (child.insert(index)) { + break; } - index = -1; } - - // Empty parent node - is_leaf = false; + index = -1; } // Build SPTree on dataset @@ -269,60 +231,61 @@ void SPTree::fill(unsigned int N) { // Checks whether the specified tree is correct template -bool SPTree::isCorrect() { - if (is_leaf && cum_size > 0) { - double* point = data + index * NDims; +bool SPTree::isCorrect() const { + if (!children && cum_size > 0) { + const double* point = data + index * NDims; if (!boundary.containsPoint(point)) return false; } - if (!is_leaf) { - bool correct = true; - for (int i = 0; i < no_children; i++) - correct = correct && children[i]->isCorrect(); - return correct; - } else - return true; + if (children) { + for (auto& child : *children) { + if (!child.isCorrect()) { + return false; + } + } + } + return true; } // Build a list of all indices in SPTree template -void SPTree::getAllIndices(unsigned int* indices) { +void SPTree::getAllIndices(unsigned int* indices) const { getAllIndices(indices, 0); } // Build a list of all indices in SPTree template unsigned int SPTree::getAllIndices(unsigned int* indices, - unsigned int loc) { + unsigned int loc) const { // Gather indices in current quadrant - if (is_leaf && cum_size > 0) { + if (!children && cum_size > 0) { indices[++loc] = index; } // Gather indices in children - if (!is_leaf) { - for (int i = 0; i < no_children; i++) - loc = children[i]->getAllIndices(indices, loc); + if (children) { + for (auto& child : *children) + loc = child.getAllIndices(indices, loc); } return loc; } template -unsigned int SPTree::getDepth() { - if (is_leaf) +unsigned int SPTree::getDepth() const { + if (!children) return 1; int depth = 0; - for (unsigned int i = 0; i < no_children; i++) - depth = fmax(depth, children[i]->getDepth()); + for (auto& child : *children) + depth = fmax(depth, child.getDepth()); return 1 + depth; } // Compute non-edge forces using Barnes-Hut algorithm template void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, - double neg_f[], double* sum_Q) { + double neg_f[], double* sum_Q) const { // Make sure that we spend no time on empty nodes or self-interactions - if (cum_size == 0 || (is_leaf && cum_size >= 1 && index == point_index)) + if (cum_size == 0 || (!children && cum_size >= 1 && index == point_index)) return; // Compute distance between point and center-of-mass @@ -342,7 +305,7 @@ void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, cur_width = boundary.getWidth(d); max_width = (max_width > cur_width) ? max_width : cur_width; } - if (is_leaf || max_width / sqrt(sqdist) < theta) { + if (!children || max_width / sqrt(sqdist) < theta) { // Compute and add t-SNE force between point and current node sqdist = 1.0 / (1.0 + sqdist); double mult = cum_size * sqdist; @@ -352,8 +315,8 @@ void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, neg_f[d] += mult * buff[d]; } else { // Recursively apply Barnes-Hut to children - for (unsigned int i = 0; i < no_children; i++) - children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); + for (auto& child : *children) + child.computeNonEdgeForces(point_index, theta, neg_f, sum_Q); } } @@ -362,7 +325,7 @@ template void SPTree::computeEdgeForces(const unsigned int* row_P, const unsigned int* col_P, const double* val_P, int N, - double* pos_f) { + double* pos_f) const { // Loop over all edges in the graph unsigned int ind1 = 0; unsigned int ind2 = 0; @@ -391,16 +354,16 @@ void SPTree::computeEdgeForces(const unsigned int* row_P, // Print out tree template -void SPTree::print() { +void SPTree::print() const { if (cum_size == 0) { fprintf(stderr, "Empty node\n"); return; } - if (is_leaf) { + if (!children) { fprintf(stderr, "Leaf node; data = ["); - if (is_leaf && cum_size > 0) { - double* point = data + index * NDims; + if (!children && cum_size > 0) { + const double* point = data + index * NDims; for (int d = 0; d < NDims; d++) fprintf(stderr, "%f, ", point[d]); fprintf(stderr, " (index = %d)", index); @@ -411,8 +374,8 @@ void SPTree::print() { for (int d = 0; d < NDims; d++) fprintf(stderr, "%f, ", center_of_mass[d]); fprintf(stderr, "]; children are:\n"); - for (int i = 0; i < no_children; i++) - children[i]->print(); + for (auto& child : *children) + child.print(); } } diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index d061b87..5527108 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -34,23 +34,26 @@ #ifndef SPTREE_H #define SPTREE_H +#include +#include + #pragma GCC visibility push(hidden) template class Cell { - double corner[NDims]; - double width[NDims]; + std::array corner; + std::array width; public: - Cell(); - Cell(double* inp_corner, double* inp_width); - ~Cell(); + Cell() = default; + Cell(const double* inp_corner, const double* inp_width); + ~Cell() = default; - double getCorner(unsigned int d); - double getWidth(unsigned int d); + double getCorner(unsigned int d) const; + double getWidth(unsigned int d) const; void setCorner(unsigned int d, double val); void setWidth(unsigned int d, double val); - bool containsPoint(double point[]); + bool containsPoint(const double point[]) const; }; template @@ -59,48 +62,51 @@ class SPTree { enum { no_children = 2 * SPTree::no_children }; private: - // Properties of this node in the tree - bool is_leaf; - unsigned int cum_size; - // Axis-aligned bounding box stored as a center with half-dimensions to // represent the boundaries of this quad tree Cell boundary; // Indices in this space-partitioning tree node, corresponding center-of-mass, // and list of all children - double* data; - double center_of_mass[NDims]; - unsigned int index; + const double* data; + std::array center_of_mass; // Children - SPTree* children[no_children]; + std::unique_ptr, no_children>> children; + + // Properties of this node in the tree + unsigned int cum_size; + unsigned int index; + + SPTree(const SPTree&) = delete; + SPTree& operator=(const SPTree&) = delete; public: - SPTree(double* inp_data, unsigned int N); - SPTree(double* inp_data, double* inp_corner, double* inp_width); - SPTree(double* inp_data, unsigned int N, double* inp_corner, - double* inp_width); - ~SPTree(); + SPTree() = default; + SPTree(SPTree&&) = default; + SPTree(const double* inp_data, unsigned int N); + ~SPTree() = default; void setData(double* inp_data); void construct(Cell boundary); - bool insert(unsigned int new_index); - void subdivide(); - bool isCorrect(); + bool isCorrect() const; void rebuildTree(); - void getAllIndices(unsigned int* indices); - unsigned int getDepth(); + void getAllIndices(unsigned int* indices) const; + unsigned int getDepth() const; void computeNonEdgeForces(unsigned int point_index, double theta, - double neg_f[], double* sum_Q); + double neg_f[], double* sum_Q) const; void computeEdgeForces(const unsigned int* row_P, const unsigned int* col_P, - const double* val_P, int N, double* pos_f); - void print(); + const double* val_P, int N, double* pos_f) const; + void print() const; private: - void init(double* inp_data, double* inp_corner, double* inp_width); + void init(const double* inp_data, const double* inp_corner, + const double* inp_width); void fill(unsigned int N); - unsigned int getAllIndices(unsigned int* indices, unsigned int loc); - bool isChild(unsigned int test_index, unsigned int start, unsigned int end); + bool insert(unsigned int new_index); + void subdivide(); + unsigned int getAllIndices(unsigned int* indices, unsigned int loc) const; + bool isChild(unsigned int test_index, unsigned int start, + unsigned int end) const; }; template <> From d4d04ccc84cd99912e61657a1c63c795124a1222 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 13:17:05 -0700 Subject: [PATCH 10/17] Do not carry a copy of data pointer in every node. This decreases the in-memory size of the tree, which significantly increases the speed since it's easier for the CPU to fit it in cache. There's more that can be done with this - in particular, the boundary. But that's more complicated. --- tsne/bh_sne_src/sptree.cpp | 88 +++++++++++++++++++++----------------- tsne/bh_sne_src/sptree.h | 51 +++++++++++++--------- 2 files changed, 79 insertions(+), 60 deletions(-) diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index 5548183..a928b71 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -87,7 +87,7 @@ bool Cell::containsPoint(const double point[]) const { // Default constructor for SPTree -- build tree, too! template -SPTree::SPTree(const double* inp_data, unsigned int N) { +SPTree::SPTree(const double* inp_data, unsigned int N) : data(inp_data) { // Compute mean, width, and height of current map (boundaries of SPTree) int nD = 0; array mean_Y; @@ -115,15 +115,14 @@ SPTree::SPTree(const double* inp_data, unsigned int N) { array width; for (int d = 0; d < NDims; d++) width[d] = fmax(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; - init(inp_data, mean_Y.data(), width.data()); + node.init(mean_Y.data(), width.data()); fill(N); } // Main initialization function template -void SPTree::init(const double* inp_data, const double* inp_corner, - const double* inp_width) { - data = inp_data; +void SPTree::Node::init(const double* inp_corner, + const double* inp_width) { cum_size = 0; for (unsigned int d = 0; d < NDims; d++) @@ -135,15 +134,10 @@ void SPTree::init(const double* inp_data, const double* inp_corner, center_of_mass[d] = .0; } -// Update the data underlying this tree -template -void SPTree::setData(double* inp_data) { - data = inp_data; -} - // Insert a point into the SPTree template -bool SPTree::insert(unsigned int new_index) { +bool SPTree::Node::insert(const double* data, unsigned int new_index) { + assert(data); // Ignore objects which do not belong in this quad tree const double* point = data + new_index * NDims; if (!boundary.containsPoint(point)) @@ -179,11 +173,11 @@ bool SPTree::insert(unsigned int new_index) { // Otherwise, we need to subdivide the current cell if (!children) - subdivide(); + subdivide(data); // Find out where the point can be inserted for (auto& child : *children) - if (child.insert(new_index)) + if (child.insert(data, new_index)) return true; // Otherwise, the point cannot be inserted (this should never happen) return false; @@ -192,7 +186,7 @@ bool SPTree::insert(unsigned int new_index) { // Create four children which fully divide this cell into four quads of equal // area template -void SPTree::subdivide() { +void SPTree::Node::subdivide(const double* data) { assert(!children); // Create new children double new_corner[NDims]; @@ -200,7 +194,7 @@ void SPTree::subdivide() { for (unsigned int d = 0; d < NDims; d++) { new_width[d] = .5 * boundary.getWidth(d); } - children = make_unique, no_children>>(); + children = make_unique::Node, no_children>>(); for (unsigned int i = 0; i < no_children; i++) { unsigned int div = 1; for (unsigned int d = 0; d < NDims; d++) { @@ -210,12 +204,12 @@ void SPTree::subdivide() { new_corner[d] = boundary.getCorner(d) + .5 * boundary.getWidth(d); div *= 2; } - (*children)[i].init(data, new_corner, new_width); + (*children)[i].init(new_corner, new_width); } // Move existing points to correct children for (auto& child : *children) { - if (child.insert(index)) { + if (child.insert(data, index)) { break; } } @@ -226,20 +220,25 @@ void SPTree::subdivide() { template void SPTree::fill(unsigned int N) { for (unsigned int i = 0; i < N; i++) - insert(i); + node.insert(data, i); } // Checks whether the specified tree is correct template bool SPTree::isCorrect() const { + return node.isCorrect(data); +} + +template +bool SPTree::Node::isCorrect(const double* data) const { if (!children && cum_size > 0) { const double* point = data + index * NDims; if (!boundary.containsPoint(point)) return false; } if (children) { - for (auto& child : *children) { - if (!child.isCorrect()) { + for (const auto& child : *children) { + if (!child.isCorrect(data)) { return false; } } @@ -250,13 +249,13 @@ bool SPTree::isCorrect() const { // Build a list of all indices in SPTree template void SPTree::getAllIndices(unsigned int* indices) const { - getAllIndices(indices, 0); + node.getAllIndices(indices, 0); } // Build a list of all indices in SPTree template -unsigned int SPTree::getAllIndices(unsigned int* indices, - unsigned int loc) const { +unsigned int SPTree::Node::getAllIndices(unsigned int* indices, + unsigned int loc) const { // Gather indices in current quadrant if (!children && cum_size > 0) { indices[++loc] = index; @@ -270,20 +269,17 @@ unsigned int SPTree::getAllIndices(unsigned int* indices, return loc; } -template -unsigned int SPTree::getDepth() const { - if (!children) - return 1; - int depth = 0; - for (auto& child : *children) - depth = fmax(depth, child.getDepth()); - return 1 + depth; -} - // Compute non-edge forces using Barnes-Hut algorithm template void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) const { + node.computeNonEdgeForces(data, point_index, theta, neg_f, sum_Q); +} + +template +[[gnu::hot]] void SPTree::Node::computeNonEdgeForces( + const double* data, unsigned int point_index, double theta, double neg_f[], + double* sum_Q) const { // Make sure that we spend no time on empty nodes or self-interactions if (cum_size == 0 || (!children && cum_size >= 1 && index == point_index)) return; @@ -315,8 +311,8 @@ void SPTree::computeNonEdgeForces(unsigned int point_index, double theta, neg_f[d] += mult * buff[d]; } else { // Recursively apply Barnes-Hut to children - for (auto& child : *children) - child.computeNonEdgeForces(point_index, theta, neg_f, sum_Q); + for (const auto& child : *children) + child.computeNonEdgeForces(data, point_index, theta, neg_f, sum_Q); } } @@ -326,6 +322,15 @@ void SPTree::computeEdgeForces(const unsigned int* row_P, const unsigned int* col_P, const double* val_P, int N, double* pos_f) const { + node.computeEdgeForces(data, row_P, col_P, val_P, N, pos_f); +} + +template +void SPTree::Node::computeEdgeForces(const double* data, + const unsigned int* row_P, + const unsigned int* col_P, + const double* val_P, int N, + double* pos_f) const { // Loop over all edges in the graph unsigned int ind1 = 0; unsigned int ind2 = 0; @@ -355,6 +360,11 @@ void SPTree::computeEdgeForces(const unsigned int* row_P, // Print out tree template void SPTree::print() const { + node.print(data); +} + +template +void SPTree::Node::print(const double* data) const { if (cum_size == 0) { fprintf(stderr, "Empty node\n"); return; @@ -367,15 +377,15 @@ void SPTree::print() const { for (int d = 0; d < NDims; d++) fprintf(stderr, "%f, ", point[d]); fprintf(stderr, " (index = %d)", index); - fprintf(stderr, "]\n"); } + fprintf(stderr, "]\n"); } else { fprintf(stderr, "Intersection node with center-of-mass = ["); for (int d = 0; d < NDims; d++) fprintf(stderr, "%f, ", center_of_mass[d]); fprintf(stderr, "]; children are:\n"); - for (auto& child : *children) - child.print(); + for (const auto& child : *children) + child.print(data); } } diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 5527108..3be732a 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -62,21 +62,41 @@ class SPTree { enum { no_children = 2 * SPTree::no_children }; private: - // Axis-aligned bounding box stored as a center with half-dimensions to - // represent the boundaries of this quad tree - Cell boundary; + class Node { + public: + bool isCorrect(const double* data) const; + bool insert(const double* data, unsigned int new_index); + void subdivide(const double* data); + void print(const double* data) const; + void computeNonEdgeForces(const double* data, unsigned int point_index, + double theta, double neg_f[], + double* sum_Q) const; + void computeEdgeForces(const double* data, const unsigned int* row_P, + const unsigned int* col_P, const double* val_P, + int N, double* pos_f) const; + unsigned int getAllIndices(unsigned int* indices, unsigned int loc) const; + void init(const double* inp_corner, const double* inp_width); + + private: + // Axis-aligned bounding box stored as a center with half-dimensions to + // represent the boundaries of this quad tree + Cell boundary; + + std::array center_of_mass; + + // Children + std::unique_ptr::Node, no_children>> children; + + // Properties of this node in the tree + unsigned int index; + unsigned int cum_size; + }; // Indices in this space-partitioning tree node, corresponding center-of-mass, // and list of all children const double* data; - std::array center_of_mass; - // Children - std::unique_ptr, no_children>> children; - - // Properties of this node in the tree - unsigned int cum_size; - unsigned int index; + Node node; SPTree(const SPTree&) = delete; SPTree& operator=(const SPTree&) = delete; @@ -86,12 +106,8 @@ class SPTree { SPTree(SPTree&&) = default; SPTree(const double* inp_data, unsigned int N); ~SPTree() = default; - void setData(double* inp_data); - void construct(Cell boundary); bool isCorrect() const; - void rebuildTree(); void getAllIndices(unsigned int* indices) const; - unsigned int getDepth() const; void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) const; void computeEdgeForces(const unsigned int* row_P, const unsigned int* col_P, @@ -99,14 +115,7 @@ class SPTree { void print() const; private: - void init(const double* inp_data, const double* inp_corner, - const double* inp_width); void fill(unsigned int N); - bool insert(unsigned int new_index); - void subdivide(); - unsigned int getAllIndices(unsigned int* indices, unsigned int loc) const; - bool isChild(unsigned int test_index, unsigned int start, - unsigned int end) const; }; template <> From 8c86710da89e5b64ba9b9343aa1bc04803f43199 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 13:49:24 -0700 Subject: [PATCH 11/17] Don't use conda on Travis. Rather than downloading an entire miniconda distribution, just `pip install` the requisite packages. Should speed up the build significantly. Also, use clang compiler only, because otherwise we fail the numerical stability test (which is ok, generally, but annoying). Use lld to link, because the llvm LTO plugin for ld.gold doesn't work on travis. Turn of stability test on CI. Minor compiler optimization differences can destroy repeatability. Getting it to not do that is too complicated. --- .travis.yml | 29 +++++++---------------------- setup.py | 14 +++++++++----- tsne/tests/test_stability.py | 6 +++++- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0ac713a..fda95df 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,37 +1,22 @@ language: python python: - - "2.7" - # - "3.4" +- "2.7" +compiler: clang +env: +- CC="clang" CXX="clang++" LDFLAGS="-fuse-ld=lld" dist: xenial addons: apt: packages: - build-essential - -sudo: false + - liblapack3 install: - - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - fi - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - - conda info -a - - - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION - - source activate test-environment - - conda install numpy cython scipy pytest scikit-learn - - python setup.py install - - source deactivate - + - pip install numpy==1.14.2 cython==0.28.2 pytest==3.5.1 scikit-learn==0.19.1 scipy==1.0.1 script: - pwd - - source activate test-environment + - python setup.py install - cd tsne/tests - py.test -s -vv diff --git a/setup.py b/setup.py index d8de271..20dfab5 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ """ import sys +import os import platform from distutils.core import setup @@ -47,16 +48,19 @@ else: # LINUX - + opt_flags = ['-msse3', '-O3', '-flto'] + ldflags = list(opt_flags) + if 'LDFLAGS' in os.environ and os.environ['LDFLAGS']: + ldflags.extend(os.environ['LDFLAGS'].split(' ')) + ldflags.extend(['-Wl,-Bdynamic,--as-needed', '-lgcc_s']) ext_modules = [Extension(name='bh_sne', sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], include_dirs=[ numpy.get_include(), 'tsne/bh_sne_src/'], - extra_compile_args=['-msse3', '-O3', '-Wall', '-flto', - '-fPIC', '-w', '-std=c++14'], - extra_link_args=['-flto', - '-Wl,-Bdynamic,--as-needed', '-lgcc_s'], + extra_compile_args=opt_flags + + ['-Wall', '-fPIC', '-std=c++14', '-w'], + extra_link_args=ldflags, language='c++'), ] diff --git a/tsne/tests/test_stability.py b/tsne/tests/test_stability.py index 68a2eac..b78edc0 100644 --- a/tsne/tests/test_stability.py +++ b/tsne/tests/test_stability.py @@ -1,9 +1,13 @@ - def test_seed(): from tsne import bh_sne from sklearn.datasets import load_iris import numpy as np import os.path + from os import environ + + if 'CI' in environ: + # too many different compiler differences causing numeric instability. + return iris = load_iris() From 6f1065d91b66b6d7e2e4d938ff9c8c500f6baa0a Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Wed, 13 Mar 2019 17:06:01 -0700 Subject: [PATCH 12/17] Add support for PGO build with clang. --- Makefile | 20 ++++++++++++++++++-- setup.py | 5 +++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 166b268..a710977 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,22 @@ build: setup.py tsne/bh_sne.pyx \ $(wildcard tsne/bh_sne_src/*.cpp) python $< build_ext --inplace +PROFILE_LOCATION=$(shell pwd)/profile + +$(PROFILE_LOCATION)/bh_sne.profdata: PROFILE=generate +$(PROFILE_LOCATION)/bh_sne.profdata: setup.py Makefile tsne/bh_sne.pyx \ + $(wildcard tsne/bh_sne_src/*.h) \ + $(wildcard tsne/bh_sne_src/*.cpp) + rm -rf build tsne/*.so *.so $(PROFILE_LOCATION)/*.profraw + PROFILE=generate python $< build_ext --inplace + mkdir -p $(PROFILE_LOCATION) + cd tsne/tests && LLVM_PROFILE_FILE="$(PROFILE_LOCATION)/%p-%m.profraw" py.test + llvm-profdata merge -output=$@ $(PROFILE_LOCATION)/*.profraw + rm -rf tsne/*.so *.so build tsne/*.o + +build_pgo: setup.py $(PROFILE_LOCATION)/bh_sne.profdata + PROFILE="$(PROFILE_LOCATION)/bh_sne.profdata" python $< build_ext --inplace + install: setup.py build python $< install @@ -14,10 +30,10 @@ sdist: setup.py tsne/bh_sne.pyx \ python $< sdist test: build - cd tsne/tests && py.test -s -vv + cd tsne/tests && time py.test -s -vv clean: make -C tsne/bh_sne_src clean - rm -rf *.pyc *.so build/ bh_sne.cpp + rm -rf *.pyc *.so build/ profile/ bh_sne.cpp rm -rf tsne/*.pyc tsne/*.so tsne/*.o tsne/build/ tsne/bh_sne.cpp rm -rf .pytest_cache/ diff --git a/setup.py b/setup.py index 20dfab5..709dbb0 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,11 @@ else: # LINUX opt_flags = ['-msse3', '-O3', '-flto'] + if 'PROFILE' in os.environ: + if os.environ['PROFILE'] == 'generate': + opt_flags.append('-fprofile-instr-generate') + elif os.path.isfile(os.environ['PROFILE']): + opt_flags.append('-fprofile-instr-use='+os.environ['PROFILE']) ldflags = list(opt_flags) if 'LDFLAGS' in os.environ and os.environ['LDFLAGS']: ldflags.extend(os.environ['LDFLAGS'].split(' ')) From ec20c0a478002864c4a3a7494985f1177fa70740 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Mon, 25 Mar 2019 18:45:52 -0700 Subject: [PATCH 13/17] Make VpTree DataPoint objects much more efficient. 1. Don't copy the data. Just keep a reference. 2. Don't store the dimension in every datapoint. That's ridiculously inefficient. 3. Don't store the index in every datapoint. Just compute it as needed from pointer arithmetic. --- tsne/bh_sne_src/tsne.cpp | 10 ++-- tsne/bh_sne_src/vptree.h | 113 +++++++++++++-------------------------- 2 files changed, 44 insertions(+), 79 deletions(-) diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index d31c63f..fcfbf31 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -324,10 +324,12 @@ void computeGaussianPerplexity(double* X, int N, int D, row_P[n + 1] = row_P[n] + (unsigned int)K; // Build ball tree on data set - VpTree tree; - vector obj_X(N, DataPoint(D, -1, X)); + auto dist = [D](auto x, auto y) { return euclidean_distance(D, x, y); }; + VpTree tree((euclidean_distance(D))); + vector obj_X; + obj_X.reserve(N); for (int n = 0; n < N; n++) - obj_X[n] = DataPoint(D, n, X + n * D); + obj_X.emplace_back(DataPoint(X + n * D)); tree.create(obj_X); // Loop over all points to find nearest neighbors @@ -395,7 +397,7 @@ void computeGaussianPerplexity(double* X, int N, int D, for (unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P; for (unsigned int m = 0; m < K; m++) { - col_P[row_P[n] + m] = (unsigned int)indices[m + 1].index(); + col_P[row_P[n] + m] = (unsigned int)(indices[m + 1]._x - X) / D; val_P[row_P[n] + m] = cur_P[m]; } } diff --git a/tsne/bh_sne_src/vptree.h b/tsne/bh_sne_src/vptree.h index 2092ed5..2639e97 100644 --- a/tsne/bh_sne_src/vptree.h +++ b/tsne/bh_sne_src/vptree.h @@ -47,79 +47,45 @@ #pragma GCC visibility push(hidden) -class DataPoint { - int _ind; - +class DataPoint final { public: - double* _x; - int _D; - DataPoint() { - _D = 1; - _ind = -1; - _x = NULL; - } - DataPoint(int D, int ind, const double* x) { - _D = D; - _ind = ind; - _x = (double*)malloc(_D * sizeof(double)); - for (int d = 0; d < _D; d++) - _x[d] = x[d]; - } - DataPoint(const DataPoint& other) { // this makes a deep copy -- should not - // free anything - if (this != &other) { - _D = other.dimensionality(); - _ind = other.index(); - _x = (double*)malloc(_D * sizeof(double)); - for (int d = 0; d < _D; d++) - _x[d] = other.x(d); - } - } - ~DataPoint() { - if (_x != NULL) - free(_x); - } - DataPoint& operator=(const DataPoint& other) { // asignment should free old - // object - if (this != &other) { - if (_x != NULL) - free(_x); - _D = other.dimensionality(); - _ind = other.index(); - _x = (double*)malloc(_D * sizeof(double)); - for (int d = 0; d < _D; d++) - _x[d] = other.x(d); - } - return *this; - } - int index() const { - return _ind; - } - int dimensionality() const { - return _D; + const double* _x; + DataPoint(const double* x) : _x(x) { } + DataPoint(const DataPoint&) = default; + DataPoint(DataPoint&&) = default; + DataPoint& operator=(const DataPoint& other) = default; double x(int d) const { return _x[d]; } }; -double euclidean_distance(const DataPoint& t1, const DataPoint& t2) { - double dd = .0; - double* x1 = t1._x; - double* x2 = t2._x; - double diff; - for (int d = 0; d < t1._D; d++) { - diff = (x1[d] - x2[d]); - dd += diff * diff; +class euclidean_distance final { + const int _D; + + public: + explicit euclidean_distance(int D) : _D(D) { + } + euclidean_distance(euclidean_distance&&) = default; + euclidean_distance(const euclidean_distance&) = default; + double operator()(const DataPoint& t1, const DataPoint& t2) const { + double dd = .0; + const double* x1 = t1._x; + const double* x2 = t2._x; + double diff; + for (int d = 0; d < _D; d++) { + diff = (x1[d] - x2[d]); + dd += diff * diff; + } + return sqrt(dd); } - return sqrt(dd); -} +}; -template +template class VpTree { public: // Default constructor - VpTree() = default; + explicit VpTree(Distance&& distance) : distance(distance){}; // Destructor ~VpTree() = default; @@ -157,6 +123,7 @@ class VpTree { } private: + const Distance distance; std::vector _items; double _tau; @@ -186,16 +153,6 @@ class VpTree { } }; - // Distance comparator for use in std::nth_element - struct DistanceComparator { - const T& item; - DistanceComparator(const T& item) : item(item) { - } - bool operator()(const T& a, const T& b) { - return distance(item, a) < distance(item, b); - } - }; - // Function that (recursively) fills the tree std::unique_ptr buildFromPoints(int lower, int upper) { if (upper == lower) { // indicates that we're done here! @@ -214,12 +171,15 @@ class VpTree { // Partition around the median distance int median = (upper + lower) / 2; - std::nth_element(_items.begin() + lower + 1, _items.begin() + median, - _items.begin() + upper, - DistanceComparator(_items[lower])); + auto& lower_item = _items[lower]; + std::nth_element( + _items.begin() + lower + 1, _items.begin() + median, + _items.begin() + upper, [this, lower_item](auto& x, auto& y) { + return distance(lower_item, x) < distance(lower_item, y); + }); // Threshold of the new node will be the distance to the median - node->threshold = distance(_items[lower], _items[median]); + node->threshold = distance(lower_item, _items[median]); // Recursively build tree node->index = lower; @@ -289,6 +249,9 @@ class VpTree { } } } + + VpTree(const VpTree&) = delete; + VpTree& operator=(const VpTree&) = delete; }; #pragma GCC visibility pop From 1fb79c1e80d23f26bc0c3b26bed22fd3a3b3c7f4 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Mon, 25 Mar 2019 19:01:28 -0700 Subject: [PATCH 14/17] Align SPTree nodes on 16-byte bounds. This allows for more sse optimizations in a few cases. --- tsne/bh_sne_src/sptree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 3be732a..53508ee 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -40,7 +40,7 @@ #pragma GCC visibility push(hidden) template -class Cell { +class alignas(16) Cell { std::array corner; std::array width; @@ -62,7 +62,7 @@ class SPTree { enum { no_children = 2 * SPTree::no_children }; private: - class Node { + class alignas(16) Node { public: bool isCorrect(const double* data) const; bool insert(const double* data, unsigned int new_index); @@ -78,12 +78,12 @@ class SPTree { void init(const double* inp_corner, const double* inp_width); private: + std::array center_of_mass; + // Axis-aligned bounding box stored as a center with half-dimensions to // represent the boundaries of this quad tree Cell boundary; - std::array center_of_mass; - // Children std::unique_ptr::Node, no_children>> children; From cba336e4fa2593ebd9aef47352737ebf54c8c0d0 Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Fri, 29 Mar 2019 10:06:52 -0700 Subject: [PATCH 15/17] Restructure vptree to work on indices. Makes the datapoints smaller and allows vectorization in initialization to work better. --- tsne/bh_sne_src/tsne.cpp | 14 +++++++------- tsne/bh_sne_src/vptree.h | 29 ++++++++--------------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index fcfbf31..13f5386 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -324,18 +324,18 @@ void computeGaussianPerplexity(double* X, int N, int D, row_P[n + 1] = row_P[n] + (unsigned int)K; // Build ball tree on data set - auto dist = [D](auto x, auto y) { return euclidean_distance(D, x, y); }; - VpTree tree((euclidean_distance(D))); - vector obj_X; - obj_X.reserve(N); + VpTree tree((euclidean_distance(D, X))); + vector obj_X(N); for (int n = 0; n < N; n++) - obj_X.emplace_back(DataPoint(X + n * D)); + obj_X[n] = n; tree.create(obj_X); // Loop over all points to find nearest neighbors fprintf(stderr, "Building tree...\n"); - vector indices; + vector indices; vector distances; + indices.reserve(N); + distances.reserve(N); for (int n = 0; n < N; n++) { if (n % 10000 == 0) fprintf(stderr, " - point %d of %d\n", n, N); @@ -397,7 +397,7 @@ void computeGaussianPerplexity(double* X, int N, int D, for (unsigned int m = 0; m < K; m++) cur_P[m] /= sum_P; for (unsigned int m = 0; m < K; m++) { - col_P[row_P[n] + m] = (unsigned int)(indices[m + 1]._x - X) / D; + col_P[row_P[n] + m] = indices[m + 1]; val_P[row_P[n] + m] = cur_P[m]; } } diff --git a/tsne/bh_sne_src/vptree.h b/tsne/bh_sne_src/vptree.h index 2639e97..c9387ce 100644 --- a/tsne/bh_sne_src/vptree.h +++ b/tsne/bh_sne_src/vptree.h @@ -47,34 +47,21 @@ #pragma GCC visibility push(hidden) -class DataPoint final { - public: - const double* _x; - DataPoint(const double* x) : _x(x) { - } - DataPoint(const DataPoint&) = default; - DataPoint(DataPoint&&) = default; - DataPoint& operator=(const DataPoint& other) = default; - double x(int d) const { - return _x[d]; - } -}; - class euclidean_distance final { const int _D; + const double* data; public: - explicit euclidean_distance(int D) : _D(D) { + explicit euclidean_distance(int D, const double* data) : _D(D), data(data) { } euclidean_distance(euclidean_distance&&) = default; euclidean_distance(const euclidean_distance&) = default; - double operator()(const DataPoint& t1, const DataPoint& t2) const { + double operator()(const unsigned int t1, const unsigned int t2) const { double dd = .0; - const double* x1 = t1._x; - const double* x2 = t2._x; - double diff; + const double* x1 = data+t1*_D; + const double* x2 = data+t2*_D; for (int d = 0; d < _D; d++) { - diff = (x1[d] - x2[d]); + double diff = (x1[d] - x2[d]); dd += diff * diff; } return sqrt(dd); @@ -156,7 +143,7 @@ class VpTree { // Function that (recursively) fills the tree std::unique_ptr buildFromPoints(int lower, int upper) { if (upper == lower) { // indicates that we're done here! - return NULL; + return nullptr; } // Lower index is center of current node @@ -194,7 +181,7 @@ class VpTree { // Helper function that searches the tree void search(const Node* node, const T& target, int k, std::priority_queue& heap) { - if (node == NULL) + if (node == nullptr) return; // indicates that we're done here // Compute distance between target and current node From 7a774ee55c67272fada99a167c4dbd0b534cfb6a Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Fri, 29 Mar 2019 10:45:01 -0700 Subject: [PATCH 16/17] Have computeEdgeForces return a vector In addition to being more clear, this allows the compiler to recognize that pos_f is not aliased anywhere else so it can vectorize the calculation better. --- tsne/bh_sne_src/sptree.cpp | 23 +++++++++++++---------- tsne/bh_sne_src/sptree.h | 13 ++++++++----- tsne/bh_sne_src/tsne.cpp | 28 ++++++++++++++-------------- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index a928b71..d57d751 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -43,6 +43,7 @@ using std::array; using std::make_unique; +using std::vector; template Cell::Cell(const double* inp_corner, const double* inp_width) { @@ -318,19 +319,20 @@ template // Computes edge forces template -void SPTree::computeEdgeForces(const unsigned int* row_P, - const unsigned int* col_P, - const double* val_P, int N, - double* pos_f) const { - node.computeEdgeForces(data, row_P, col_P, val_P, N, pos_f); +vector SPTree::computeEdgeForces(const unsigned int* row_P, + const unsigned int* col_P, + const double* val_P, + int N) const { + return node.computeEdgeForces(data, row_P, col_P, val_P, N); } template -void SPTree::Node::computeEdgeForces(const double* data, - const unsigned int* row_P, - const unsigned int* col_P, - const double* val_P, int N, - double* pos_f) const { +vector SPTree::Node::computeEdgeForces(const double* data, + const unsigned int* row_P, + const unsigned int* col_P, + const double* val_P, + int N) const { + vector pos_f(N * NDims); // Loop over all edges in the graph unsigned int ind1 = 0; unsigned int ind2 = 0; @@ -355,6 +357,7 @@ void SPTree::Node::computeEdgeForces(const double* data, } ind1 += NDims; } + return pos_f; } // Print out tree diff --git a/tsne/bh_sne_src/sptree.h b/tsne/bh_sne_src/sptree.h index 53508ee..e399b60 100644 --- a/tsne/bh_sne_src/sptree.h +++ b/tsne/bh_sne_src/sptree.h @@ -36,6 +36,7 @@ #include #include +#include #pragma GCC visibility push(hidden) @@ -71,9 +72,10 @@ class SPTree { void computeNonEdgeForces(const double* data, unsigned int point_index, double theta, double neg_f[], double* sum_Q) const; - void computeEdgeForces(const double* data, const unsigned int* row_P, - const unsigned int* col_P, const double* val_P, - int N, double* pos_f) const; + std::vector computeEdgeForces(const double* data, + const unsigned int* row_P, + const unsigned int* col_P, + const double* val_P, int N) const; unsigned int getAllIndices(unsigned int* indices, unsigned int loc) const; void init(const double* inp_corner, const double* inp_width); @@ -110,8 +112,9 @@ class SPTree { void getAllIndices(unsigned int* indices) const; void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q) const; - void computeEdgeForces(const unsigned int* row_P, const unsigned int* col_P, - const double* val_P, int N, double* pos_f) const; + std::vector computeEdgeForces(const unsigned int* row_P, + const unsigned int* col_P, + const double* val_P, int N) const; void print() const; private: diff --git a/tsne/bh_sne_src/tsne.cpp b/tsne/bh_sne_src/tsne.cpp index 13f5386..da43130 100644 --- a/tsne/bh_sne_src/tsne.cpp +++ b/tsne/bh_sne_src/tsne.cpp @@ -88,7 +88,7 @@ void computeGaussianPerplexity(double* X, int N, int D, vector* _col_P, vector* _val_P, double perplexity, int K); -void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD); +vector computeSquaredEuclideanDistance(double* X, int N, int D); double randn(); void symmetrizeMatrix(vector* row_P, vector* col_P, vector* val_P, int N); @@ -108,10 +108,9 @@ void computeGradient(const vector& inp_row_P, // Compute all terms required for t-SNE gradient double sum_Q = .0; - vector pos_f(N * NDIMS); vector neg_f(N * NDIMS); - tree.computeEdgeForces(inp_row_P.data(), inp_col_P.data(), inp_val_P.data(), - N, pos_f.data()); + auto pos_f = tree.computeEdgeForces(inp_row_P.data(), inp_col_P.data(), + inp_val_P.data(), N); for (int n = 0; n < N; n++) tree.computeNonEdgeForces(n, theta, neg_f.data() + n * NDIMS, &sum_Q); @@ -129,8 +128,7 @@ void computeExactGradient(const vector& P, double* Y, int N, dC.assign(N * D, 0.0); // Compute the squared Euclidean distance matrix - vector DD(N * N); - computeSquaredEuclideanDistance(Y, N, D, DD.data()); + vector DD = computeSquaredEuclideanDistance(Y, N, D); // Compute Q-matrix and normalization sum vector Q(N * N); @@ -169,9 +167,8 @@ void computeExactGradient(const vector& P, double* Y, int N, template double evaluateError(const vector& P, double* Y, int N) { // Compute the squared Euclidean distance matrix - vector DD(N * N); + vector DD = computeSquaredEuclideanDistance(Y, N, D); vector Q(N * N); - computeSquaredEuclideanDistance(Y, N, D, DD.data()); // Compute Q-matrix and normalization sum int nN = 0; @@ -240,8 +237,7 @@ double evaluateError(const vector& row_P, void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { // Compute the squared Euclidean distance matrix - vector DD(N * N); - computeSquaredEuclideanDistance(X, N, D, DD.data()); + vector DD = computeSquaredEuclideanDistance(X, N, D); // Compute the Gaussian kernel row by row int nN = 0; @@ -493,7 +489,8 @@ void symmetrizeMatrix(vector* _row_P, } // Compute squared Euclidean distance matrix -void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { +vector computeSquaredEuclideanDistance(double* X, int N, int D) { + vector DD(N * N); const double* XnD = X; for (int n = 0; n < N; ++n, XnD += D) { const double* XmD = XnD + D; @@ -501,13 +498,16 @@ void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { *curr_elem = 0.0; double* curr_elem_sym = curr_elem + N; for (int m = n + 1; m < N; ++m, XmD += D, curr_elem_sym += N) { - *(++curr_elem) = 0.0; + double total = 0.0; for (int d = 0; d < D; ++d) { - *curr_elem += (XnD[d] - XmD[d]) * (XnD[d] - XmD[d]); + double diff = XnD[d] - XmD[d]; + total += diff * diff; } + *(++curr_elem) = total; *curr_elem_sym = *curr_elem; } } + return DD; } // Makes data zero-mean @@ -644,7 +644,7 @@ void run(double* X, int N, int D, double* Y, double perplexity, double theta, fprintf(stderr, "Symmetrizing...\n"); int nN = 0; for (int n = 0; n < N; n++) { - int mN = (n + 1) * N; + int mN = nN + N; for (int m = n + 1; m < N; m++) { P[nN + m] += P[mN + n]; P[mN + n] = P[nN + m]; From 453270e2f394020b9427868302679da957a5847a Mon Sep 17 00:00:00 2001 From: Adam Azarchs Date: Fri, 29 Mar 2019 11:18:24 -0700 Subject: [PATCH 17/17] Remove sqrt call. Not only was that an important slowdown, but it also impacted repeatability across libm implementations. --- tsne/bh_sne_src/sptree.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tsne/bh_sne_src/sptree.cpp b/tsne/bh_sne_src/sptree.cpp index d57d751..dbad881 100644 --- a/tsne/bh_sne_src/sptree.cpp +++ b/tsne/bh_sne_src/sptree.cpp @@ -297,12 +297,11 @@ template // Check whether we can use this node as a "summary" double max_width = 0.0; - double cur_width; for (unsigned int d = 0; d < NDims; d++) { - cur_width = boundary.getWidth(d); + double cur_width = boundary.getWidth(d); max_width = (max_width > cur_width) ? max_width : cur_width; } - if (!children || max_width / sqrt(sqdist) < theta) { + if (!children || max_width * max_width < sqdist * theta * theta) { // Compute and add t-SNE force between point and current node sqdist = 1.0 / (1.0 + sqdist); double mult = cum_size * sqdist;