1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2009, Xavier Delacour, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 // 2009-01-12, Xavier Delacour <xavier.delacour@gmail.com>
45 // * hash perf could be improved
46 // * in particular, implement integer only (converted fixed from float input)
48 // * number of hash functions could be reduced (andoni paper)
50 // * redundant distance computations could be suppressed
52 // * rework CvLSHOperations interface-- move some of the loops into it to
53 // * allow efficient async storage
56 // Datar, M., Immorlica, N., Indyk, P., and Mirrokni, V. S. 2004. Locality-sensitive hashing
57 // scheme based on p-stable distributions. In Proceedings of the Twentieth Annual Symposium on
58 // Computational Geometry (Brooklyn, New York, USA, June 08 - 11, 2004). SCG '04. ACM, New York,
59 // NY, 253-262. DOI= http://doi.acm.org/10.1145/997817.997857
70 class memory_hash_ops : public CvLSHOperations {
73 std::vector<int> free_data;
77 std::vector<node> nodes;
78 std::vector<int> free_nodes;
79 std::vector<int> bins;
82 memory_hash_ops(int _d, int n) : d(_d) {
86 virtual int vector_add(const void* _p) {
87 const T* p = (const T*)_p;
89 if (free_data.empty()) {
91 data.insert(data.end(), d, 0);
93 i = free_data.end()[-1];
96 std::copy(p, p + d, data.begin() + i);
99 virtual void vector_remove(int i) {
100 free_data.push_back(i * d);
102 virtual const void* vector_lookup(int i) {
105 virtual void vector_reserve(int n) {
108 virtual unsigned int vector_count() {
109 return (unsigned)(data.size() / d - free_data.size());
112 virtual void hash_insert(lsh_hash h, int /*l*/, int i) {
114 if (free_nodes.empty()) {
115 ii = (int)nodes.size();
116 nodes.push_back(node());
118 ii = free_nodes.end()[-1];
119 free_nodes.pop_back();
122 int h1 = h.h1 % bins.size();
128 virtual void hash_remove(lsh_hash h, int /*l*/, int i) {
129 int h1 = h.h1 % bins.size();
130 for (int ii = bins[h1], iin, iip = -1; ii != -1; iip = ii, ii = iin) {
131 iin = nodes[ii].next;
132 if (nodes[ii].h2 == h.h2 && nodes[ii].i == i) {
133 free_nodes.push_back(ii);
137 nodes[iip].next = iin;
141 virtual int hash_lookup(lsh_hash h, int /*l*/, int* ret_i, int ret_i_max) {
142 int h1 = h.h1 % bins.size();
144 for (int ii = bins[h1]; ii != -1 && k < ret_i_max; ii = nodes[ii].next)
145 if (nodes[ii].h2 == h.h2)
146 ret_i[k++] = nodes[ii].i;
151 template <class T,int cvtype>
152 class pstable_l2_func {
153 CvMat *a, *b, *r1, *r2;
156 pstable_l2_func(const pstable_l2_func& x);
157 pstable_l2_func& operator= (const pstable_l2_func& rhs);
159 typedef T scalar_type;
160 typedef T accum_type;
161 pstable_l2_func(int _d, int _k, double _r, CvRNG& rng)
162 : d(_d), k(_k), r(_r) {
163 assert(sizeof(T) == CV_ELEM_SIZE1(cvtype));
164 a = cvCreateMat(k, d, cvtype);
165 b = cvCreateMat(k, 1, cvtype);
166 r1 = cvCreateMat(k, 1, CV_32SC1);
167 r2 = cvCreateMat(k, 1, CV_32SC1);
168 cvRandArr(&rng, a, CV_RAND_NORMAL, cvScalar(0), cvScalar(1));
169 cvRandArr(&rng, b, CV_RAND_UNI, cvScalar(0), cvScalar(r));
170 cvRandArr(&rng, r1, CV_RAND_UNI,
171 cvScalar(std::numeric_limits<int>::min()),
172 cvScalar(std::numeric_limits<int>::max()));
173 cvRandArr(&rng, r2, CV_RAND_UNI,
174 cvScalar(std::numeric_limits<int>::min()),
175 cvScalar(std::numeric_limits<int>::max()));
184 // * factor all L functions into this (reduces number of matrices to 4 total;
185 // * simpler syntax in lsh_table). give parameter l here that tells us which
188 lsh_hash operator() (const T* x) const {
189 const T* aj = (const T*)a->data.ptr;
190 const T* bj = (const T*)b->data.ptr;
194 for (int j = 0; j < k; ++j) {
196 for (int jj = 0; jj < d; ++jj)
201 h.h1 += r1->data.i[j] * si;
202 h.h2 += r2->data.i[j] * si;
209 accum_type distance(const T* p, const T* q) const {
211 for (int j = 0; j < d; ++j) {
212 accum_type d1 = p[j] - q[j];
222 typedef typename H::scalar_type scalar_type;
223 typedef typename H::accum_type accum_type;
226 CvLSHOperations* ops;
230 static accum_type comp_dist(const std::pair<int,accum_type>& x,
231 const std::pair<int,accum_type>& y) {
232 return x.second < y.second;
235 lsh_table(const lsh_table& x);
236 lsh_table& operator= (const lsh_table& rhs);
238 lsh_table(CvLSHOperations* _ops, int _d, int _L, int _k, double _r, CvRNG& rng)
239 : ops(_ops), d(_d), L(_L), k(_k), r(_r) {
241 for (int j = 0; j < L; ++j)
242 g[j] = new H(d, k, r, rng);
245 for (int j = 0; j < L; ++j)
253 unsigned int size() const {
254 return ops->vector_count();
257 void add(const scalar_type* data, int n, int* ret_indices = 0) {
258 for (int j=0;j<n;++j) {
259 const scalar_type* x = data+j*d;
260 int i = ops->vector_add(x);
264 for (int l = 0; l < L; ++l) {
265 lsh_hash h = (*g[l])(x);
266 ops->hash_insert(h, l, i);
270 void remove(const int* indices, int n) {
271 for (int j = 0; j < n; ++j) {
273 const scalar_type* x = (const scalar_type*)ops->vector_lookup(i);
275 for (int l = 0; l < L; ++l) {
276 lsh_hash h = (*g[l])(x);
277 ops->hash_remove(h, l, i);
279 ops->vector_remove(i);
282 void query(const scalar_type* q, int k0, int emax, double* dist, int* results) {
283 int* tmp = (int*)cvStackAlloc(sizeof(int) * emax);
284 typedef std::pair<int, accum_type> dr_type; // * swap int and accum_type here, for naming consistency
285 dr_type* dr = (dr_type*)cvStackAlloc(sizeof(dr_type) * k0);
288 // * handle k0 >= emax, in which case don't track max distance
290 for (int l = 0; l < L && emax > 0; ++l) {
291 lsh_hash h = (*g[l])(q);
292 int m = ops->hash_lookup(h, l, tmp, emax);
293 for (int j = 0; j < m && emax > 0; ++j, --emax) {
295 const scalar_type* p = (const scalar_type*)ops->vector_lookup(i);
296 accum_type pd = (*g[l]).distance(p, q);
298 dr[k1++] = std::make_pair(i, pd);
299 std::push_heap(dr, dr + k1, comp_dist);
300 } else if (pd < dr[0].second) {
301 std::pop_heap(dr, dr + k0, comp_dist);
302 dr[k0 - 1] = std::make_pair(i, pd);
303 std::push_heap(dr, dr + k0, comp_dist);
308 for (int j = 0; j < k1; ++j)
309 dist[j] = dr[j].second, results[j] = dr[j].first;
310 std::fill(dist + k1, dist + k0, 0);
311 std::fill(results + k1, results + k0, -1);
313 void query(const scalar_type* data, int n, int k0, int emax, double* dist, int* results) {
314 for (int j = 0; j < n; ++j) {
315 query(data, k0, emax, dist, results);
316 data += d; // * this may not agree with step for some scalar_types
323 typedef lsh_table<pstable_l2_func<float, CV_32FC1> > lsh_pstable_l2_32f;
324 typedef lsh_table<pstable_l2_func<double, CV_64FC1> > lsh_pstable_l2_64f;
329 lsh_pstable_l2_32f* lsh_32f;
330 lsh_pstable_l2_64f* lsh_64f;
334 CvLSH* cvCreateLSH(CvLSHOperations* ops, int d, int L, int k, int type, double r, int64 seed) {
336 CvRNG rng = cvRNG(seed);
339 CV_FUNCNAME("cvCreateLSH");
341 if (type != CV_32FC1 && type != CV_64FC1)
342 CV_ERROR(CV_StsUnsupportedFormat, "vectors must be either CV_32FC1 or CV_64FC1");
346 case CV_32FC1: lsh->u.lsh_32f = new lsh_pstable_l2_32f(ops, d, L, k, r, rng); break;
347 case CV_64FC1: lsh->u.lsh_64f = new lsh_pstable_l2_64f(ops, d, L, k, r, rng); break;
354 CvLSH* cvCreateMemoryLSH(int d, int n, int L, int k, int type, double r, int64 seed) {
355 CvLSHOperations* ops = 0;
358 case CV_32FC1: ops = new memory_hash_ops<float>(d,n); break;
359 case CV_64FC1: ops = new memory_hash_ops<double>(d,n); break;
361 return cvCreateLSH(ops, d, L, k, type, r, seed);
364 void cvReleaseLSH(CvLSH** lsh) {
365 switch ((*lsh)->type) {
366 case CV_32FC1: delete (*lsh)->u.lsh_32f; break;
367 case CV_64FC1: delete (*lsh)->u.lsh_64f; break;
374 unsigned int LSHSize(CvLSH* lsh) {
376 case CV_32FC1: return lsh->u.lsh_32f->size(); break;
377 case CV_64FC1: return lsh->u.lsh_64f->size(); break;
384 void cvLSHAdd(CvLSH* lsh, const CvMat* data, CvMat* indices) {
386 int* ret_indices = 0;
389 CV_FUNCNAME("cvLSHAdd");
392 case CV_32FC1: dims = lsh->u.lsh_32f->dims(); break;
393 case CV_64FC1: dims = lsh->u.lsh_64f->dims(); break;
394 default: assert(0); return;
399 if (dims != data->cols)
400 CV_ERROR(CV_StsBadSize, "data must be n x d, where d is what was used to construct LSH");
402 if (CV_MAT_TYPE(data->type) != lsh->type)
403 CV_ERROR(CV_StsUnsupportedFormat, "type of data and constructed LSH must agree");
405 if (CV_MAT_TYPE(indices->type) != CV_32SC1)
406 CV_ERROR(CV_StsUnsupportedFormat, "indices must be CV_32SC1");
407 if (indices->rows * indices->cols != n)
408 CV_ERROR(CV_StsBadSize, "indices must be n x 1 or 1 x n for n x d data");
409 ret_indices = indices->data.i;
413 case CV_32FC1: lsh->u.lsh_32f->add(data->data.fl, n, ret_indices); break;
414 case CV_64FC1: lsh->u.lsh_64f->add(data->data.db, n, ret_indices); break;
415 default: assert(0); return;
420 void cvLSHRemove(CvLSH* lsh, const CvMat* indices) {
424 CV_FUNCNAME("cvLSHRemove");
426 if (CV_MAT_TYPE(indices->type) != CV_32SC1)
427 CV_ERROR(CV_StsUnsupportedFormat, "indices must be CV_32SC1");
428 n = indices->rows * indices->cols;
430 case CV_32FC1: lsh->u.lsh_32f->remove(indices->data.i, n); break;
431 case CV_64FC1: lsh->u.lsh_64f->remove(indices->data.i, n); break;
432 default: assert(0); return;
437 void cvLSHQuery(CvLSH* lsh, const CvMat* data, CvMat* indices, CvMat* dist, int k, int emax) {
441 CV_FUNCNAME("cvLSHQuery");
444 case CV_32FC1: dims = lsh->u.lsh_32f->dims(); break;
445 case CV_64FC1: dims = lsh->u.lsh_64f->dims(); break;
446 default: assert(0); return;
450 CV_ERROR(CV_StsOutOfRange, "k must be positive");
451 if (CV_MAT_TYPE(data->type) != lsh->type)
452 CV_ERROR(CV_StsUnsupportedFormat, "type of data and constructed LSH must agree");
453 if (dims != data->cols)
454 CV_ERROR(CV_StsBadSize, "data must be n x d, where d is what was used to construct LSH");
455 if (dist->rows != data->rows || dist->cols != k)
456 CV_ERROR(CV_StsBadSize, "dist must be n x k for n x d data");
457 if (dist->rows != indices->rows || dist->cols != indices->cols)
458 CV_ERROR(CV_StsBadSize, "dist and indices must be same size");
459 if (CV_MAT_TYPE(dist->type) != CV_64FC1)
460 CV_ERROR(CV_StsUnsupportedFormat, "dist must be CV_64FC1");
461 if (CV_MAT_TYPE(indices->type) != CV_32SC1)
462 CV_ERROR(CV_StsUnsupportedFormat, "indices must be CV_32SC1");
465 case CV_32FC1: lsh->u.lsh_32f->query(data->data.fl, data->rows,
466 k, emax, dist->data.db, indices->data.i); break;
467 case CV_64FC1: lsh->u.lsh_64f->query(data->data.db, data->rows,
468 k, emax, dist->data.db, indices->data.i); break;
469 default: assert(0); return;