@@ -57,9 +57,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     self.diagonal = diagonal
     self.diagonal_c = diagonal_c
     self.verbose = verbose
-
+
   def fit(self, X, constraints):
     """Learn the MMC model.
+
     Parameters
     ----------
     X : (n x d) data matrix
@@ -73,11 +74,11 @@ def fit(self, X, constraints):
       return self._fit_diag(X, constraints)
     else:
       return self._fit_full(X, constraints)
-
+
   def _process_inputs(self, X, constraints):
-
+
     self.X_ = X = check_array(X)
-
+
     # check to make sure that no two constrained vectors are identical
     a,b,c,d = constraints
     no_ident = vector_norm(X[a] - X[b]) > 1e-9
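
The four arrays unpacked from `constraints` are index vectors into `X`: rows paired by `(a, b)` are similar, rows paired by `(c, d)` are dissimilar. A minimal sketch of building such a tuple (hypothetical toy data, not part of this commit):

    import numpy as np

    # Toy data: points 0/1 and 2/3 are close; the two groups are far apart.
    X = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]])
    a, b = np.array([0, 2]), np.array([1, 3])  # similar pairs (0,1) and (2,3)
    c, d = np.array([0, 1]), np.array([2, 3])  # dissimilar pairs (0,2) and (1,3)
    constraints = (a, b, c, d)
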
@@ -88,7 +89,7 @@ def _process_inputs(self, X, constraints):
       raise ValueError('No non-trivial similarity constraints given for MMC.')
     if len(c) == 0:
       raise ValueError('No non-trivial dissimilarity constraints given for MMC.')
-
+
     # init metric
     if self.A0 is None:
       self.A_ = np.identity(X.shape[1])
@@ -98,11 +99,12 @@ def _process_inputs(self, X, constraints):
         self.A_ /= 10.0
     else:
       self.A_ = check_array(self.A0)
-
+
     return a,b,c,d
 
   def _fit_full(self, X, constraints):
     """Learn full metric using MMC.
+
     Parameters
     ----------
     X : (n x d) data matrix
@@ -115,11 +117,11 @@ def _fit_full(self, X, constraints):
     num_pos = len(a)
     num_neg = len(c)
     num_samples, num_dim = X.shape
-
+
     error1 = error2 = 1e10
     eps = 0.01  # error-bound of iterative projection on C1 and C2
     A = self.A_
-
+
     # Create weight vector from similar samples
     pos_diff = X[a] - X[b]
     w = np.einsum('ij,ik->jk', pos_diff, pos_diff).ravel()
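
The `einsum('ij,ik->jk', ...)` above accumulates the sum of outer products of the rows of `pos_diff`, which equals `pos_diff.T @ pos_diff`. A quick numerical check (illustrative only):

    import numpy as np

    pos_diff = np.random.randn(6, 3)
    w_mat = np.einsum('ij,ik->jk', pos_diff, pos_diff)
    w_loop = sum(np.outer(row, row) for row in pos_diff)  # explicit sum of outer products
    assert np.allclose(w_mat, w_loop)
    assert np.allclose(w_mat, pos_diff.T.dot(pos_diff))   # same result as one matmul
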
@@ -131,27 +133,27 @@ def _fit_full(self, X, constraints):
     #         X[a] - X[b]
     #     ).sum(axis = 0)
     t = w.dot(A.ravel()) / 100.0
-
+
     w_norm = np.linalg.norm(w)
     w1 = w / w_norm  # make `w` a unit vector
     t1 = t / w_norm  # distance from origin to `w^T*x=t` plane
-
+
     cycle = 1
     alpha = 0.1  # initial step size along gradient
-
+
     grad1 = self._fS1(X, a, b, A)  # gradient of similarity constraint function
     grad2 = self._fD1(X, c, d, A)  # gradient of dissimilarity constraint function
     M = self._grad_projection(grad1, grad2)  # gradient of fD1 orthogonal to fS1
-
+
     A_old = A.copy()
 
     for cycle in xrange(self.max_iter):
-
+
       # projection of constraints C1 and C2
       satisfy = False
-
+
       for it in xrange(self.max_proj):
-
+
         # First constraint:
         # f(A) = \sum_{i,j \in S} d_ij' A d_ij <= t              (1)
         # (1) can be rewritten as a linear constraint: w^T x = t,
@@ -164,28 +166,28 @@ def _fit_full(self, X, constraints):
         else:
           x = x0 + (t1 - w1.dot(x0)) * w1
         A[:] = x.reshape(num_dim, num_dim)
-
+
         # Second constraint:
         # PSD constraint A >= 0
         # project A onto domain A>0
         l, V = np.linalg.eigh((A + A.T) / 2)
         A[:] = np.dot(V * np.maximum(0, l[None,:]), V.T)
-
+
         fDC2 = w.dot(A.ravel())
         error2 = (fDC2 - t) / t
         if error2 < eps:
           satisfy = True
           break
-
+
       # third constraint: gradient ascent
       # max: g(A) >= 1
       # here we suppose g(A) = fD(A) = \sum_{I,J \in D} sqrt(d_ij' A d_ij)
-
+
       obj_previous = self._fD(X, c, d, A_old)  # g(A_old)
       obj = self._fD(X, c, d, A)               # g(A)
-
+
       if satisfy and (obj > obj_previous or cycle == 0):
-
+
         # If projection of 1 and 2 is successful, and such projection
         # improves objective function, slightly increase learning rate
         # and update from the current A.
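
The inner loop above alternates two projections: onto the half-space `w^T x <= t` in unrolled-matrix coordinates, and onto the PSD cone by clipping negative eigenvalues. A standalone sketch of the same two steps (a simplified illustration, not the class code itself; `w1` and `t1` are the unit-normalized `w` and `t` as above):

    import numpy as np

    def project_halfspace(A, w1, t1):
        # If A (unrolled as x) violates w1^T x <= t1, move it onto the plane w1^T x = t1.
        x = A.ravel()
        if w1.dot(x) > t1:
            x = x + (t1 - w1.dot(x)) * w1
        return x.reshape(A.shape)

    def project_psd(A):
        # Symmetrize, then zero out negative eigenvalues.
        l, V = np.linalg.eigh((A + A.T) / 2)
        return (V * np.maximum(0, l[None, :])).dot(V.T)
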
@@ -195,15 +197,15 @@ def _fit_full(self, X, constraints):
         grad1 = self._fD1(X, c, d, A)
         M = self._grad_projection(grad1, grad2)
         A += alpha * M
-
+
       else:
-
+
         # If projection of 1 and 2 failed, or obj <= obj_previous due
         # to projection of 1 and 2, shrink learning rate and re-update
         # from the previous A.
         alpha /= 2
         A[:] = A_old + alpha * M
-
+
       delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old)
       if delta < self.convergence_threshold:
         break
@@ -221,7 +223,7 @@ def _fit_full(self, X, constraints):
     self.A_[:] = A_old
     self.n_iter_ = cycle
     return self
-
+
   def _fit_diag(self, X, constraints):
     """Learn diagonal metric using MMC.
     Parameters
@@ -236,33 +238,33 @@ def _fit_diag(self, X, constraints):
     num_pos = len(a)
     num_neg = len(c)
     num_samples, num_dim = X.shape
-
+
     s_sum = np.sum((X[a] - X[b]) ** 2, axis=0)
-
+
     it = 0
     error = 1.0
     eps = 1e-6
     reduction = 2.0
     w = np.diag(self.A_).copy()
-
+
     while error > self.convergence_threshold:
-
+
       fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w)
       obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
       fS_1st_d = s_sum  # first derivative of the similarity constraints
-
+
       gradient = fS_1st_d - self.diagonal_c * fD_1st_d               # gradient of the objective
       hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim)  # Hessian of the objective
       step = np.dot(np.linalg.inv(hessian), gradient)
-
+
       # Newton-Raphson update
       # search over optimal lambda
       lambd = 1  # initial step-size
       w_tmp = np.maximum(0, w - lambd * step)
-
+
       obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp)
       obj_previous = obj * 1.1  # just to get the while-loop started
-
+
       inner_it = 0
       while obj < obj_previous:
         obj_previous = obj
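
`_fit_diag` takes a damped Newton step `w - lambd * step`, clips at zero to stay in the nonnegative orthant, and keeps dividing `lambd` by the `reduction` factor while the objective still improves. A generic sketch of that pattern on a toy objective (hypothetical functions, not the MMC objective):

    import numpy as np

    def newton_backtrack(w, grad_fn, hess_fn, obj_fn, reduction=2.0):
        # One damped Newton step, projected onto w >= 0, with the step size
        # shrunk while the objective keeps decreasing.
        step = np.linalg.solve(hess_fn(w), grad_fn(w))  # H^{-1} g without an explicit inverse
        lambd = 1.0
        w_best, obj_best = w, obj_fn(w)
        while True:
            w_try = np.maximum(0, w - lambd * step)
            obj_try = obj_fn(w_try)
            if obj_try >= obj_best:
                break
            w_best, obj_best = w_try, obj_try
            lambd /= reduction
        return w_best

    # Toy usage: minimize ||w - 1||^2 subject to w >= 0.
    obj = lambda w: np.sum((w - 1.0) ** 2)
    grad = lambda w: 2 * (w - 1.0)
    hess = lambda w: 2 * np.eye(w.size)
    print(newton_backtrack(np.array([3.0, -2.0]), grad, hess, obj))  # -> [1. 1.]
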
@@ -271,32 +273,32 @@ def _fit_diag(self, X, constraints):
         w_tmp = np.maximum(0, w - lambd * step)
         obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp)
         inner_it += 1
-
+
       w[:] = w_previous
       error = np.abs((obj_previous - obj_initial) / obj_previous)
       if self.verbose:
         print('mmc iter: %d, conv = %f' % (it, error))
       it += 1
-
+
     self.A_ = np.diag(w)
     return self
 
   def _fD(self, X, c, d, A):
     """The value of the dissimilarity constraint function.
-
+
     f = f(\sum_{ij \in D} distance(x_i, x_j))
     i.e. distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'}
     """
     diff = X[c] - X[d]
     return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6)
-
+
   def _fD1(self, X, c, d, A):
     """The gradient of the dissimilarity constraint function w.r.t. A.
-
+
     For example, let the distance be the L1 norm:
     f = f(\sum_{ij \in D} \sqrt{(x_i-x_j)A(x_i-x_j)'})
     df/dA_{kl} = f' * d(\sum_{ij \in D} \sqrt{(x_i-x_j)^k*(x_i-x_j)^l})/dA_{kl}
-
+
     Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A)
     so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij
     df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)})
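
The derivation above rests on the identity d_ij*A*d_ij' = tr(d_ij'*d_ij*A), which makes the derivative of the quadratic form w.r.t. A the outer product d_ij'*d_ij. A numerical spot-check (illustrative):

    import numpy as np

    rng = np.random.default_rng(0)
    d_ij = rng.standard_normal(4)   # one difference vector
    A = rng.standard_normal((4, 4))

    quad = d_ij @ A @ d_ij          # d_ij * A * d_ij'
    assert np.isclose(quad, np.trace(np.outer(d_ij, d_ij) @ A))

    # The quadratic form is linear in A, so a finite difference recovers the
    # (k, l) entry of the claimed derivative outer(d_ij, d_ij) exactly.
    eps, k, l = 1e-6, 1, 2
    A_pert = A.copy()
    A_pert[k, l] += eps
    fd = (d_ij @ A_pert @ d_ij - quad) / eps
    assert np.isclose(fd, np.outer(d_ij, d_ij)[k, l])
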
@@ -312,31 +314,31 @@ def _fD1(self, X, c, d, A):
     sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6))
     sum_dist = dist.sum()
     return sum_deri / (sum_dist + 1e-6)
-
+
   def _fS1(self, X, a, b, A):
     """The gradient of the similarity constraint function w.r.t. A.
-
+
     f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij'
     df/dA = d(d_ij*A*d_ij')/dA
-
+
     Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A)
     so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij
     """
     dim = X.shape[1]
     diff = X[a] - X[b]
     return np.einsum('ij,ik->jk', diff, diff)  # sum of outer products of all rows in `diff`
-
+
   def _grad_projection(self, grad1, grad2):
     grad2 = grad2 / np.linalg.norm(grad2)
     gtemp = grad1 - np.sum(grad1 * grad2) * grad2
     gtemp /= np.linalg.norm(gtemp)
     return gtemp
-
+
   def _D_objective(self, X, c, d, w):
     return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis=1) + 1e-6)))
-
+
   def _D_constraint(self, X, c, d, w):
-    """Compute the value, 1st derivative, second derivative (Hessian) of
+    """Compute the value, 1st derivative, second derivative (Hessian) of
     a dissimilarity constraint function gF(sum_ij distance(d_ij A d_ij))
     where A is a diagonal matrix (in the form of a column vector 'w').
     """
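
`_grad_projection` removes from `grad1` its component along `grad2` (under the Frobenius inner product) and normalizes, so the ascent step on the dissimilarity objective is orthogonal to the similarity gradient. A small illustration with random matrices:

    import numpy as np

    rng = np.random.default_rng(1)
    grad1 = rng.standard_normal((3, 3))
    grad2 = rng.standard_normal((3, 3))

    g2 = grad2 / np.linalg.norm(grad2)
    proj = grad1 - np.sum(grad1 * g2) * g2      # subtract the component along grad2
    proj /= np.linalg.norm(proj)

    assert np.isclose(np.sum(proj * g2), 0)     # orthogonal under the Frobenius product
    assert np.isclose(np.linalg.norm(proj), 1)  # unit Frobenius norm
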
@@ -355,18 +357,18 @@ def _D_constraint(self, X, c, d, w):
       sum_deri1 / sum_dist,
       sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist)
     )
-
+
   def metric(self):
     return self.A_
-
+
   def transformer(self):
     """Computes the transformation matrix from the Mahalanobis matrix.
     L = V.T * w^(1/2), with A = V*w*V.T being the eigenvector decomposition of A with
     the eigenvalues in the diagonal matrix w and the columns of V being the eigenvectors.
-
+
     The Cholesky decomposition cannot be applied here, since MMC learns only a positive
     *semi*-definite Mahalanobis matrix.
-
+
     Returns
     -------
     L : (d x d) matrix
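
Since the learned A is only positive *semi*-definite, the square-root factor has to come from an eigendecomposition rather than a Cholesky factorization. A sketch consistent with the docstring above (the actual method body lies outside this diff):

    import numpy as np

    def transformer_from_metric(A):
        # A = V*diag(w)*V.T, so L = diag(sqrt(w)) * V.T satisfies L.T.dot(L) == A.
        # Clipping at zero guards against tiny negative eigenvalues.
        w, V = np.linalg.eigh(A)
        return V.T * np.sqrt(np.maximum(0, w[:, None]))

    B = np.random.randn(4, 4)
    A = B.T.dot(B)                  # random PSD matrix
    L = transformer_from_metric(A)
    assert np.allclose(L.T.dot(L), A)
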
@@ -384,6 +386,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
                num_labeled=np.inf, num_constraints=None,
                A0=None, diagonal=False, diagonal_c=1.0, verbose=False):
     """Initialize the learner.
+
     Parameters
     ----------
     max_iter : int, optional
@@ -414,6 +417,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
 
   def fit(self, X, y, random_state=np.random):
     """Create constraints from labels and learn the MMC model.
+
     Parameters
     ----------
     X : (n x d) matrix
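
For context, a minimal end-to-end sketch of the supervised wrapper (toy data; the import path is assumed to follow the metric-learn package layout and is not shown in this diff):

    import numpy as np
    from metric_learn import MMC_Supervised  # assumed import path

    X = np.random.randn(40, 3)               # toy features
    y = np.random.randint(2, size=40)        # toy binary labels

    mmc = MMC_Supervised(num_constraints=100)
    mmc.fit(X, y)        # similarity/dissimilarity pairs are sampled from y
    A = mmc.metric()     # the learned Mahalanobis matrix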