Skip to content

Commit ce5f238

Browse files
Converting constraints to a nice object
A big refactor, but I think it makes things cleaner.
1 parent e31fb50 commit ce5f238

File tree

12 files changed

+151
-118
lines changed

12 files changed

+151
-118
lines changed

examples/sandwich.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
from sklearn.metrics import pairwise_distances
88
from sklearn.neighbors import NearestNeighbors
99

10-
import metric_learn.constraints as C
11-
from metric_learn import ITML, LMNN, LSML, SDML
10+
from metric_learn import LMNN, ITML_Supervised, LSML_Supervised, SDML_Supervised
1211

1312

1413
def sandwich_demo():
@@ -22,22 +21,21 @@ def sandwich_demo():
2221
ax.set_xticks([])
2322
ax.set_yticks([])
2423

25-
num_constraints = 60
2624
mls = [
27-
(LMNN(), (x, y)),
28-
(ITML(), (x, C.positive_negative_pairs(y, len(x), num_constraints))),
29-
(SDML(), (x, C.adjacency_matrix(y, len(x), num_constraints))),
30-
(LSML(), (x, C.relative_quadruplets(y, num_constraints)))
25+
LMNN(),
26+
ITML_Supervised(num_constraints=200),
27+
SDML_Supervised(num_constraints=200),
28+
LSML_Supervised(num_constraints=200),
3129
]
3230

33-
for ax_num, (ml,args) in zip(range(3,7), mls):
34-
ml.fit(*args)
31+
for ax_num, ml in enumerate(mls, start=3):
32+
ml.fit(x, y)
3533
tx = ml.transform()
3634
ml_knn = nearest_neighbors(tx, k=2)
37-
ax = plt.subplot(3,2,ax_num)
38-
plot_sandwich_data(tx, y, ax)
39-
plot_neighborhood_graph(tx, ml_knn, y, ax)
40-
ax.set_title('%s space' % ml.__class__.__name__)
35+
ax = plt.subplot(3, 2, ax_num)
36+
plot_sandwich_data(tx, y, axis=ax)
37+
plot_neighborhood_graph(tx, ml_knn, y, axis=ax)
38+
ax.set_title(ml.__class__.__name__)
4139
ax.set_xticks([])
4240
ax.set_yticks([])
4341
plt.show()

metric_learn/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import absolute_import
22

3+
from .constraints import Constraints
34
from .covariance import Covariance
45
from .itml import ITML, ITML_Supervised
56
from .lmnn import LMNN

metric_learn/constraints.py

Lines changed: 74 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,65 +4,86 @@
44
"""
55
import numpy as np
66
import random
7+
import warnings
78
from six.moves import xrange
9+
from scipy.sparse import coo_matrix
810

9-
# @TODO: consider creating a stateful class
10-
# https://github.com/all-umass/metric-learn/pull/19#discussion_r67386226
11+
__all__ = ['Constraints']
1112

1213

13-
def adjacency_matrix(labels, num_points, num_constraints):
14-
a, c = np.random.randint(len(labels), size=(2,num_constraints))
15-
b, d = np.empty((2, num_constraints), dtype=int)
16-
for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
17-
b[i] = random.choice(np.nonzero(labels == al)[0])
18-
d[i] = random.choice(np.nonzero(labels != cl)[0])
19-
W = np.zeros((num_points,num_points))
20-
W[a,b] = 1
21-
W[c,d] = -1
22-
# make W symmetric
23-
W[b,a] = 1
24-
W[d,c] = -1
25-
return W
14+
class Constraints(object):
    """Bookkeeping for a partially labelled 1-D label vector."""

    def __init__(self, partial_labels):
        """Record which entries of ``partial_labels`` carry a known label.

        Parameters
        ----------
        partial_labels : int arraylike, -1 indicating unknown label
            Must be one-dimensional; any other shape fails the shape
            unpacking below with a ValueError.
        """
        labels = np.asanyarray(partial_labels)
        # Single-element unpacking doubles as a "must be 1-D" check.
        (self.num_points,) = labels.shape
        # Entries >= 0 are treated as known labels.
        known = np.flatnonzero(labels >= 0)
        self.known_label_idx = known
        self.known_labels = labels[known]
2621

22+
def adjacency_matrix(self, num_constraints):
    """Return a symmetric sparse matrix encoding sampled pair constraints.

    Calls ``self.positive_negative_pairs(num_constraints)`` to obtain
    ``(a, b, c, d)``. Each ``(a[i], b[i])`` entry is given the value +1
    and each ``(c[i], d[i])`` entry the value -1, in a
    ``self.num_points`` x ``self.num_points`` matrix that is then added
    to its own transpose.

    NOTE(review): a pair present in both orientations, (i, j) and (j, i),
    ends up with magnitude 2 after the symmetrising sum.
    """
    pos_a, pos_b, neg_c, neg_d = self.positive_negative_pairs(num_constraints)
    rows = np.concatenate((pos_a, neg_c))
    cols = np.concatenate((pos_b, neg_d))
    # Leading entries belong to the (a, b) pairs, trailing ones to (c, d).
    values = np.concatenate((np.ones(len(pos_a), dtype=int),
                             -np.ones(len(neg_c), dtype=int)))
    size = self.num_points
    adj = coo_matrix((values, (rows, cols)), shape=(size, size))
    # symmetrize
    return adj + adj.T
2731

28-
def positive_negative_pairs(labels, num_points, num_constraints):
29-
ac,bd = np.random.randint(num_points, size=(2,num_constraints))
30-
pos = labels[ac] == labels[bd]
31-
a,c = ac[pos], ac[~pos]
32-
b,d = bd[pos], bd[~pos]
33-
return a,b,c,d
32+
def positive_negative_pairs(self, num_constraints, same_length=False):
    """Sample pair constraints as four index arrays ``(a, b, c, d)``.

    ``(a, b)`` comes from ``self._pairs(..., same_label=True)`` and
    ``(c, d)`` from ``self._pairs(..., same_label=False)``.

    Parameters
    ----------
    num_constraints : int
        number of pairs requested from each ``_pairs`` call
    same_length : bool, optional
        if True and the two calls returned different numbers of pairs,
        all four arrays are truncated to the shorter length
    """
    pos_a, pos_b = self._pairs(num_constraints, same_label=True)
    neg_c, neg_d = self._pairs(num_constraints, same_label=False)
    if not same_length or len(pos_a) == len(neg_c):
        return pos_a, pos_b, neg_c, neg_d
    shortest = min(len(pos_a), len(neg_c))
    return tuple(arr[:shortest] for arr in (pos_a, pos_b, neg_c, neg_d))
3439

40+
def _pairs(self, num_constraints, same_label=True, max_iter=10):
    """Sample up to ``num_constraints`` distinct index pairs.

    Parameters
    ----------
    num_constraints : int
        number of distinct pairs wanted
    same_label : bool, optional
        if True both members of a pair share a label (and are different
        points); if False their labels differ
    max_iter : int, optional
        number of sampling rounds attempted before giving up

    Returns
    -------
    (2 x m) int array, m <= num_constraints
        row 0 holds the first members, row 1 the second members, as
        entries of ``self.known_label_idx``. The array is always
        two-row, even when m == 0, so ``a, b = self._pairs(...)`` is
        always valid.

    Warns
    -----
    Emits a warning when fewer than ``num_constraints`` pairs were found.
    """
    num_labels = len(self.known_labels)
    ab = set()
    it = 0
    # With no known labels nothing can be drawn (randint(0, ...) would
    # raise), so fall straight through to the warning below.
    while num_labels > 0 and it < max_iter and len(ab) < num_constraints:
        nc = num_constraints - len(ab)
        for aidx in np.random.randint(num_labels, size=nc):
            if same_label:
                mask = self.known_labels[aidx] == self.known_labels
                mask[aidx] = False  # avoid identity pairs
            else:
                mask = self.known_labels[aidx] != self.known_labels
            b_choices, = np.where(mask)
            if len(b_choices) > 0:
                # The set silently drops pairs that were already drawn.
                ab.add((aidx, np.random.choice(b_choices)))
        it += 1
    if len(ab) < num_constraints:
        warnings.warn("Only generated %d %s constraints (requested %d)" % (
            len(ab), 'positive' if same_label else 'negative', num_constraints))
    # reshape keeps the (m, 2) layout when m == 0; without it an empty
    # result is 1-D and cannot be unpacked into two arrays.
    ab = np.array(list(ab)[:num_constraints], dtype=int).reshape(-1, 2)
    return self.known_label_idx[ab.T]
3561

36-
def relative_quadruplets(labels, num_constraints):
37-
C = np.empty((num_constraints,4), dtype=int)
38-
a, c = np.random.randint(len(labels), size=(2,num_constraints))
39-
for i,(al,cl) in enumerate(zip(labels[a],labels[c])):
40-
C[i,1] = random.choice(np.nonzero(labels == al)[0])
41-
C[i,3] = random.choice(np.nonzero(labels != cl)[0])
42-
C[:,0] = a
43-
C[:,2] = c
44-
return C
62+
def chunks(self, num_chunks=100, chunk_size=2):
    """Randomly group known-label points into same-label chunks.

    Parameters
    ----------
    num_chunks : int, optional
        number of chunks to create
    chunk_size : int, optional
        number of points per chunk

    Returns
    -------
    int array with the same shape as ``self.known_label_idx``
        entry i is the chunk number assigned to the i-th known-label
        point, or -1 if that point belongs to no chunk

    Raises
    ------
    ValueError
        if the classes run out of points before ``num_chunks`` chunks
        of ``chunk_size`` points each have been formed
    """
    assignment = -np.ones_like(self.known_label_idx, dtype=int)
    uniq, lookup = np.unique(self.known_labels, return_inverse=True)
    # One pool of still-unassigned positions per distinct label.
    all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq))]
    idx = 0
    while idx < num_chunks and all_inds:
        c = random.randint(0, len(all_inds) - 1)
        inds = all_inds[c]
        if len(inds) < chunk_size:
            # This class can no longer fill a chunk; retire it.
            del all_inds[c]
            continue
        # random.sample needs a sequence: sampling directly from a set
        # raises TypeError on Python >= 3.11.
        ii = random.sample(sorted(inds), chunk_size)
        inds.difference_update(ii)
        assignment[ii] = idx
        idx += 1
    if idx < num_chunks:
        raise ValueError('Unable to make %d chunks of %d examples each' %
                         (num_chunks, chunk_size))
    return assignment
4581

46-
47-
def chunks(Y, num_chunks=100, chunk_size=2, seed=None):
48-
# @TODO: remove seed from params and use numpy RandomState
49-
# https://github.com/all-umass/metric-learn/pull/19#discussion_r67386666
50-
random.seed(seed)
51-
chunks = -np.ones_like(Y, dtype=int)
52-
uniq, lookup = np.unique(Y, return_inverse=True)
53-
all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))]
54-
idx = 0
55-
while idx < num_chunks and all_inds:
56-
c = random.randint(0, len(all_inds)-1)
57-
inds = all_inds[c]
58-
if len(inds) < chunk_size:
59-
del all_inds[c]
60-
continue
61-
ii = random.sample(inds, chunk_size)
62-
inds.difference_update(ii)
63-
chunks[ii] = idx
64-
idx += 1
65-
if idx < num_chunks:
66-
raise ValueError('Unable to make %d chunks of %d examples each' %
67-
(num_chunks, chunk_size))
68-
return chunks
82+
@staticmethod
def random_subset(all_labels, num_preserved=np.inf):
    """Build a Constraints object that keeps a random subset of labels.

    Parameters
    ----------
    all_labels : int arraylike
        the full label vector; it is copied, not modified
    num_preserved : int, optional
        number of labels to keep; the remaining
        ``len(all_labels) - num_preserved`` entries are replaced by -1.
        The default (infinity) keeps every label.

    Returns
    -------
    Constraints built from the partially masked copy of ``all_labels``
    """
    n = len(all_labels)
    # int() keeps the count usable as a slice bound even when
    # num_preserved is a float such as the np.inf default.
    num_ignored = int(max(0, n - num_preserved))
    # Draw without replacement so exactly num_ignored distinct labels
    # are hidden; randint could repeat an index and hide fewer.
    idx = np.random.permutation(n)[:num_ignored]
    partial_labels = np.array(all_labels, copy=True)
    partial_labels[idx] = -1
    return Constraints(partial_labels)

metric_learn/covariance.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from __future__ import absolute_import
1212
import numpy as np
13+
1314
from .base_metric import BaseMetricLearner
1415

1516

metric_learn/itml.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
from six.moves import xrange
1717
from sklearn.metrics import pairwise_distances
1818

19-
from . import constraints
2019
from .base_metric import BaseMetricLearner
20+
from .constraints import Constraints
2121

2222

2323
class ITML(BaseMetricLearner):
@@ -70,7 +70,7 @@ def fit(self, X, constraints, bounds=None, A0=None):
7070
----------
7171
X : (n x d) data matrix
7272
each row corresponds to a single instance
73-
constraints : tuple of arrays
73+
constraints : 4-tuple of arrays
7474
(a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
7575
bounds : list (pos,neg) pairs, optional
7676
bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg
@@ -142,7 +142,8 @@ def _vector_norm(X):
142142
class ITML_Supervised(ITML):
143143
"""Information Theoretic Metric Learning (ITML)"""
144144
def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3,
145-
num_constraints=None, bounds=None, A0=None, verbose=False):
145+
num_labeled=np.inf, num_constraints=None, bounds=None, A0=None,
146+
verbose=False):
146147
"""Initialize the learner.
147148
148149
Parameters
@@ -151,17 +152,17 @@ def __init__(self, gamma=1., max_iters=1000, convergence_threshold=1e-3,
151152
value for slack variables
152153
max_iters : int, optional
153154
convergence_threshold : float, optional
154-
num_constraints: int, needed for .fit()
155+
num_labeled : int, optional
156+
number of labels to preserve for training
157+
num_constraints: int, optional
158+
number of constraints to generate
155159
verbose : bool, optional
156160
if True, prints information while learning
157161
"""
158162
ITML.__init__(self, gamma=gamma, max_iters=max_iters,
159163
convergence_threshold=convergence_threshold, verbose=verbose)
160-
self.params.update({
161-
'num_constraints': num_constraints,
162-
'bounds': bounds,
163-
'A0': A0,
164-
})
164+
self.params.update(num_labeled=num_labeled, num_constraints=num_constraints,
165+
bounds=bounds, A0=A0)
165166

166167
def fit(self, X, labels):
167168
"""Create constraints from labels and learn the ITML model.
@@ -178,6 +179,6 @@ def fit(self, X, labels):
178179
num_classes = np.unique(labels)
179180
num_constraints = 20*(len(num_classes))**2
180181

181-
C = constraints.positive_negative_pairs(labels, X.shape[0], num_constraints)
182-
return ITML.fit(self, X, C, bounds=self.params['bounds'],
183-
A0=self.params['A0'])
182+
c = Constraints.random_subset(labels, self.params['num_labeled'])
183+
return ITML.fit(self, X, c.positive_negative_pairs(num_constraints),
184+
bounds=self.params['bounds'], A0=self.params['A0'])

metric_learn/lfda.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import scipy
1616
from six.moves import xrange
1717
from sklearn.metrics import pairwise_distances
18+
1819
from .base_metric import BaseMetricLearner
1920

2021

metric_learn/lmnn.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from collections import Counter
1515
from six.moves import xrange
1616
from sklearn.metrics import pairwise_distances
17+
1718
from .base_metric import BaseMetricLearner
1819

1920

@@ -237,10 +238,12 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None):
237238

238239
class LMNN(_base_LMNN):
239240
def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
240-
regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False):
241+
regularization=0.5, convergence_tol=0.001, use_pca=True,
242+
verbose=False):
241243
_base_LMNN.__init__(self, k=k, min_iter=min_iter, max_iter=max_iter,
242244
learn_rate=learn_rate, regularization=regularization,
243-
convergence_tol=convergence_tol, use_pca=use_pca, verbose=verbose)
245+
convergence_tol=convergence_tol, use_pca=use_pca,
246+
verbose=verbose)
244247

245248
def fit(self, X, labels):
246249
self.X = X

metric_learn/lsml.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
import scipy.linalg
1313
from six.moves import xrange
1414

15-
from . import constraints
1615
from .base_metric import BaseMetricLearner
16+
from .constraints import Constraints
1717

1818

1919
class LSML(BaseMetricLearner):
@@ -35,10 +35,12 @@ def __init__(self, tol=1e-3, max_iter=1000, verbose=False):
3535

3636
def _prepare_inputs(self, X, constraints, weights, prior):
3737
self.X = X
38-
self.vab = np.diff(X[constraints[:,:2]], axis=1)[:,0]
39-
self.vcd = np.diff(X[constraints[:,2:]], axis=1)[:,0]
38+
a,b,c,d = constraints
39+
self.vab = X[a] - X[b]
40+
self.vcd = X[c] - X[d]
41+
assert self.vab.shape == self.vcd.shape, 'Constraints must have same length'
4042
if weights is None:
41-
self.w = np.ones(constraints.shape[0])
43+
self.w = np.ones(self.vab.shape[0])
4244
else:
4345
self.w = weights
4446
self.w /= self.w.sum() # weights must sum to 1
@@ -57,7 +59,7 @@ def fit(self, X, constraints, weights=None, prior=None):
5759
----------
5860
X : (n x d) data matrix
5961
each row corresponds to a single instance
60-
constraints : (m x 4) matrix of ints
62+
constraints : 4-tuple of arrays
6163
(a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
6264
weights : (m,) array of floats, optional
6365
scale factor for each constraint
@@ -130,8 +132,8 @@ def _regularization_loss(metric, prior_inv):
130132

131133

132134
class LSML_Supervised(LSML):
133-
def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None,
134-
weights=None, verbose=False):
135+
def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf,
136+
num_constraints=None, weights=None, verbose=False):
135137
"""Initialize the learner.
136138
137139
Parameters
@@ -140,18 +142,18 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None,
140142
max_iter : int, optional
141143
prior : (d x d) matrix, optional
142144
guess at a metric [default: covariance(X)]
143-
num_constraints: int, needed for .fit()
145+
num_labeled : int, optional
146+
number of labels to preserve for training
147+
num_constraints: int, optional
148+
number of constraints to generate
144149
weights : (m,) array of floats, optional
145150
scale factor for each constraint
146151
verbose : bool, optional
147152
if True, prints information while learning
148153
"""
149154
LSML.__init__(self, tol=tol, max_iter=max_iter, verbose=verbose)
150-
self.params.update({
151-
'prior': prior,
152-
'num_constraints': num_constraints,
153-
'weights': weights,
154-
})
155+
self.params.update(prior=prior, num_labeled=num_labeled,
156+
num_constraints=num_constraints, weights=weights)
155157

156158
def fit(self, X, labels):
157159
"""Create constraints from labels and learn the LSML model.
@@ -168,6 +170,7 @@ def fit(self, X, labels):
168170
num_classes = np.unique(labels)
169171
num_constraints = 20*(len(num_classes))**2
170172

171-
C = constraints.relative_quadruplets(labels, num_constraints)
172-
return LSML.fit(self, X, C, weights=self.params['weights'],
173+
c = Constraints.random_subset(labels, self.params['num_labeled'])
174+
pairs = c.positive_negative_pairs(num_constraints, same_length=True)
175+
return LSML.fit(self, X, pairs, weights=self.params['weights'],
173176
prior=self.params['prior'])

metric_learn/nca.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from __future__ import absolute_import
77
import numpy as np
88
from six.moves import xrange
9+
910
from .base_metric import BaseMetricLearner
1011

1112

0 commit comments

Comments
 (0)