-
Notifications
You must be signed in to change notification settings - Fork 3
/
ClassificationBoltzmannMachine.hs
312 lines (255 loc) · 14.1 KB
/
ClassificationBoltzmannMachine.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
{-# LANGUAGE PatternGuards, ScopedTypeVariables #-}
-- | Base Restricted Boltzmann machine.
module Hopfield.Boltzmann.ClassificationBoltzmannMachine where
-- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine
-- Using RBM for recognition
-- http://uai.sis.pitt.edu/papers/11/p463-louradour.pdf
-- http://www.dmi.usherb.ca/~larocheh/publications/drbm-mitacs-poster.pdf
import Data.Maybe
import Control.Monad
import Control.Monad.Random
import Data.List
import Data.Vector ((!))
import qualified Data.Vector as V
import qualified Numeric.Container as NC
import Hopfield.Common
import Hopfield.Util
-- In the case of the Boltzmann Machine the weight matrix establishes the
-- weights between visible and hidden neurons:
-- w i j - connection between visible neuron i and hidden neuron j

-- | Step size used when adjusting the weights during training.
-- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm
learningRate :: Double
learningRate = 1.0e-3
-- | Selects which group of units an operation refers to: the hidden layer,
-- the visible layer, or the classification units.
data Mode = Hidden | Visible | Classification
  deriving(Eq, Show)
-- | The full parameter set of a trained classification Boltzmann machine:
-- two weight matrices, three bias vectors, and the training bookkeeping.
data BoltzmannData = BoltzmannData {
        weightsB :: Weights    -- ^ the weights of the network
      , classificationWeights :: Weights -- ^ weights for classification
      , biasB :: Bias          -- ^ biases of the visible layer
      , biasC :: Bias          -- ^ biases of the hidden layer
      , biasD :: Bias          -- ^ biases of the classification units
      , patternsB :: [Pattern] -- ^ the patterns which were used to train it
        -- can be deduced from weights, maybe should be removed now
      , hiddenCount :: Int     -- ^ number of neurons in the hidden layer
      , pattern_to_class :: [(Pattern, Int)] -- ^ the class of the given pattern;
        -- classes have to be in consecutive order, starting from 0
      }
  deriving(Show)
-- | Retrieves the dimension of the weights matrix corresponding to the given
-- mode. For 'Hidden' this is the number of rows of the matrix; for 'Visible'
-- and 'Classification' it is the length of a row. The caller must pass the
-- weights matrix that matches the chosen mode.
getDimension :: Mode -> Weights -> Int
getDimension mode ws = case mode of
  Hidden         -> V.length ws
  Visible        -> V.length (ws ! 0)
  Classification -> V.length (ws ! 0)
-- | @buildCBoltzmannData patterns@ trains a Boltzmann network on @patterns@,
-- using as many hidden neurons as there are visible ones.
buildCBoltzmannData :: MonadRandom m => [Pattern] -> m BoltzmannData
buildCBoltzmannData [] = error "Train patterns are empty"
buildCBoltzmannData pats@(firstPat : _) =
  buildCBoltzmannData' pats (V.length firstPat)
-- | @buildCBoltzmannData' patterns nrHidden@: takes a list of patterns and
-- builds a Boltzmann network (by training) in which these patterns are
-- stable states. The result of this function can be used to run a pattern
-- against the network, by using 'matchPatternBoltzmann'.
buildCBoltzmannData' :: MonadRandom m => [Pattern] -> Int -> m BoltzmannData
buildCBoltzmannData' [] _ = error "Train patterns are empty"
buildCBoltzmannData' pats@(firstPat : _) nrHidden
  | patLen == 0
      = error "Cannot have empty patterns"
  | not (all ((== patLen) . V.length) pats)
      = error "All training patterns must have the same length"
  | otherwise
      = trainBoltzmann pats nrHidden
  where
    patLen = V.length firstPat
-- | @getActivationSum ws bias pat index@ computes the bias of neuron @index@
-- plus its weighted input from @pat@. Used for the activation probability of
-- visible-layer neurons, and as part of the sums required for the
-- probabilities of the classifications.
getActivationSum :: Weights -> Bias -> Pattern -> Int -> Double
getActivationSum ws bias pat index = biasTerm + weightedInput
  where
    biasTerm      = bias ! index
    weightedInput = dotProduct (columnVector ws index) (toDouble pat)
-- | @getActivationProbabilityVisible ws bias h index@ returns the activation
-- probability for neuron @index@ of the visible layer, given the weights
-- matrix @ws@ and the bias vector @bias@: the sigmoid 'activation' applied
-- to the neuron's activation sum.
getActivationProbabilityVisible :: Weights -> Bias -> Pattern -> Int -> Double
getActivationProbabilityVisible ws bias h =
  activation . getActivationSum ws bias h
-- | @getActivationSumHidden ws u c v y index@ returns the activation sum for
-- hidden neuron @index@: its bias plus the weighted contributions of the
-- visible pattern @v@ (through @ws@) and of the classification vector @y@
-- (through @u@). Errors out if @v@ or @y@ do not fit the weight matrices.
getActivationSumHidden :: Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> Double
getActivationSumHidden ws u c v y index
  | Just e <- validPattern Visible ws v       = error e
  | Just e <- validPattern Classification u y = error e
  | otherwise
      = c ! index
      + dotProduct (ws ! index) (toDouble v)
      + dotProduct (u ! index) (toDouble y)
-- | @getHiddenSums ws u c v y@ returns the activation sums of every neuron in
-- the hidden layer, given the weights matrices @ws@ and @u@ and the hidden
-- bias vector @c@. One entry per row of @ws@.
getHiddenSums :: Weights -> Weights -> Bias -> Pattern -> Pattern -> V.Vector Double
getHiddenSums ws u c v y =
  V.generate (V.length ws) (getActivationSumHidden ws u c v y)
-- | @getActivationProbabilityHidden ws u c v y index@ returns the activation
-- probability for neuron @index@ of the hidden layer, given the weights
-- matrices @ws@ and @u@ and the bias vector @c@: the sigmoid 'activation'
-- applied to the neuron's activation sum.
getActivationProbabilityHidden :: Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> Double
getActivationProbabilityHidden ws u c v y =
  activation . getActivationSumHidden ws u c v y
-- | @updateNeuronVisible ws bias h index@ samples a new binary state for
-- visible neuron @index@ via Gibbs sampling on its activation probability.
updateNeuronVisible :: MonadRandom m => Weights -> Bias -> Pattern -> Int -> m Int
updateNeuronVisible ws bias h index =
  gibbsSampling (getActivationProbabilityVisible ws bias h index)
-- | Samples a new binary state for a hidden neuron via Gibbs sampling on its
-- activation probability.
updateNeuronHidden :: MonadRandom m => Weights -> Weights -> Bias -> Pattern -> Pattern -> Int -> m Int
updateNeuronHidden ws u c v y index =
  gibbsSampling (getActivationProbabilityHidden ws u c v y index)
-- | Resamples the entire visible layer from the hidden pattern @h@, one
-- neuron at a time, via Gibbs sampling. Errors out if @h@ does not fit @ws@.
updateVisible :: MonadRandom m => Weights -> Bias -> Pattern -> m Pattern
updateVisible ws bias h
  | Just e <- validPattern Hidden ws h = error e
  | otherwise = V.fromList `liftM` mapM sampleAt neuronIndices
  where
    sampleAt      = updateNeuronVisible ws bias h
    -- the visible layer has one neuron per column of the weights matrix
    neuronIndices = [0 .. V.length (ws ! 0) - 1]
-- | Resamples the entire hidden layer from the visible pattern @v@ and the
-- classification vector @y@, one neuron at a time, via Gibbs sampling.
-- Errors out if @v@ does not fit @ws@.
updateHidden :: MonadRandom m => Weights -> Weights -> Bias -> Pattern -> Pattern -> m Pattern
updateHidden ws u c v y
  | Just e <- validPattern Visible ws v = error e
  | otherwise = V.fromList `liftM` mapM sampleAt neuronIndices
  where
    sampleAt      = updateNeuronHidden ws u c v y
    -- the hidden layer has one neuron per row of the weights matrix
    neuronIndices = [0 .. V.length ws - 1]
-- | Computes a new one-hot classification vector from the current state of
-- the network (the @u@ matrix and the bias vector @d@, together with the
-- hidden vector @h@): the class with the largest exponentiated activation
-- sum gets a 1, all others get 0.
updateClassification :: Weights -> Bias -> Pattern -> Pattern
updateClassification u d h =
  V.fromList [ if cls == winner then 1 else 0 | cls <- allClasses ]
  where
    -- TODO replace with actual sampling using inverse method (with cdf list)
    winner        = maximumBy (compareBy expActivation) allClasses
    expActivation = exp . getActivationSum u d h
    allClasses    = [0 .. V.length d - 1]
-- | @getClassificationVector pat_classes pat@ returns the classification
-- vector of @pat@, by looking up @pat@ in @pat_classes@ to obtain the class
-- of the pattern. The classification vector has a 1 in the position of the
-- class and 0 everywhere else; its length is the number of classes.
--
-- Errors out with a descriptive message when @pat@ is not one of the
-- training patterns (previously this was a bare 'fromJust', whose failure
-- message did not say where the lookup went wrong).
getClassificationVector :: [(Pattern, Int)] -> Pattern -> Pattern
getClassificationVector pat_classes pat
  = V.fromList [ if n == pat_class then 1 else 0 | n <- map snd pat_classes]
  where
    pat_class =
      fromMaybe (error "getClassificationVector: pattern not in training set")
                (lookup pat pat_classes)
-- | One step which updates the weights in the CD-n training process.
-- The weights are changed according to one of the training patterns.
-- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm
-- @oneTrainingStep bm v@ updates the parameters of @bm@ (the 2 weight
-- matrices and the 3 bias vectors) according to the training instance @v@
-- and its classification, obtained by looking in the map kept in @bm@.
oneTrainingStep :: MonadRandom m => BoltzmannData -> Pattern -> m BoltzmannData
oneTrainingStep (BoltzmannData ws u b c d pats nr_h pat_to_class) v = do
  let -- one-hot classification vector of the training instance
      y = getClassificationVector pat_to_class v
      -- hidden activation sums of the positive (data-driven) phase
      h_sum = getHiddenSums ws u c v y
  -- one Gibbs step (CD-1): sample a hidden configuration, then reconstruct
  -- the visible layer and the classification from it
  h <- updateHidden ws u c v y
  v' <- updateVisible ws b h
  let y' = updateClassification u d h
      -- hidden activation sums of the negative (reconstruction) phase
      (h_sum' :: V.Vector Double) = getHiddenSums ws u c v' y'
      getOuterProduct v1 v2 = NC.toLists $ (fromDataVector v1) `NC.outer` (fromDataVector $ toDouble v2)
      -- learning-rate-scaled difference of positive and negative gradients
      getDelta pos neg = map (map (* learningRate)) $ combine (-) pos neg
      updateWeights w d_w = vector2D $ combine (+) (list2D w) d_w
      -- bias update from two binary patterns (hence the fromIntegral)
      deltaBias v1 v2 = V.map ((* learningRate) . fromIntegral) (combineVectors (-) v1 v2)
      -- bias update from two real-valued activation-sum vectors
      deltaBiasC v1 v2 = V.map (* learningRate) (combineVectors (-) v1 v2)
      updateBias bias delta_bias = combineVectors (+) bias delta_bias
      pos_ws = getOuterProduct h_sum v -- positive gradient for ws
      neg_ws = getOuterProduct h_sum' v' -- negative gradient for ws
      pos_u = getOuterProduct h_sum y -- positive gradient for u
      neg_u = getOuterProduct h_sum' y' -- negative gradient for u
      d_ws = getDelta pos_ws neg_ws -- delta ws
      new_ws = updateWeights ws d_ws
      d_u = getDelta pos_u neg_u -- delta u
      new_u = updateWeights u d_u
      new_b = updateBias b (deltaBias v v')
      new_c = updateBias c (deltaBiasC h_sum h_sum')
      new_d = updateBias d (deltaBias y y')
  return $ BoltzmannData new_ws new_u new_b new_c new_d pats nr_h pat_to_class
-- | The training function for the Boltzmann Machine, using the contrastive
-- divergence algorithm CD-1. (We could extend to CD-n, but "In practice,
-- CD-1 has been shown to work surprisingly well.")
-- @trainBoltzmann pats nrHidden@: @pats@ are the training patterns and
-- @nrHidden@ is the number of neurons to be created in the hidden layer.
-- Each distinct training pattern is assigned a consecutive class, from 0.
-- http://en.wikipedia.org/wiki/Restricted_Boltzmann_machine#Training_algorithm
trainBoltzmann :: MonadRandom m => [Pattern] -> Int -> m BoltzmannData
trainBoltzmann pats nr_h = do
    ws <- vector2D `liftM` randomMatrix nr_visible
    u  <- vector2D `liftM` randomMatrix nr_classes
    let start = BoltzmannData ws u b c d pats nr_h pats_classes
    foldM oneTrainingStep start pats
  where
    -- small random initial weights; one row per hidden neuron
    randomMatrix cols = replicateM nr_h (replicateM cols (normal 0.0 0.01))
    b = V.replicate nr_visible 0.0
    c = V.replicate nr_h 0.0
    d = V.replicate nr_classes 0.0
    distinct_pats = nub pats
    nr_classes    = length distinct_pats
    pats_classes  = zip distinct_pats [0 ..]
    nr_visible    = V.length $ head pats
-- | @matchPatternCBoltzmann bm pat@ classifies @pat@ with the trained
-- Boltzmann network @bm@, assigning it to one of the patterns the network
-- was trained with. This is done by computing the free energy of @pat@ with
-- every possible classification and choosing the classification with the
-- lowest free energy (i.e. the highest @exp (-freeEnergy)@).
-- http://uai.sis.pitt.edu/papers/11/p463-louradour.pdf
matchPatternCBoltzmann :: BoltzmannData -> Pattern -> Int
matchPatternCBoltzmann bm v
  | Just e <- validPattern Visible (weightsB bm) v = error e
  | otherwise = fromJust (best `elemIndex` patternsB bm)
  where
    classesOf = pattern_to_class bm
    -- every training pattern paired with its one-hot classification vector
    candidates =
      [ (p, getClassificationVector classesOf p) | (p, _) <- classesOf ]
    likelihood cls = exp $ - (getFreeEnergy bm v cls)
    (best, _) = maximumBy (compareBy (likelihood . snd)) candidates
-- | @getFreeEnergy bm v y@ computes the free energy of the visible vector
-- @v@ paired with classification vector @y@, according to the trained
-- Boltzmann network @bm@: minus the classification-bias term, minus the
-- softplus of every hidden activation sum. Used by 'matchPatternCBoltzmann'.
getFreeEnergy :: BoltzmannData -> Pattern -> Pattern -> Double
getFreeEnergy (BoltzmannData ws u _b c d _pats _nrH _pats_classes) v y =
  negate (dotProduct d (toDouble y)) - V.sum (V.map softplus hiddenSums)
  where
    hiddenSums = getHiddenSums ws u c v y
-- | The activation function for the network: the logistic sigmoid,
-- mapping any real activation sum into the probability range (0, 1).
-- http://en.wikipedia.org/wiki/Sigmoid_function
activation :: Double -> Double
activation x = recip (1.0 + exp (negate x))
-- | Softplus, @log (1 + exp x)@ — the smooth rectifier used when computing
-- the free energy.
-- http://uai.sis.pitt.edu/papers/11/p463-louradour.pdf
softplus :: Double -> Double
softplus = log . (1.0 +) . exp
-- TODO move to tests
-- | Checks that a classification vector has the expected length, contains
-- only 0s and 1s, and is one-hot (exactly one 1). Returns a description of
-- the first violation found, or Nothing for a valid vector.
validClassificationVector :: Pattern -> Int -> Maybe String
validClassificationVector pat size
  | V.length pat /= size
      = Just "classification vector does not match expected size"
  | V.any (`notElem` [0, 1]) pat
      = Just "Non binary element in classification pattern"
  | V.sum pat /= 1
      = Just "Invalid classification vector"
  | otherwise = Nothing
-- | @validPattern mode weights pattern@ returns an error string in a Just
-- if @pattern@ is not compatible with @weights@ (wrong size for the layer
-- selected by @mode@, or non-binary entries) and Nothing otherwise.
validPattern :: Mode -> Weights -> Pattern -> Maybe String
validPattern mode ws pat
  | V.length pat /= getDimension mode ws
      = Just $ "Size of pattern must match network size in " ++ show mode
  | V.any (`notElem` [0, 1]) pat
      = Just "Non binary element in Boltzmann pattern"
  | otherwise = Nothing
-- | @validWeights ws@ checks that a weight matrix is well formed: non-empty,
-- with every row the same length as the first one.
validWeights :: Weights -> Maybe String
validWeights ws
  | V.null ws                              = Just "The matrix of weights is empty"
  | V.any ((/= rowLen) . V.length) ws      = Just "Weights matrix ill formed"
  | otherwise                              = Nothing
  where
    -- only forced after the null check above, so the index is safe
    rowLen = V.length (ws ! 0)