diff --git a/src/UnsupervisedClustering.jl b/src/UnsupervisedClustering.jl index afc1999..271d75f 100644 --- a/src/UnsupervisedClustering.jl +++ b/src/UnsupervisedClustering.jl @@ -29,8 +29,7 @@ export concatenate, GeneticAlgorithm, ClusteringChain -abstract type Algorithm end -abstract type Result end +include("abstract.jl") include("localsearch/gmm.jl") include("localsearch/kmeans.jl") diff --git a/src/abstract.jl b/src/abstract.jl new file mode 100644 index 0000000..6b12328 --- /dev/null +++ b/src/abstract.jl @@ -0,0 +1,2 @@ +abstract type AbstractAlgorithm end +abstract type AbstractResult end \ No newline at end of file diff --git a/src/concatenate.jl b/src/concatenate.jl index c4dce82..d93a332 100644 --- a/src/concatenate.jl +++ b/src/concatenate.jl @@ -1,8 +1,8 @@ -# function concatenate_k(result::Result, results::Result...) +# function concatenate_k(result::AbstractResult, results::AbstractResult...) # return result.k + sum([i.k for i in results]) # end -function concatenate_assignments(result::Result, results::Result...) +function concatenate_assignments(result::AbstractResult, results::AbstractResult...) size = length(results) accumulated = zeros(Int, size) for i in 1:size @@ -32,23 +32,23 @@ function concatenate_clusters(result::KmedoidsResult, results::KmedoidsResult... return vcat(result.clusters, [results[i].clusters .+ accumulated[i] for i in 1:size]...) end -function concatenate_objective(result::Result, results::Result...) +function concatenate_objective(result::AbstractResult, results::AbstractResult...) return result.objective + sum([i.objective for i in results]) end -function concatenate_objective_per_cluster(result::Result, results::Result...) +function concatenate_objective_per_cluster(result::AbstractResult, results::AbstractResult...) return vcat(result.objective_per_cluster, [i.objective_per_cluster for i in results]...) end -function concatenate_iterations(result::Result, results::Result...) +function concatenate_iterations(result::AbstractResult, results::AbstractResult...) return result.iterations + sum([i.iterations for i in results]) end -function concatenate_elapsed(result::Result, results::Result...) +function concatenate_elapsed(result::AbstractResult, results::AbstractResult...) return result.elapsed + sum([i.elapsed for i in results]) end -function concatenate_converged(result::Result, results::Result...) +function concatenate_converged(result::AbstractResult, results::AbstractResult...) return result.converged && all([i.converged for i in results]) end diff --git a/src/ensemble/chain.jl b/src/ensemble/chain.jl index 6d798a6..ab7f178 100644 --- a/src/ensemble/chain.jl +++ b/src/ensemble/chain.jl @@ -1,15 +1,15 @@ @doc """ - ClusteringChain(algorithms::Algorithm...) + ClusteringChain(algorithms::AbstractAlgorithm...) ClusteringChain represents a chain of clustering algorithms that are executed sequentially. It allows for applying multiple clustering algorithms in a specific order to refine and improve the clustering results. # Fields - `algorithms`: the vector of clustering algorithms that will be executed in sequence. """ -Base.@kwdef struct ClusteringChain <: Algorithm - algorithms::AbstractVector{<:Algorithm} +Base.@kwdef struct ClusteringChain <: AbstractAlgorithm + algorithms::AbstractVector{<:AbstractAlgorithm} - function ClusteringChain(algorithms::Algorithm...) + function ClusteringChain(algorithms::AbstractAlgorithm...) return new(collect(algorithms)) end end diff --git a/src/localsearch/gmm.jl b/src/localsearch/gmm.jl index 80bc80a..43c5389 100644 --- a/src/localsearch/gmm.jl +++ b/src/localsearch/gmm.jl @@ -23,7 +23,7 @@ The GMM is a clustering algorithm that models the underlying data distribution a Maximum likelihood from incomplete data via the EM algorithm. Journal of the royal statistical society: series B (methodological) 39.1 (1977): 1-22. """ -Base.@kwdef mutable struct GMM <: Algorithm +Base.@kwdef mutable struct GMM <: AbstractAlgorithm estimator::CovarianceMatrixEstimator verbose::Bool = DEFAULT_VERBOSE rng::AbstractRNG = Random.GLOBAL_RNG @@ -58,7 +58,7 @@ GMMResult struct represents the result of the GMM clustering algorithm. - `converged`: indicates whether the algorithm has converged to a solution. - `k`: the number of clusters. """ -mutable struct GMMResult{I <: Integer, R <: Real} <: Result +mutable struct GMMResult{I <: Integer, R <: Real} <: AbstractResult assignments::Vector{I} weights::Vector{R} clusters::Vector{Vector{R}} diff --git a/src/localsearch/kmeans.jl b/src/localsearch/kmeans.jl index 28922e7..5b36031 100644 --- a/src/localsearch/kmeans.jl +++ b/src/localsearch/kmeans.jl @@ -24,7 +24,7 @@ The k-means is a clustering algorithm that aims to partition data into clusters Least squares quantization in PCM. IEEE transactions on information theory 28.2 (1982): 129-137. """ -Base.@kwdef mutable struct Kmeans <: Algorithm +Base.@kwdef mutable struct Kmeans <: AbstractAlgorithm metric::SemiMetric = SqEuclidean() verbose::Bool = DEFAULT_VERBOSE rng::AbstractRNG = Random.GLOBAL_RNG @@ -56,7 +56,7 @@ KmeansResult struct represents the result of the k-means clustering algorithm. - `converged`: indicates whether the algorithm has converged to a solution. - `k`: the number of clusters. """ -mutable struct KmeansResult{I <: Integer, R <: Real} <: Result +mutable struct KmeansResult{I <: Integer, R <: Real} <: AbstractResult assignments::Vector{I} clusters::Matrix{R} objective::R diff --git a/src/localsearch/kmedoids.jl b/src/localsearch/kmedoids.jl index 5b5580a..e6bb17c 100644 --- a/src/localsearch/kmedoids.jl +++ b/src/localsearch/kmedoids.jl @@ -16,7 +16,7 @@ The k-medoids is a variation of k-means clustering algorithm that uses actual da # References """ -Base.@kwdef mutable struct Kmedoids <: Algorithm +Base.@kwdef mutable struct Kmedoids <: AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE rng::AbstractRNG = Random.GLOBAL_RNG tolerance::Real = DEFAULT_TOLERANCE @@ -47,7 +47,7 @@ KmedoidsResult struct represents the result of the k-medoids clustering algorith - `converged`: indicates whether the algorithm has converged to a solution. - `k`: the number of clusters. """ -mutable struct KmedoidsResult{I <: Integer, R <: Real} <: Result +mutable struct KmedoidsResult{I <: Integer, R <: Real} <: AbstractResult assignments::Vector{I} clusters::Vector{I} objective::R diff --git a/src/localsearch/ksegmentation.jl b/src/localsearch/ksegmentation.jl index 0ff8158..cc3f911 100644 --- a/src/localsearch/ksegmentation.jl +++ b/src/localsearch/ksegmentation.jl @@ -1,4 +1,4 @@ -Base.@kwdef mutable struct Ksegmentation <: Algorithm +Base.@kwdef mutable struct Ksegmentation <: AbstractAlgorithm end const KsegmentationResult{I <: Integer, R <: Real} = KmeansResult{I, R} diff --git a/src/markov.jl b/src/markov.jl index 81e2502..f64bf40 100644 --- a/src/markov.jl +++ b/src/markov.jl @@ -12,7 +12,7 @@ function stochastic_matrix(k::Integer, from::AbstractVector{<:Integer}, to::Abst return matrix end -function stochastic_matrix(from::Result, to::Result) +function stochastic_matrix(from::AbstractResult, to::AbstractResult) @assert from.k == to.k return stochastic_matrix(from.k, from.assignments, to.assignments) end diff --git a/src/metaheuristic/generation.jl b/src/metaheuristic/generation.jl index c0ea57e..001de16 100644 --- a/src/metaheuristic/generation.jl +++ b/src/metaheuristic/generation.jl @@ -1,9 +1,9 @@ mutable struct Generation - population::AbstractVector{<:Result} + population::AbstractVector{<:AbstractResult} empty::Set{Integer} function Generation() - return new(Vector{Result}(), Set{Int}()) + return new(Vector{AbstractResult}(), Set{Int}()) end end @@ -21,7 +21,7 @@ function remove(generation::Generation, i::Integer) return nothing end -function add!(generation::Generation, result::Result) +function add!(generation::Generation, result::AbstractResult) if length(generation.empty) > 0 generation.population[pop!(generation.empty)] = result else @@ -50,7 +50,7 @@ function get_best_solution(generation::Generation) return best_solution end -function crossover(parent1::Result, parent2::Result, data::AbstractMatrix{<:Real}, rng::AbstractRNG) +function crossover(parent1::AbstractResult, parent2::AbstractResult, data::AbstractMatrix{<:Real}, rng::AbstractRNG) k = parent1.k weights = zeros(k, k) diff --git a/src/metaheuristic/geneticalgorithm.jl b/src/metaheuristic/geneticalgorithm.jl index 63733b3..02c110f 100644 --- a/src/metaheuristic/geneticalgorithm.jl +++ b/src/metaheuristic/geneticalgorithm.jl @@ -1,6 +1,6 @@ @doc """ GeneticAlgorithm( - local_search::Algorithm + local_search::AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE max_iterations::Integer = 200 max_iterations_without_improvement::Integer = 150 @@ -20,8 +20,8 @@ GeneticAlgorithm represents a clustering algorithm that utilizes a genetic algor # References """ -Base.@kwdef struct GeneticAlgorithm <: Algorithm - local_search::Algorithm +Base.@kwdef struct GeneticAlgorithm <: AbstractAlgorithm + local_search::AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE max_iterations::Integer = 200 max_iterations_without_improvement::Integer = 150 @@ -57,7 +57,7 @@ genetic_algorithm = GeneticAlgorithm(local_search = kmeans) result = fit(genetic_algorithm, data, k) ``` """ -function fit(meta::GeneticAlgorithm, data::AbstractMatrix{<:Real}, k::Integer)::Result +function fit(meta::GeneticAlgorithm, data::AbstractMatrix{<:Real}, k::Integer)::AbstractResult generation = Generation() best_objective = 0.0 diff --git a/src/metaheuristic/multistart.jl b/src/metaheuristic/multistart.jl index 2e2f077..3330e49 100644 --- a/src/metaheuristic/multistart.jl +++ b/src/metaheuristic/multistart.jl @@ -1,6 +1,6 @@ @doc """ MultiStart( - local_search::Algorithm + local_search::AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE max_iterations::Integer = 200 ) @@ -12,8 +12,8 @@ The MultiStart approach repeatedly applies a clustering algorithm to generate mu - `verbose`: controls whether the algorithm should display additional information during execution. - `max_iterations`: represents the maximum number of iterations the algorithm will perform before stopping. """ -Base.@kwdef struct MultiStart <: Algorithm - local_search::Algorithm +Base.@kwdef struct MultiStart <: AbstractAlgorithm + local_search::AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE max_iterations::Integer = 200 end @@ -46,7 +46,7 @@ multi_start = MultiStart(local_search = kmeans) result = fit(multi_start, data, k) ``` """ -function fit(meta::MultiStart, data::AbstractMatrix{<:Real}, k::Integer)::Result +function fit(meta::MultiStart, data::AbstractMatrix{<:Real}, k::Integer)::AbstractResult best_result = fit(meta.local_search, data, k) if meta.verbose diff --git a/src/metaheuristic/randomswap.jl b/src/metaheuristic/randomswap.jl index 3dcd08e..1b6d27f 100644 --- a/src/metaheuristic/randomswap.jl +++ b/src/metaheuristic/randomswap.jl @@ -1,6 +1,6 @@ @doc """ RandomSwap( - local_search::Algorithm + local_search::AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE max_iterations::Integer = 200 max_iterations_without_improvement::Integer = 150 @@ -19,8 +19,8 @@ RandomSwap is a meta-heuristic approach used for clustering problems. It follows Efficiency of random swap clustering. Journal of big data 5.1 (2018): 1-29. """ -Base.@kwdef struct RandomSwap <: Algorithm - local_search::Algorithm +Base.@kwdef struct RandomSwap <: AbstractAlgorithm + local_search::AbstractAlgorithm verbose::Bool = DEFAULT_VERBOSE max_iterations::Integer = 200 max_iterations_without_improvement::Integer = 150 @@ -54,7 +54,7 @@ random_swap = RandomSwap(local_search = kmeans) result = fit(random_swap, data, k) ``` """ -function fit(meta::RandomSwap, data::AbstractMatrix{<:Real}, k::Integer)::Result +function fit(meta::RandomSwap, data::AbstractMatrix{<:Real}, k::Integer)::AbstractResult iterations_without_improvement = 0 best_result = fit(meta.local_search, data, k) diff --git a/src/print.jl b/src/print.jl index a921123..8895867 100644 --- a/src/print.jl +++ b/src/print.jl @@ -2,7 +2,7 @@ # return (number == 0) ? 1 : (log10(number) + 1) # end -print_objective(result::Result) = print_objective(result.objective) +print_objective(result::AbstractResult) = print_objective(result.objective) function print_objective(objective::Real) @printf("%12.4f ", objective) return nothing @@ -13,13 +13,13 @@ function print_iteration(iteration::Integer) return nothing end -print_iterations(result::Result) = print_iterations(result.iterations) +print_iterations(result::AbstractResult) = print_iterations(result.iterations) function print_iterations(iterations::Integer) @printf("%8dit ", iterations) return nothing end -print_elapsed(result::Result) = print_elapsed(result.elapsed) +print_elapsed(result::AbstractResult) = print_elapsed(result.elapsed) function print_elapsed(elapsed::Real) @printf("%10.2fs ", elapsed) return nothing @@ -30,7 +30,7 @@ function print_change(change::Real) return nothing end -function print_result(result::Result) +function print_result(result::AbstractResult) print_objective(result) print_iterations(result) print_elapsed(result) diff --git a/src/result/counts.jl b/src/result/counts.jl index 49bfe22..ac3352a 100644 --- a/src/result/counts.jl +++ b/src/result/counts.jl @@ -1,3 +1,3 @@ -function counts(result::Result) +function counts(result::AbstractResult) return StatsBase.counts(result.assignments, result.k) end diff --git a/src/seed.jl b/src/seed.jl index dbc2ed5..152b47b 100644 --- a/src/seed.jl +++ b/src/seed.jl @@ -1,4 +1,4 @@ -function seed!(algorithm::Algorithm, seed::Integer) +function seed!(algorithm::AbstractAlgorithm, seed::Integer) Random.seed!(algorithm.rng, seed) return nothing end diff --git a/test/gmmsk.jl b/test/gmmsk.jl index ccd8160..88c6a16 100644 --- a/test/gmmsk.jl +++ b/test/gmmsk.jl @@ -1,4 +1,4 @@ -Base.@kwdef mutable struct GMMSK <: UnsupervisedClustering.Algorithm +Base.@kwdef mutable struct GMMSK <: UnsupervisedClustering.AbstractAlgorithm verbose::Bool = false rng::AbstractRNG = Random.GLOBAL_RNG tolerance::Real = 1e-3