diff --git a/docs/src/documents.md b/docs/src/documents.md
index 5c959e60..2e9196be 100644
--- a/docs/src/documents.md
+++ b/docs/src/documents.md
@@ -266,6 +266,7 @@ julia> remove_words!(sd, ["lear"])
 julia> sd
 StringDocument{String}(" is mad", TextAnalysis.DocumentMetadata(Languages.English(), "Untitled Document", "Unknown Author", "Unknown Time"))
 ```
+
 At other times, you'll want to remove whole classes of words. To make this
 easier, we can use several classes of basic words defined by the Languages.jl
 package:
@@ -294,6 +295,7 @@ These special classes can all be removed using specially-named parameters:
 
 These functions use word lists, so they work for many different languages
 without change. These operations can also be combined for improved performance:
+
 * `prepare!(sd, strip_articles| strip_numbers| strip_html_tags)`
 
 In addition to removing words, it is also common to take words that are
diff --git a/docs/src/features.md b/docs/src/features.md
index 9dbe86f9..ca4a8b27 100644
--- a/docs/src/features.md
+++ b/docs/src/features.md
@@ -126,8 +126,41 @@ julia> hash_dtv(crps[1])
 0  0  0  0  0  0  0  0  0  0  0  0  0  …  0  0  0  0  0  0  0  0  0  0  0  0
 ```
 
+## TF (Term Frequency)
+
+Often we need to find the proportion of a document that is contributed by each
+term. This can be done with the term frequency function
+
+    tf(dtm)
+
+The parameter `dtm` can be of type `DocumentTermMatrix`, `SparseMatrixCSC`, or `Matrix`.
+
+```julia
+julia> crps = Corpus([StringDocument("To be or not to be"),
+                      StringDocument("To become or not to become")])
+
+julia> update_lexicon!(crps)
+
+julia> m = DocumentTermMatrix(crps)
+
+julia> tf(m)
+2×6 SparseArrays.SparseMatrixCSC{Float64,Int64} with 10 stored entries:
+  [1, 1]  =  0.166667
+  [2, 1]  =  0.166667
+  [1, 2]  =  0.333333
+  [2, 3]  =  0.333333
+  [1, 4]  =  0.166667
+  [2, 4]  =  0.166667
+  [1, 5]  =  0.166667
+  [2, 5]  =  0.166667
+  [1, 6]  =  0.166667
+  [2, 6]  =  0.166667
+```
+
 ## TF-IDF (Term Frequency - Inverse Document Frequency)
 
+    tf_idf(dtm)
+
 In many cases, raw word counts are not appropriate for use because:
 
 * (A) Some documents are longer than other documents
@@ -135,8 +168,38 @@ In many cases, raw word counts are not appropriate for use because:
 
 You can work around this by performing TF-IDF on a DocumentTermMatrix:
 
-    m = DocumentTermMatrix(crps)
-    tf_idf(m)
+```julia
+julia> crps = Corpus([StringDocument("To be or not to be"),
+                      StringDocument("To become or not to become")])
+
+julia> update_lexicon!(crps)
+
+julia> m = DocumentTermMatrix(crps)
+DocumentTermMatrix(
+  [1, 1]  =  1
+  [2, 1]  =  1
+  [1, 2]  =  2
+  [2, 3]  =  2
+  [1, 4]  =  1
+  [2, 4]  =  1
+  [1, 5]  =  1
+  [2, 5]  =  1
+  [1, 6]  =  1
+  [2, 6]  =  1, ["To", "be", "become", "not", "or", "to"], Dict("or"=>5,"not"=>4,"to"=>6,"To"=>1,"be"=>2,"become"=>3))
+
+julia> tf_idf(m)
+2×6 SparseArrays.SparseMatrixCSC{Float64,Int64} with 10 stored entries:
+  [1, 1]  =  0.0
+  [2, 1]  =  0.0
+  [1, 2]  =  0.231049
+  [2, 3]  =  0.231049
+  [1, 4]  =  0.0
+  [2, 4]  =  0.0
+  [1, 5]  =  0.0
+  [2, 5]  =  0.0
+  [1, 6]  =  0.0
+  [2, 6]  =  0.0
+```
 
 As you can see, TF-IDF has the effect of inserting 0's into the columns of
 words that occur in all documents.
This is a useful way to avoid having to diff --git a/src/metadata.jl b/src/metadata.jl index f8700570..b9b98c05 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -1,41 +1,131 @@ -############################################################################## -# -# Metadata field getters and setters -# -############################################################################## - import Languages.name +""" + title(doc) + +Return the title metadata for `doc`. + +See also: [`title!`](@ref), [`titles`](@ref), [`titles!`](@ref) +""" title(d::AbstractDocument) = d.metadata.title + +""" + language(doc) + +Return the language metadata for `doc`. + +See also: [`language!`](@ref), [`languages`](@ref), [`languages!`](@ref) +""" language(d::AbstractDocument) = d.metadata.language + +""" + author(doc) + +Return the author metadata for `doc`. + +See also: [`author!`](@ref), [`authors`](@ref), [`authors!`](@ref) +""" author(d::AbstractDocument) = d.metadata.author + +""" + timestamp(doc) + +Return the timestamp metadata for `doc`. + +See also: [`timestamp!`](@ref), [`timestamps`](@ref), [`timestamps!`](@ref) +""" timestamp(d::AbstractDocument) = d.metadata.timestamp + +""" + title!(doc, str) + +Set the title of `doc` to `str`. + +See also: [`title`](@ref), [`titles`](@ref), [`titles!`](@ref) +""" function title!(d::AbstractDocument, nv::AbstractString) d.metadata.title = nv end -function language!(d::AbstractDocument, nv::T) where T <: Language +""" + language!(doc, lang::Language) + +Set the language of `doc` to `lang`. + +# Example +```julia-repl +julia> d = StringDocument("String Document 1") + +julia> language!(d, Languages.Spanish()) + +julia> d.metadata.language +Languages.Spanish() +``` + +See also: [`language`](@ref), [`languages`](@ref), [`languages!`](@ref) +""" +function language!(d::AbstractDocument, nv::Language) d.metadata.language = nv end +""" + author!(doc, author) + +Set the author metadata of doc to `author`. + +See also: [`author`](@ref), [`authors`](@ref), [`authors!`](@ref) +""" function author!(d::AbstractDocument, nv::AbstractString) d.metadata.author = nv end +""" + timestamp!(doc, timestamp::AbstractString) + +Set the timestamp metadata of doc to `timestamp`. + +See also: [`timestamp`](@ref), [`timestamps`](@ref), [`timestamps!`](@ref) +""" function timestamp!(d::AbstractDocument, nv::AbstractString) d.metadata.timestamp = nv end -############################################################################## -# -# Vectorized getters for an entire Corpus -# -############################################################################## +""" + titles(crps) + +Return the titles for each document in `crps`. + +See also: [`titles!`](@ref), [`title`](@ref), [`title!`](@ref) +""" titles(c::Corpus) = map(d -> title(d), documents(c)) + +""" + languages(crps) + +Return the languages for each document in `crps`. + +See also: [`languages!`](@ref), [`language`](@ref), [`language!`](@ref) +""" languages(c::Corpus) = map(d -> language(d), documents(c)) + +""" + authors(crps) + +Return the authors for each document in `crps`. + +See also: [`authors!`](@ref), [`author`](@ref), [`author!`](@ref) +""" authors(c::Corpus) = map(d -> author(d), documents(c)) + +""" + timestamps(crps) + +Return the timestamps for each document in `crps`. 
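+
+# Example
+
+A small usage sketch (documents constructed without explicit metadata default
+to "Unknown Time"):
+
+```julia-repl
+julia> crps = Corpus([StringDocument("Document 1"),
+                      StringDocument("Document 2")])
+
+julia> timestamps(crps)
+2-element Array{String,1}:
+ "Unknown Time"
+ "Unknown Time"
+```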
+
+See also: [`timestamps!`](@ref), [`timestamp`](@ref), [`timestamp!`](@ref)
+"""
 timestamps(c::Corpus) = map(d -> timestamp(d), documents(c))
 
 titles!(c::Corpus, nv::AbstractString) = title!.(documents(c), nv)
@@ -43,6 +133,16 @@ languages!(c::Corpus, nv::T) where {T <: Language} = language!.(documents(c), Re
 authors!(c::Corpus, nv::AbstractString) = author!.(documents(c), Ref(nv))
 timestamps!(c::Corpus, nv::AbstractString) = timestamp!.(documents(c), Ref(nv))
 
+"""
+    titles!(crps, vec::Vector{String})
+    titles!(crps, str)
+
+Update the titles of the documents in a Corpus.
+
+If the input is a `String`, the same title is set for all documents. If the input is a `Vector`, the title of the `i`th document is set to the `i`th element of `vec`. In the latter case, the number of documents must equal the length of the vector.
+
+See also: [`titles`](@ref), [`title!`](@ref), [`title`](@ref)
+"""
 function titles!(c::Corpus, nvs::Vector{String})
     length(c) == length(nvs) || throw(DimensionMismatch("dimensions must match"))
     for (i, d) in pairs(IndexLinear(), documents(c))
@@ -50,6 +150,16 @@ function titles!(c::Corpus, nvs::Vector{String})
     end
 end
 
+"""
+    languages!(crps, langs::Vector{Language})
+    languages!(crps, lang::Language)
+
+Update the languages of the documents in a Corpus.
+
+If the input is a `Vector`, the language of the `i`th document is set to the `i`th element of `langs`. In that case, the number of documents must equal the length of the vector.
+
+See also: [`languages`](@ref), [`language!`](@ref), [`language`](@ref)
+"""
 function languages!(c::Corpus, nvs::Vector{T}) where T <: Language
     length(c) == length(nvs) || throw(DimensionMismatch("dimensions must match"))
     for (i, d) in pairs(IndexLinear(), documents(c))
@@ -57,6 +167,14 @@ function languages!(c::Corpus, nvs::Vector{T}) where T <: Language
     end
 end
 
+"""
+    authors!(crps, athrs)
+    authors!(crps, athr)
+
+Set the authors of the documents in `crps` to `athrs`, respectively.
+
+See also: [`authors`](@ref), [`author!`](@ref), [`author`](@ref)
+"""
 function authors!(c::Corpus, nvs::Vector{String})
     length(c) == length(nvs) || throw(DimensionMismatch("dimensions must match"))
     for (i, d) in pairs(IndexLinear(), documents(c))
@@ -64,6 +182,14 @@ function authors!(c::Corpus, nvs::Vector{String})
     end
 end
 
+"""
+    timestamps!(crps, times::Vector{String})
+    timestamps!(crps, time::AbstractString)
+
+Set the timestamps of the documents in `crps` to the timestamps in `times`, respectively.
+
+See also: [`timestamps`](@ref), [`timestamp!`](@ref), [`timestamp`](@ref)
+"""
 function timestamps!(c::Corpus, nvs::Vector{String})
     length(c) == length(nvs) || throw(DimensionMismatch("dimensions must match"))
     for (i, d) in pairs(IndexLinear(), documents(c))
diff --git a/src/ngramizer.jl b/src/ngramizer.jl
index cc19c2ba..6d8e0fa9 100644
--- a/src/ngramizer.jl
+++ b/src/ngramizer.jl
@@ -1,9 +1,18 @@
-##############################################################################
-#
-# Construct n-grams using single space concatenation
-#
-##############################################################################
+"""
+    ngramize(lang, tokens, n)
+
+Compute the n-grams of `tokens` of order `n`.
+
+# Example
+
+```julia-repl
+julia> ngramize(Languages.English(), ["To", "be", "or", "not", "to"], 3)
+Dict{AbstractString,Int64} with 3 entries:
+  "be or not" => 1
+  "or not to" => 1
+  "To be or"  => 1
+```
+"""
 function ngramize(lang::S, words::Vector{T}, n::Int) where {S <: Language, T <: AbstractString}
     (n == 1) && return onegramize(lang, words)
 
@@ -14,12 +23,27 @@ function ngramize(lang::S, words::Vector{T}, n::Int) where {S <: Language, T <:
     for index in 1:(n_words - n + 1)
         token = join(words[index:(index + n - 1)], " ")
         tokens[token] = get(tokens, token, 0) + 1
     end
 
     return tokens
 end
 
+"""
+    onegramize(lang, tokens)
+
+Create the unigram dict for the input tokens.
+
+# Example
+
+```julia-repl
+julia> onegramize(Languages.English(), ["To", "be", "or", "not", "to", "be"])
+Dict{String,Int64} with 5 entries:
+  "or"  => 1
+  "not" => 1
+  "to"  => 1
+  "To"  => 1
+  "be"  => 2
+```
+"""
 function onegramize(lang::S, words::Vector{T}) where {S <: Language, T <: AbstractString}
     n_words = length(words)
     tokens = Dict{T, Int}()
@@ -28,5 +52,5 @@ function onegramize(lang::S, words::Vector{T}) where {S <: Language, T <: Abstra
         tokens[word] = get(tokens, word, 0) + 1
     end
 
-    tokens
+    return tokens
 end
diff --git a/src/preprocessing.jl b/src/preprocessing.jl
index 55dfc4cf..f4dea4d7 100644
--- a/src/preprocessing.jl
+++ b/src/preprocessing.jl
@@ -1,4 +1,3 @@
-
 const strip_patterns = UInt32(0)
 const strip_corrupt_utf8 = UInt32(0x1) << 0
 const strip_case = UInt32(0x1) << 1
@@ -37,17 +36,29 @@ function mk_regex(regex_string)
 end
 
-##############################################################################
-#
-# Remove corrupt UTF8 characters
-#
-##############################################################################
+"""
+    remove_corrupt_utf8(str)
+
+Remove corrupt UTF-8 characters from `str`.
+
+See also: [`remove_corrupt_utf8!`](@ref)
+"""
 function remove_corrupt_utf8(s::AbstractString)
     return map(x->isvalid(x) ? x : ' ', s)
 end
 
 remove_corrupt_utf8!(d::FileDocument) = error("FileDocument cannot be modified")
 
+"""
+    remove_corrupt_utf8!(doc)
+    remove_corrupt_utf8!(crps)
+
+Remove corrupt UTF-8 characters from `doc` or from the documents in `crps`.
+
+Does not support `FileDocument` or a Corpus containing a `FileDocument`.
+
+See also: [`remove_corrupt_utf8`](@ref)
+"""
 function remove_corrupt_utf8!(d::StringDocument)
     d.text = remove_corrupt_utf8(d.text)
     nothing
@@ -78,16 +89,10 @@ function remove_corrupt_utf8!(crps::Corpus)
     end
 end
 
-##############################################################################
-#
-# Conversion to lowercase
-#
-##############################################################################
-
 """
-    remove_case(s::AbstractString)
+    remove_case(str)
 
-Converts the string to lowercase.
+Convert `str` to lowercase.
 
 See also: [`remove_case!`](@ref)
 """
 remove_case(s::T) where {T <: AbstractString} = lowercase(s)
 
 
 """
-    remove_case!(d::TokenDocument)
-    remove_case!(d::StringDocument)
-    remove_case!(d::NGramDocument)
+    remove_case!(doc)
+    remove_case!(crps)
 
-    remove_case!(c::Corpus)
+Convert the text of `doc` or `crps` to lowercase.
 
-Converts the text of the document or corpus to lowercase. This method does not
-works with FileDocument
+Does not support `FileDocument` or a `crps` containing a `FileDocument`.
 # Example
 
 ```julia-repl
-julia> str="The quick brown fox jumps over the lazy dog"
-julia> sd=StringDocument(str)
-StringDocument{String}("The quick brown fox jumps over the lazy dog", TextAnalysis.DocumentMetadata(Languages.English(), "Untitled Document", "Unknown Author", "Unknown Time"))
+julia> str = "The quick brown fox jumps over the lazy dog"
+julia> sd = StringDocument(str)
+A StringDocument{String}
+ * Language: Languages.English()
+ * Title: Untitled Document
+ * Author: Unknown Author
+ * Timestamp: Unknown Time
+ * Snippet: The quick brown fox jumps over the lazy dog
 
 julia> remove_case!(sd)
 
 julia> sd.text
 "the quick brown fox jumps over the lazy dog"
 ```
+
+See also: [`remove_case`](@ref)
 """
 remove_case!(d::FileDocument) = error("FileDocument cannot be modified")
 
@@ -148,21 +158,60 @@ function remove_case!(crps::Corpus)
     end
 end
 
-##############################################################################
-#
-# Stripping HTML tags
-#
-##############################################################################
+
 const script_tags = Regex("<script\\b[^>]*>([\\s\\S]*?)</script>")
 const style_tags = Regex("<style\\b[^>]*>([\\s\\S]*?)</style>")
 const html_tags = Regex("<[^>]*>")
 
+"""
+    remove_html_tags(str)
+
+Remove HTML tags from `str`, including the style and script tags.
+
+See also: [`remove_html_tags!`](@ref)
+"""
 function remove_html_tags(s::AbstractString)
     s = remove_patterns(s, script_tags)
     s = remove_patterns(s, style_tags)
    remove_patterns(s, html_tags)
 end
 
+"""
+    remove_html_tags!(doc::StringDocument)
+    remove_html_tags!(crps)
+
+Remove HTML tags from the `StringDocument` or from the documents in `crps`.
+
+Does not work for documents other than `StringDocument`.
+
+# Example
+
+```julia-repl
+julia> html_doc = StringDocument(
+           "
+            <html>
+                <head><script language=\"javascript\">x = 20;</script></head>
+                <body>
+                    <h1>Hello</h1> <a href=\"world\">world</a>
+                </body>
+            </html>
+           "
+       )
+A StringDocument{String}
+ * Language: Languages.English()
+ * Title: Untitled Document
+ * Author: Unknown Author
+ * Timestamp: Unknown Time
+ * Snippet: <html> <head><script lang
+
+julia> remove_html_tags!(html_doc)
+
+julia> strip(text(html_doc))
+"Hello world"
+```
+
+See also: [`remove_html_tags`](@ref)
+"""
 function remove_html_tags!(d::AbstractDocument)
     error("HTML tags can be removed only from a StringDocument")
 end
@@ -178,16 +227,12 @@ function remove_html_tags!(crps::Corpus)
     end
 end
 
-##############################################################################
-#
-# Remove specified words
-#
-##############################################################################
+
 """
-    remove_words!(d::AbstractDocument, words::Vector)
-    remove_words!(c::Corpus, words::Vector)
+    remove_words!(doc, words::Vector{AbstractString})
+    remove_words!(crps, words::Vector{AbstractString})
 
-Removes the tokens defined in the list `words` from the source Document or Corpus
+Remove the occurrences of `words` from `doc` or `crps`.
 
 # Example
 
@@ -217,14 +262,33 @@ end
 
 tag_pos!(entity) = error("Not yet implemented")
 
+"""
+    sparse_terms(crps, alpha=0.05)
+
+Find the sparse terms in a Corpus, i.e. those occurring in at most `alpha` fraction of the documents.
 
-##############################################################################
-#
-# Drop terms based on frequency
-#
-##############################################################################
+# Example
+
+```julia-repl
+julia> crps = Corpus([StringDocument("This is Document 1"),
+                      StringDocument("This is Document 2")])
+A Corpus with 2 documents:
+ * 2 StringDocument's
+ * 0 FileDocument's
+ * 0 TokenDocument's
+ * 0 NGramDocument's
+
+Corpus's lexicon contains 0 tokens
+Corpus's index contains 0 tokens
+
+julia> sparse_terms(crps, 0.5)
+2-element Array{String,1}:
+ "1"
+ "2"
+```
+
+See also: [`remove_sparse_terms!`](@ref), [`frequent_terms`](@ref)
+"""
 function sparse_terms(crps::Corpus, alpha::Real = alpha_sparse)
     update_lexicon!(crps)
     update_inverse_index!(crps)
@@ -239,6 +303,34 @@ function sparse_terms(crps::Corpus, alpha::Real = alpha_sparse)
     return res
 end
 
+"""
+    frequent_terms(crps, alpha=0.95)
+
+Find the frequent terms in a Corpus, i.e. those occurring in more than `alpha` fraction of the documents.
+
+# Example
+
+```julia-repl
+julia> crps = Corpus([StringDocument("This is Document 1"),
+                      StringDocument("This is Document 2")])
+A Corpus with 2 documents:
+ * 2 StringDocument's
+ * 0 FileDocument's
+ * 0 TokenDocument's
+ * 0 NGramDocument's
+
+Corpus's lexicon contains 0 tokens
+Corpus's index contains 0 tokens
+
+julia> frequent_terms(crps)
+3-element Array{String,1}:
+ "is"
+ "This"
+ "Document"
+```
+
+See also: [`remove_frequent_terms!`](@ref), [`sparse_terms`](@ref)
+"""
 function frequent_terms(crps::Corpus, alpha::Real = alpha_frequent)
     update_lexicon!(crps)
     update_inverse_index!(crps)
@@ -253,20 +345,115 @@ function frequent_terms(crps::Corpus, alpha::Real = alpha_frequent)
     return res
 end
 
-# Sparse terms occur in less than x percent of all documents
+"""
+    remove_sparse_terms!(crps, alpha=0.05)
+
+Remove the sparse terms from `crps`, i.e. those occurring in at most `alpha` fraction of the documents.
+
+# Example
+
+```julia-repl
+julia> crps = Corpus([StringDocument("This is Document 1"),
+                      StringDocument("This is Document 2")])
+A Corpus with 2 documents:
+ * 2 StringDocument's
+ * 0 FileDocument's
+ * 0 TokenDocument's
+ * 0 NGramDocument's
+
+Corpus's lexicon contains 0 tokens
+Corpus's index contains 0 tokens
+
+julia> remove_sparse_terms!(crps, 0.5)
+
+julia> crps[1].text
+"This is Document "
+
+julia> crps[2].text
+"This is Document "
+```
+
+See also: [`remove_frequent_terms!`](@ref), [`sparse_terms`](@ref)
+"""
 remove_sparse_terms!(crps::Corpus, alpha::Real = alpha_sparse) = remove_words!(crps, sparse_terms(crps, alpha))
 
-# Frequent terms occur in more than x percent of all documents
+"""
+    remove_frequent_terms!(crps, alpha=0.95)
+
+Remove the frequent terms from `crps`, i.e. those occurring in more than `alpha` fraction of the documents.
+
+# Example
+
+```julia-repl
+julia> crps = Corpus([StringDocument("This is Document 1"),
+                      StringDocument("This is Document 2")])
+A Corpus with 2 documents:
+ * 2 StringDocument's
+ * 0 FileDocument's
+ * 0 TokenDocument's
+ * 0 NGramDocument's
+
+Corpus's lexicon contains 0 tokens
+Corpus's index contains 0 tokens
+
+julia> remove_frequent_terms!(crps)
+
+julia> text(crps[1])
+" 1"
+
+julia> text(crps[2])
+" 2"
+```
+
+See also: [`remove_sparse_terms!`](@ref), [`frequent_terms`](@ref)
+"""
 remove_frequent_terms!(crps::Corpus, alpha::Real = alpha_frequent) = remove_words!(crps, frequent_terms(crps, alpha))
 
+"""
+    prepare!(doc, flags)
+    prepare!(crps, flags)
+
+Preprocess the document or corpus according to the given flags.
+
+# List of Flags
+
+* strip_patterns
+* strip_corrupt_utf8
+* strip_case
+* stem_words
+* tag_part_of_speech
+* strip_whitespace
+* strip_punctuation
+* strip_numbers
+* strip_non_letters
+* strip_indefinite_articles
+* strip_definite_articles
+* strip_articles
+* strip_prepositions
+* strip_pronouns
+* strip_stopwords
+* strip_sparse_terms
+* strip_frequent_terms
+* strip_html_tags
 
-##############################################################################
-#
-# Remove parts from document based on flags or regular expressions
-#
-##############################################################################
+# Example
+```julia-repl
+julia> doc = StringDocument("This is a document of mine")
+A StringDocument{String}
+ * Language: Languages.English()
+ * Title: Untitled Document
+ * Author: Unknown Author
+ * Timestamp: Unknown Time
+ * Snippet: This is a document of mine
+
+julia> prepare!(doc, strip_pronouns | strip_articles)
+
+julia> text(doc)
+"This is document of "
+```
+"""
 function prepare!(crps::Corpus, flags::UInt32; skip_patterns = Set{AbstractString}(), skip_words = Set{AbstractString}())
     ((flags & strip_sparse_terms) > 0) && union!(skip_words, sparse_terms(crps))
     ((flags & strip_frequent_terms) > 0) && union!(skip_words, frequent_terms(crps))
@@ -302,41 +489,49 @@ end
 
 """
-    remove_whitespace(s::AbstractString)
+    remove_whitespace(str)
 
-Squashes multiple whitespaces to a single one. And removes all leading and
-trailing whitespaces in a string.
+Squash multiple whitespaces into a single one and remove all leading and
+trailing whitespace.
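+
+# Example
+
+A small sketch of the behavior (it follows directly from the one-line
+definition below):
+
+```julia-repl
+julia> remove_whitespace("  this  is   a  text  ")
+"this is a text"
+```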
+
+See also: [`remove_whitespace!`](@ref)
 """
-remove_whitespace(s::AbstractString) = replace(strip(s), r"\s+"=>" ")
+remove_whitespace(str::AbstractString) = replace(strip(str), r"\s+"=>" ")
 
 """
-    remove_whitespace!(s::AbstractDocument)
+    remove_whitespace!(doc)
+    remove_whitespace!(crps)
 
-Squashes multiple whitespaces to a single space. And removes all leading and
-trailing whitespaces in a StringDocument and Corpus.
+Squash multiple whitespaces to a single space and remove all leading and
+trailing whitespace in `doc` or in the documents of `crps`.
 
-Does no-op for NGramDocument and TokenDocument.
+Is a no-op for `FileDocument`, `TokenDocument` or `NGramDocument`.
 
+See also: [`remove_whitespace`](@ref)
 """
 function remove_whitespace!(d::StringDocument)
-  d.text = remove_whitespace(d.text)
+    d.text = remove_whitespace(d.text)
 end
 
 function remove_whitespace!(crps::Corpus)
-  for doc in crps
-    remove_whitespace!(doc)
-  end
+    for doc in crps
+        remove_whitespace!(doc)
+    end
 end
 
 function remove_whitespace!(d::AbstractDocument)
-  nothing
+    nothing
 end
 
+"""
+    remove_patterns(str, rex::Regex)
 
-function remove_patterns(s::AbstractString, rex::Regex)
-    return replace(s, rex => "")
+Remove the parts of `str` matched by `rex`.
+
+See also: [`remove_patterns!`](@ref)
+"""
+function remove_patterns(s::AbstractString, rex::Regex)
+    return replace(s, rex => "")
 end
 
 function remove_patterns(s::SubString{T}, rex::Regex) where T <: String
@@ -360,7 +555,16 @@ function remove_patterns(s::SubString{T}, rex::Regex) where T <: String
     String(take!(iob))
 end
 
+"""
+    remove_patterns!(doc, rex::Regex)
+    remove_patterns!(crps, rex::Regex)
+
+Remove the patterns matched by `rex` from the document or from the Corpus.
+
+Does not modify `FileDocument` or a Corpus containing a `FileDocument`.
+
+See also: [`remove_patterns`](@ref)
+"""
 remove_patterns!(d::FileDocument, rex::Regex) = error("FileDocument cannot be modified")
 
 function remove_patterns!(d::StringDocument, rex::Regex)
diff --git a/src/sentiment.jl b/src/sentiment.jl
index 91ec5804..255b814e 100644
--- a/src/sentiment.jl
+++ b/src/sentiment.jl
@@ -48,12 +48,12 @@ function get_sentiment(handle_unknown, ip::Array{T, 1}, weight, rwi) where T <:
         if ele in keys(rwi) && rwi[ele] <= size(weight[:embedding_1]["embedding_1"]["embeddings:0"])[2] # there are only 5000 unique embeddings
             push!(res, rwi[ele])
         else
-           for words in handle_unknown(ele)
+            for words in handle_unknown(ele)
                 if words in keys(rwi) && rwi[words] <= size(weight[:embedding_1]["embedding_1"]["embeddings:0"])[2]
                     push!(res, rwi[words])
                 end
-           end
-
+            end
+
         end
     end
     return model(pad_sequences(res))[1]
@@ -67,7 +67,7 @@ struct SentimentModel
         # Only load Flux once it is actually needed
         global Flux
         Flux = Base.require(TextAnalysis, :Flux)
-
+
         new(read_weights(), read_word_ids())
     end
 end
@@ -89,15 +89,15 @@ end
 
 """
-    ```
-    model = SentimentAnalyzer(doc)
-    model = SentimentAnalyzer(doc, handle_unknown)
-    ```
-    Return sentiment of the input doc in range 0 to 1, 0 being least sentiment score and 1 being
-    the highest:
-    - doc = Input Document for calculating document (AbstractDocument type)
-    - handle_unknown = A function for handling unknown words. Should return an array (default x->tuple())
-    """
+    model = SentimentAnalyzer(doc)
+    model = SentimentAnalyzer(doc, handle_unknown)
+
+Predict the sentiment of the input `doc` on a scale from 0 to 1, where 0 is the lowest sentiment score and 1 is the highest.
+
+# Arguments
+- `doc` = the input document whose sentiment is to be predicted (an `AbstractDocument`)
+- `handle_unknown` = a function for handling unknown words; should return an array (default `x -> tuple()`)
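+
+# Example
+
+A minimal usage sketch (assuming the zero-argument constructor; the exact
+score depends on the bundled model weights, so no printed value is shown):
+
+```julia
+m = SentimentAnalyzer()
+d = StringDocument("a very nice thing that everyone likes")
+m(d)  # a Float in the range 0 to 1; higher means more positive sentiment
+```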
+"""
 function(m::SentimentAnalyzer)(d::AbstractDocument, handle_unknown = x->tuple())
 
     m.model(handle_unknown, tokens(d))
diff --git a/src/stemmer.jl b/src/stemmer.jl
index 72215391..89344527 100644
--- a/src/stemmer.jl
+++ b/src/stemmer.jl
@@ -8,8 +8,11 @@ const ISO_8859_1 = "ISO_8859_1"
 const CP850 = "CP850"
 const KOI8_R = "KOI8_R"
 
-##
-# lists the stemmer algorithms loaded
+"""
+    stemmer_types()
+
+List all the stemmer algorithms loaded.
+"""
 function stemmer_types()
     cptr = ccall((:sb_stemmer_list, libstemmer), Ptr{Ptr{UInt8}}, ())
     (C_NULL == cptr) && error("error getting stemmer types")
@@ -50,7 +53,7 @@ mutable struct Stemmer
     end
 end
 
-show(io::IO, stm::Stemmer) = println(io, "Stemmer algorithm:$(stm.alg) encoding:$(stm.enc)")
+Base.show(io::IO, stm::Stemmer) = println(io, "Stemmer algorithm:$(stm.alg) encoding:$(stm.enc)")
 
 function release(stm::Stemmer)
     (C_NULL == stm.cptr) && return
@@ -59,6 +62,14 @@ function release(stm::Stemmer)
     nothing
 end
 
+"""
+    stem(stemmer::Stemmer, str)
+    stem(stemmer::Stemmer, words::Array)
+
+Stem the input with the stemming algorithm of `stemmer`.
+
+See also: [`stem!`](@ref)
+"""
 function stem(stemmer::Stemmer, bstr::AbstractString)
     sres = ccall((:sb_stemmer_stem, libstemmer),
                 Ptr{UInt8},
@@ -83,13 +94,26 @@ function stem(stemmer::Stemmer, words::Array)
     for idx in 1:l
         ret[idx] = stem(stemmer, words[idx])
     end
-    ret
+    return ret
 end
 
+"""
+    stemmer_for_document(doc)
+
+Search for an appropriate stemmer based on the language of the document.
+"""
 function stemmer_for_document(d::AbstractDocument)
     Stemmer(lowercase(Languages.english_name(language(d))))
 end
 
+"""
+    stem!(doc)
+    stem!(crps)
+
+Stem the document, or the documents in `crps`, with a suitable stemmer.
+
+Stemming cannot be done for a `FileDocument` or a Corpus made of such documents.
+"""
 function stem!(d::AbstractDocument)
     stemmer = stemmer_for_document(d)
     stem!(stemmer, d)
diff --git a/src/tf_idf.jl b/src/tf_idf.jl
index a7f142c3..21babcf4 100644
--- a/src/tf_idf.jl
+++ b/src/tf_idf.jl
@@ -1,21 +1,12 @@
-##############################################################################
-#
-# TF
-#
-##############################################################################
+"""
+    tf!(dtm::AbstractMatrix{Real}, tf::AbstractMatrix{AbstractFloat})
 
-tf(dtm::Matrix{T}) where {T <: Real} = tf!(dtm, Array{Float64}(undef, size(dtm)...))
-
-tf(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf!(dtm, similar(dtm, Float64))
-
-tf!(dtm::AbstractMatrix{T}) where {T <: Real} = tf!(dtm, dtm)
-
-tf!(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf!(dtm, dtm)
+Overwrite `tf` with the term frequency of `dtm`.
 
-tf(dtm::DocumentTermMatrix) = tf(dtm.dtm)
+Works correctly if `dtm` and `tf` are the same matrix.
 
-# The second Matrix will be overwritten with the result
-# Will work correctly if dtm and tfidf are the same matrix
+See also: [`tf`](@ref), [`tf_idf`](@ref), [`tf_idf!`](@ref)
+"""
 function tf!(dtm::AbstractMatrix{T1}, tf::AbstractMatrix{T2}) where {T1 <: Real, T2 <: AbstractFloat}
     n, p = size(dtm)
 
@@ -31,7 +22,15 @@ function tf!(dtm::AbstractMatrix{T1}, tf::AbstractMatrix{T2}) where {T1 <: Real,
     return tf
 end
 
-# assumes second matrix has same nonzeros as first one
+"""
+    tf!(dtm::SparseMatrixCSC{Real}, tf::SparseMatrixCSC{AbstractFloat})
+
+Overwrite `tf` with the term frequency of `dtm`.
+
+`tf` should have the same nonzeros as `dtm`.
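+
+# Example
+
+A small in-place sketch (the counts are stored as `Float64` so the result can
+overwrite the input):
+
+```julia-repl
+julia> using SparseArrays
+
+julia> dtm = sparse([1.0 1.0; 2.0 0.0]);
+
+julia> tf!(dtm, dtm)
+2×2 SparseArrays.SparseMatrixCSC{Float64,Int64} with 3 stored entries:
+  [1, 1]  =  0.5
+  [2, 1]  =  1.0
+  [1, 2]  =  0.5
+```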
+
+See also: [`tf`](@ref), [`tf_idf`](@ref), [`tf_idf!`](@ref)
+"""
 function tf!(dtm::SparseMatrixCSC{T}, tf::SparseMatrixCSC{F}) where {T <: Real, F <: AbstractFloat}
     rows = rowvals(dtm)
     dtmvals = nonzeros(dtm)
     tfvals = nonzeros(tf)
@@ -48,31 +47,61 @@ function tf!(dtm::SparseMatrixCSC{T}, tf::SparseMatrixCSC{F}) where {T <: Real,
             tfvals[j] = dtmvals[j] / max(words_in_documents[row], one(T))
         end
     end
-    tf
+    return tf
 end
 
-##############################################################################
-#
-# TF-IDF
-#
-##############################################################################
+tf!(dtm::AbstractMatrix{T}) where {T <: Real} = tf!(dtm, dtm)
 
-tf_idf(dtm::Matrix{T}) where {T <: Real} = tf_idf!(dtm, Array{Float64}(undef, size(dtm)...))
+tf!(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf!(dtm, dtm)
 
-tf_idf(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf_idf!(dtm, similar(dtm, Float64))
+"""
+    tf(dtm::DocumentTermMatrix)
+    tf(dtm::SparseMatrixCSC{Real})
+    tf(dtm::Matrix{Real})
 
-tf_idf!(dtm::AbstractMatrix{T}) where {T <: Real} = tf_idf!(dtm, dtm)
+Compute the term frequency of the input.
 
-tf_idf!(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf_idf!(dtm, dtm)
+# Example
 
-tf_idf(dtm::DocumentTermMatrix) = tf_idf(dtm.dtm)
+```julia-repl
+julia> crps = Corpus([StringDocument("To be or not to be"),
+                      StringDocument("To become or not to become")])
 
-# This does not make sense, since DocumentTermMatrix is based on an array of integers
-#tf_idf!(dtm::DocumentTermMatrix) = tf_idf!(dtm.dtm)
+julia> update_lexicon!(crps)
+
+julia> m = DocumentTermMatrix(crps)
+
+julia> tf(m)
+2×6 SparseArrays.SparseMatrixCSC{Float64,Int64} with 10 stored entries:
+  [1, 1]  =  0.166667
+  [2, 1]  =  0.166667
+  [1, 2]  =  0.333333
+  [2, 3]  =  0.333333
+  [1, 4]  =  0.166667
+  [2, 4]  =  0.166667
+  [1, 5]  =  0.166667
+  [2, 5]  =  0.166667
+  [1, 6]  =  0.166667
+  [2, 6]  =  0.166667
+```
+
+See also: [`tf!`](@ref), [`tf_idf`](@ref), [`tf_idf!`](@ref)
+"""
+tf(dtm::DocumentTermMatrix) = tf(dtm.dtm)
+
+tf(dtm::Matrix{T}) where {T <: Real} = tf!(dtm, Array{Float64}(undef, size(dtm)...))
+
+tf(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf!(dtm, similar(dtm, Float64))
+
+"""
+    tf_idf!(dtm::AbstractMatrix{Real}, tf_idf::AbstractMatrix{AbstractFloat})
+
+Overwrite `tf_idf` with the tf-idf (Term Frequency - Inverse Document Frequency) of `dtm`.
+
+`dtm` and `tf_idf` must be matrices of the same dimensions.
+
+See also: [`tf`](@ref), [`tf!`](@ref), [`tf_idf`](@ref)
+"""
 function tf_idf!(dtm::AbstractMatrix{T1}, tfidf::AbstractMatrix{T2}) where {T1 <: Real, T2 <: AbstractFloat}
     n, p = size(dtm)
 
@@ -93,7 +122,15 @@ function tf_idf!(dtm::AbstractMatrix{T1}, tfidf::AbstractMatrix{T2}) where {T1 <
     return tfidf
 end
 
-# sparse version
+"""
+    tf_idf!(dtm::SparseMatrixCSC{Real}, tfidf::SparseMatrixCSC{AbstractFloat})
+
+Overwrite `tfidf` with the tf-idf (Term Frequency - Inverse Document Frequency) of `dtm`.
+
+The arguments must have the same number of nonzeros.
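+
+# Example
+
+A small in-place sketch, reusing the matrix from the `tf!` example above: the
+term present in both documents gets inverse document frequency `log(2/2) = 0`,
+while the term unique to document 1 gets `0.5 * log(2) ≈ 0.346574`:
+
+```julia-repl
+julia> using SparseArrays
+
+julia> dtm = sparse([1.0 1.0; 2.0 0.0]);
+
+julia> tf_idf!(dtm, dtm)
+2×2 SparseArrays.SparseMatrixCSC{Float64,Int64} with 3 stored entries:
+  [1, 1]  =  0.0
+  [2, 1]  =  0.0
+  [1, 2]  =  0.346574
+```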
+
+See also: [`tf`](@ref), [`tf_idf`](@ref), [`tf_idf!`](@ref)
+"""
 function tf_idf!(dtm::SparseMatrixCSC{T}, tfidf::SparseMatrixCSC{F}) where {T <: Real, F <: AbstractFloat}
     rows = rowvals(dtm)
     dtmvals = nonzeros(dtm)
@@ -117,5 +154,63 @@ function tf_idf!(dtm::SparseMatrixCSC{T}, tfidf::SparseMatrixCSC{F}) where {T <:
         end
     end
 
-    tfidf
+    return tfidf
 end
+
+"""
+    tf_idf!(dtm)
+
+Compute tf-idf for `dtm`, overwriting it in place.
+"""
+tf_idf!(dtm::AbstractMatrix{T}) where {T <: Real} = tf_idf!(dtm, dtm)
+
+tf_idf!(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf_idf!(dtm, dtm)
+
+# This does not make sense, since DocumentTermMatrix is based on an array of integers
+#tf_idf!(dtm::DocumentTermMatrix) = tf_idf!(dtm.dtm)
+
+"""
+    tf_idf(dtm::DocumentTermMatrix)
+    tf_idf(dtm::SparseMatrixCSC{Real})
+    tf_idf(dtm::Matrix{Real})
+
+Compute the tf-idf value (Term Frequency - Inverse Document Frequency) of the input.
+
+In many cases, raw word counts are not appropriate for use because:
+
+- Some documents are longer than other documents
+- Some words are more frequent than other words
+
+A simple workaround is to perform TF-IDF on a `DocumentTermMatrix`.
+
+# Example
+
+```julia-repl
+julia> crps = Corpus([StringDocument("To be or not to be"),
+                      StringDocument("To become or not to become")])
+
+julia> update_lexicon!(crps)
+
+julia> m = DocumentTermMatrix(crps)
+
+julia> tf_idf(m)
+2×6 SparseArrays.SparseMatrixCSC{Float64,Int64} with 10 stored entries:
+  [1, 1]  =  0.0
+  [2, 1]  =  0.0
+  [1, 2]  =  0.231049
+  [2, 3]  =  0.231049
+  [1, 4]  =  0.0
+  [2, 4]  =  0.0
+  [1, 5]  =  0.0
+  [2, 5]  =  0.0
+  [1, 6]  =  0.0
+  [2, 6]  =  0.0
+```
+
+See also: [`tf`](@ref), [`tf!`](@ref), [`tf_idf!`](@ref)
+"""
+tf_idf(dtm::DocumentTermMatrix) = tf_idf(dtm.dtm)
+
+tf_idf(dtm::SparseMatrixCSC{T}) where {T <: Real} = tf_idf!(dtm, similar(dtm, Float64))
+
+tf_idf(dtm::Matrix{T}) where {T <: Real} = tf_idf!(dtm, Array{Float64}(undef, size(dtm)...))
diff --git a/src/tokenizer.jl b/src/tokenizer.jl
index 044011f2..78ef1c9f 100644
--- a/src/tokenizer.jl
+++ b/src/tokenizer.jl
@@ -1,9 +1,37 @@
-##############################################################################
-#
-# Split string into tokens on whitespace
-#
-##############################################################################
+"""
+    tokenize(language, str)
+
+Split `str` into words and other tokens such as punctuation.
+
+# Example
+
+```julia-repl
+julia> tokenize(Languages.English(), "Too foo words!")
+4-element Array{String,1}:
+ "Too"
+ "foo"
+ "words"
+ "!"
+```
+
+See also: [`sentence_tokenize`](@ref)
+"""
 tokenize(lang::S, s::T) where {S <: Language, T <: AbstractString} = WordTokenizers.tokenize(s)
 
+
+"""
+    sentence_tokenize(language, str)
+
+Split `str` into sentences.
+
+# Example
+
+```julia-repl
+julia> sentence_tokenize(Languages.English(), "Here are few words! I am Foo Bar.")
+2-element Array{SubString{String},1}:
+ "Here are few words!"
+ "I am Foo Bar."
+```
+
+See also: [`tokenize`](@ref)
+"""
 sentence_tokenize(lang::S, s::T) where {S <: Language, T<:AbstractString} = WordTokenizers.split_sentences(s)