diff --git a/docs/_data/menu-ml.yaml b/docs/_data/menu-ml.yaml
index 8e366f7f029aa..8b43c75f511fb 100644
--- a/docs/_data/menu-ml.yaml
+++ b/docs/_data/menu-ml.yaml
@@ -1,7 +1,7 @@
- text: Basic statistics
url: ml-statistics.html
- text: Data sources
- url: ml-datasource
+ url: ml-datasource.html
- text: Pipelines
url: ml-pipeline.html
- text: Extracting, transforming and selecting features
diff --git a/docs/ml-datasource.md b/docs/ml-datasource.md
index 71bec9c798ee1..5dc2d057a9163 100644
--- a/docs/ml-datasource.md
+++ b/docs/ml-datasource.md
@@ -63,7 +63,7 @@ scala> df.select("image.origin", "image.width", "image.height").show(truncate=fa
[`ImageDataSource`](api/java/org/apache/spark/ml/source/image/ImageDataSource.html)
-implements Spark SQL data source API for loading image data as DataFrame.
+implements Spark SQL data source API for loading image data as a DataFrame.
{% highlight java %}
Dataset imagesDF = spark.read().format("image").option("dropInvalid", true).load("data/mllib/images/origin/kittens");
@@ -83,7 +83,7 @@ Will output:
-In PySpark we provide Spark SQL data source API for loading image data as DataFrame.
+In PySpark we provide Spark SQL data source API for loading image data as a DataFrame.
{% highlight python %}
>>> df = spark.read.format("image").option("dropInvalid", true).load("data/mllib/images/origin/kittens")
@@ -100,7 +100,7 @@ In PySpark we provide Spark SQL data source API for loading image data as DataFr
-In SparkR we provide Spark SQL data source API for loading image data as DataFrame.
+In SparkR we provide Spark SQL data source API for loading image data as a DataFrame.
{% highlight r %}
> df = read.df("data/mllib/images/origin/kittens", "image")
@@ -120,4 +120,118 @@ In SparkR we provide Spark SQL data source API for loading image data as DataFra
+
+
+
+## LIBSVM data source
+
+This `LIBSVM` data source is used to load 'libsvm' type files from a directory.
+The loaded DataFrame has two columns: `label` containing labels stored as doubles and `features` containing feature vectors stored as Vectors.
+The schemas of the columns are:
+ - label: `DoubleType` (represents the instance label)
+ - features: `VectorUDT` (represents the feature vector)
+
+
+
+[`LibSVMDataSource`](api/scala/index.html#org.apache.spark.ml.source.libsvm.LibSVMDataSource)
+implements Spark SQL data source API for loading `LIBSVM` data as a DataFrame.
+
+{% highlight scala %}
+scala> val df = spark.read.format("libsvm").option("numFeatures", "780").load("data/mllib/sample_libsvm_data.txt")
+df: org.apache.spark.sql.DataFrame = [label: double, features: vector]
+
+scala> df.show(10)
++-----+--------------------+
+|label| features|
++-----+--------------------+
+| 0.0|(780,[127,128,129...|
+| 1.0|(780,[158,159,160...|
+| 1.0|(780,[124,125,126...|
+| 1.0|(780,[152,153,154...|
+| 1.0|(780,[151,152,153...|
+| 0.0|(780,[129,130,131...|
+| 1.0|(780,[158,159,160...|
+| 1.0|(780,[99,100,101,...|
+| 0.0|(780,[154,155,156...|
+| 0.0|(780,[127,128,129...|
++-----+--------------------+
+only showing top 10 rows
+{% endhighlight %}
+
+
+
+[`LibSVMDataSource`](api/java/org/apache/spark/ml/source/libsvm/LibSVMDataSource.html)
+implements Spark SQL data source API for loading `LIBSVM` data as a DataFrame.
+
+{% highlight java %}
+Dataset df = spark.read().format("libsvm").option("numFeatures", "780").load("data/mllib/sample_libsvm_data.txt");
+df.show(10);
+/*
+Will output:
++-----+--------------------+
+|label| features|
++-----+--------------------+
+| 0.0|(780,[127,128,129...|
+| 1.0|(780,[158,159,160...|
+| 1.0|(780,[124,125,126...|
+| 1.0|(780,[152,153,154...|
+| 1.0|(780,[151,152,153...|
+| 0.0|(780,[129,130,131...|
+| 1.0|(780,[158,159,160...|
+| 1.0|(780,[99,100,101,...|
+| 0.0|(780,[154,155,156...|
+| 0.0|(780,[127,128,129...|
++-----+--------------------+
+only showing top 10 rows
+*/
+{% endhighlight %}
+
+
+
+In PySpark we provide Spark SQL data source API for loading `LIBSVM` data as a DataFrame.
+
+{% highlight python %}
+>>> df = spark.read.format("libsvm").option("numFeatures", "780").load("data/mllib/sample_libsvm_data.txt")
+>>> df.show(10)
++-----+--------------------+
+|label| features|
++-----+--------------------+
+| 0.0|(780,[127,128,129...|
+| 1.0|(780,[158,159,160...|
+| 1.0|(780,[124,125,126...|
+| 1.0|(780,[152,153,154...|
+| 1.0|(780,[151,152,153...|
+| 0.0|(780,[129,130,131...|
+| 1.0|(780,[158,159,160...|
+| 1.0|(780,[99,100,101,...|
+| 0.0|(780,[154,155,156...|
+| 0.0|(780,[127,128,129...|
++-----+--------------------+
+only showing top 10 rows
+{% endhighlight %}
+
+
+
+In SparkR we provide Spark SQL data source API for loading `LIBSVM` data as a DataFrame.
+
+{% highlight r %}
+> df = read.df("data/mllib/sample_libsvm_data.txt", "libsvm")
+> head(select(df, df$label, df$features), 10)
+
+ label features
+1 0
+2 1
+3 1
+4 1
+5 1
+6 0
+7 1
+8 1
+9 0
+10 0
+
+{% endhighlight %}
+
+
+