diff --git a/mllib/pom.xml b/mllib/pom.xml index a5eeef88e9d62..cfeabe4025de6 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -98,6 +98,11 @@ test-jar test + + org.jpmml + pmml-model + 1.1.7 +
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExport.scala
new file mode 100644
index 0000000000000..274366208bd36
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExport.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.export
+
+import java.io.OutputStream
+
+/**
+ * Common contract for objects that can write a model in some export format.
+ */
+trait ModelExport {
+
+  /**
+   * Writes the exported model to the given stream.
+   *
+   * @param outputStream destination of the exported model
+   */
+  def save(outputStream: OutputStream): Unit
+
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExportFactory.scala b/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExportFactory.scala
new file mode 100644
index 0000000000000..afce4e305aaac
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExportFactory.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.export
+
+import org.apache.spark.mllib.clustering.KMeansModel
+import org.apache.spark.mllib.export.pmml.KMeansPMMLModelExport
+
+/**
+ * Factory that selects the [[ModelExport]] implementation matching a model instance.
+ */
+object ModelExportFactory {
+
+  // TODO: introduce model export typed
+
+  /**
+   * Creates the exporter for the given model.
+   *
+   * NOTE(review): the KMeans exporter is constructed without the model instance, so the
+   * exported document cannot carry the model's parameters yet; KMeansPMMLModelExport has
+   * to accept the model before that can be wired up here.
+   *
+   * @param model model to export; only KMeansModel is recognized
+   * @return the exporter for the model
+   * @throws IllegalArgumentException if the model is null or of an unsupported type
+   */
+  def createModelExport(model: Any): ModelExport = model match {
+    case _: KMeansModel => new KMeansPMMLModelExport
+    // A type pattern never matches null; reject it here instead of failing on model.getClass.
+    case null => throw new IllegalArgumentException("Export not supported for null model")
+    case _ =>
+      throw new IllegalArgumentException("Export not supported for model " + model.getClass)
+  }
+
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExportType.scala b/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExportType.scala
new file mode 100644
index 0000000000000..5abb7d6bb4e71
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/export/ModelExportType.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.export
+
+/**
+ * Export formats a model can be written in.
+ */
+object ModelExportType extends Enumeration {
+
+  type ModelExportType = Value
+
+  /** Predictive Model Markup Language. */
+  val PMML = Value
+
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala
new file mode 100644
index 0000000000000..f53443e3e646d
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.export.pmml
+
+/**
+ * PMML exporter for KMeans models.
+ */
+class KMeansPMMLModelExport extends PMMLModelExport {
+
+  populateKMeansPMML()
+
+  /**
+   * Populates the PMML document held by this exporter.
+   */
+  def populateKMeansPMML(): Unit = {
+    // TODO: set here header description
+    // The version attribute must name a PMML schema version; 4.2 is the one pmml-model 1.1.x
+    // targets.
+    pmml.setVersion("4.2")
+    // TODO: generate the model...
+  }
+
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/PMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/PMMLModelExport.scala
new file mode 100644
index 0000000000000..42203e6b9291a
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/PMMLModelExport.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.export.pmml
+
+import java.io.OutputStream
+import javax.xml.transform.stream.StreamResult
+
+import scala.beans.BeanProperty
+
+import org.dmg.pmml.PMML
+import org.jpmml.model.JAXBUtil
+
+import org.apache.spark.mllib.export.ModelExport
+
+/**
+ * Base for exporters that write a model as a PMML document.
+ */
+trait PMMLModelExport extends ModelExport {
+
+  /** PMML document that `save` marshals; starts out as an empty PMML instance. */
+  @BeanProperty
+  var pmml: PMML = new PMML()
+  // TODO: set here header app copyright and timestamp
+
+  /**
+   * Marshals the PMML document to the given stream.
+   *
+   * @param outputStream destination of the PMML document
+   */
+  def save(outputStream: OutputStream): Unit = {
+    JAXBUtil.marshalPMML(pmml, new StreamResult(outputStream))
+  }
+
+}