From 037eb71bd69a9d203c7de7e0527748b5cd3d9211 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 27 Feb 2018 00:08:16 +0300 Subject: [PATCH] Document ways for starting Luigi inside Python code (#2301) 1. Rename command_line file to running_luigi 2. Add description how to start luigi tasks using luigi.build function from luigi.interface module --- doc/command_line.rst | 38 --------------- doc/index.rst | 2 +- doc/running_luigi.rst | 109 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 39 deletions(-) delete mode 100644 doc/command_line.rst create mode 100644 doc/running_luigi.rst diff --git a/doc/command_line.rst b/doc/command_line.rst deleted file mode 100644 index 552aaba18f..0000000000 --- a/doc/command_line.rst +++ /dev/null @@ -1,38 +0,0 @@ -.. _CommandLine: - -Running from the Command Line -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The prefered way to run Luigi tasks is through the ``luigi`` command line tool -that will be installed with the pip package. - -.. code-block:: python - - # my_module.py, available in your sys.path - import luigi - - class MyTask(luigi.Task): - x = luigi.IntParameter() - y = luigi.IntParameter(default=45) - - def run(self): - print self.x + self.y - -Should be run like this - -.. code-block:: console - - $ luigi --module my_module MyTask --x 123 --y 456 --local-scheduler - -Or alternatively like this: - -.. code-block:: console - - $ python -m luigi --module my_module MyTask --x 100 --local-scheduler - -Note that if a parameter name contains '_', it should be replaced by '-'. -For example, if MyTask had a parameter called 'my_parameter': - -.. 
code-block:: console - - $ luigi --module my_module MyTask --my-parameter 100 --local-scheduler diff --git a/doc/index.rst b/doc/index.rst index a92bdf662a..477d189dbb 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -15,7 +15,7 @@ Table of Contents workflows.rst tasks.rst parameters.rst - command_line.rst + running_luigi.rst central_scheduler.rst execution_model.rst luigi_patterns.rst diff --git a/doc/running_luigi.rst b/doc/running_luigi.rst new file mode 100644 index 0000000000..46dc8c269f --- /dev/null +++ b/doc/running_luigi.rst @@ -0,0 +1,109 @@ +.. _RunningLuigi: + +Running from the Command Line +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The preferred way to run Luigi tasks is through the ``luigi`` command line tool +that will be installed with the pip package. + +.. code-block:: python + + # my_module.py, available in your sys.path + import luigi + + class MyTask(luigi.Task): + x = luigi.IntParameter() + y = luigi.IntParameter(default=45) + + def run(self): + print(self.x + self.y) + +Should be run like this: + +.. code-block:: console + + $ luigi --module my_module MyTask --x 123 --y 456 --local-scheduler + +Or alternatively like this: + +.. code-block:: console + + $ python -m luigi --module my_module MyTask --x 100 --local-scheduler + +Note that if a parameter name contains '_', it should be replaced by '-'. +For example, if MyTask had a parameter called 'my_parameter': + +.. code-block:: console + + $ luigi --module my_module MyTask --my-parameter 100 --local-scheduler + + +Running from Python code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Another way to start tasks is from Python code, using ``luigi.build(tasks, worker_scheduler_factory=None, **env_params)`` +from the ``luigi.interface`` module. + +This way of running luigi tasks is useful if you want to get some dynamic parameters from another +source, such as a database, or provide additional logic before you start tasks. + +One notable difference is that ``build`` defaults to not using the identical process lock. 
+If you want to change this behaviour, just pass ``no_lock=False``. + + +.. code-block:: python + + class MyTask1(luigi.Task): + x = luigi.IntParameter() + y = luigi.IntParameter(default=0) + + def run(self): + print(self.x + self.y) + + + class MyTask2(luigi.Task): + x = luigi.IntParameter() + y = luigi.IntParameter(default=1) + z = luigi.IntParameter(default=2) + + def run(self): + print(self.x * self.y * self.z) + + + if __name__ == '__main__': + luigi.build([MyTask1(x=10), MyTask2(x=15, z=3)]) + + +Also, it is possible to pass additional parameters to ``build`` such as host, port, workers and local_scheduler: + +.. code-block:: python + + if __name__ == '__main__': + luigi.build([MyTask1(x=1)], workers=5) + +To meet special requirements, you can pass ``build`` your own ``worker_scheduler_factory``, +which will return your worker and/or scheduler implementations: + +.. code-block:: python + + class MyWorker(Worker): + # some custom logic + + + class MyFactory(object): + + def create_local_scheduler(self): + return scheduler.Scheduler(prune_on_get_work=True, record_task_history=False) + + def create_remote_scheduler(self, url): + return rpc.RemoteScheduler(url) + + def create_worker(self, scheduler, worker_processes, assistant=False): + # return your worker instance + return MyWorker( + scheduler=scheduler, worker_processes=worker_processes, assistant=assistant) + + + if __name__ == '__main__': + luigi.build([MyTask1(x=1)], worker_scheduler_factory=MyFactory()) + +This may be useful in some cases (for example, with a task queue).