Skip to content

Commit

Permalink
Add HA functionality to WebHdfsClient (#2230)
Browse files Browse the repository at this point in the history
Since version 2.1.0, the hdfs package allows specifying multiple
namenodes (HA functionality) by passing a URL containing multiple
namenodes separated by ';'. This commit forwards that functionality
through to WebHdfsClient.
  • Loading branch information
adaitche authored and Tarrasch committed Oct 1, 2017
1 parent 14517c1 commit f6206ee
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion luigi/contrib/hdfs/webhdfs_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,19 @@ class WebHdfsClient(hdfs_abstract_client.HdfsFileSystem):
The library is using `this api
<https://hdfscli.readthedocs.io/en/latest/api.html>`__.
"""

def __init__(self, host=None, port=None, user=None):
    """Store the connection settings, falling back to configuration.

    Any argument that is falsy (e.g. left as ``None``) is replaced by a
    configured value:

    :param host: namenode host; defaults to ``hdfs_config.hdfs().namenode_host``.
    :param port: WebHDFS port; defaults to ``webhdfs().port``.
    :param user: user name; defaults to ``webhdfs().user`` and, if that is
                 falsy as well, to the ``USER`` environment variable
                 (``KeyError`` if it is not set).
    """
    if not host:
        host = hdfs_config.hdfs().namenode_host
    self.host = host

    if not port:
        port = webhdfs().port
    self.port = port

    if not user:
        # Configured user first; the environment is only the last resort.
        user = webhdfs().user or os.environ['USER']
    self.user = user

@property
def url(self):
    """Return the WebHDFS base URL(s) built from ``self.host`` and ``self.port``.

    ``self.host`` may name several namenodes separated by ';' (the HA form
    the hdfs package accepts). Each host becomes ``http://<host>:<port>``
    and the resulting URLs are joined with ';' again, so a single host
    still yields a single plain URL.

    :return: one URL, or several URLs separated by ';'.
    """
    # The hdfs package accepts multiple namenodes as a single string of
    # URLs separated by ';', so expand every host into its own URL.
    port = str(self.port)
    hosts = self.host.split(";")
    urls = ['http://' + host + ':' + port for host in hosts]
    return ";".join(urls)

@property
def client(self):
Expand Down

0 comments on commit f6206ee

Please sign in to comment.