Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ssh transport : adapt for subclassing #4363

Merged
merged 13 commits into from
Sep 23, 2020
85 changes: 69 additions & 16 deletions aiida/transports/plugins/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,29 @@ def open(self):
)
raise

# Open also a SFTPClient
self._sftp = self._client.open_sftp()
# Set the current directory to a explicit path, and not to None
self._sftp.chdir(self._sftp.normalize('.'))
# Open also a File transport client. SFTP by default, pure SSH in ssh_only
self.open_file_transport()

return self

def open_file_transport(self):
"""
Open the SFTP channel used for file operations.

The SFTP client is obtained from ``self._client.open_sftp()`` and stored
in ``self._sftp``.

:raises InvalidOperation: if opening the SFTP channel raises a paramiko
``SSHException``; the error message suggests trying the ``ssh_only``
transport from the aiida-sshonly plugin instead.
"""
from aiida.common.exceptions import InvalidOperation
from paramiko.ssh_exception import SSHException
try:
self._sftp = self._client.open_sftp()
except SSHException:
raise InvalidOperation(
'Error in ssh transport plugin. This may be due to the remote computer not supporting SFTP. '
'Try setting it up with the aiida.transports:ssh_only transport from the aiida-sshonly plugin instead.'
)

self._is_open = True

return self
# Set the current directory to an explicit path, and not to None
self._sftp.chdir(self._sftp.normalize('.'))

def close(self):
"""
Expand Down Expand Up @@ -521,7 +536,7 @@ def chdir(self, path):
# Note: I don't store the result of the function; if I have no
# read permissions, this will raise an exception.
try:
self.sftp.stat('.')
self.stat('.')
except IOError as exc:
if 'Permission denied' in str(exc):
self.chdir(old_path)
Expand All @@ -533,6 +548,35 @@ def normalize(self, path='.'):
"""
return self.sftp.normalize(path)

def stat(self, path):
"""
Retrieve information about a file on the remote system.

Thin wrapper that delegates to ``self.sftp.stat``. The return value is
an object whose attributes correspond to the attributes of Python's
``stat`` structure as returned by ``os.stat``, except that it contains
fewer fields.
The fields supported are: ``st_mode``, ``st_size``, ``st_uid``,
``st_gid``, ``st_atime``, and ``st_mtime``.

:param str path: the filename to stat

:return: a `paramiko.sftp_attr.SFTPAttributes` object containing
attributes about the given file.
"""
return self.sftp.stat(path)

def lstat(self, path):
"""
Retrieve information about a file on the remote system, without
following symbolic links.

Thin wrapper that delegates to ``self.sftp.lstat``. Apart from not
following symbolic links, this behaves exactly the same as `stat`.

:param str path: the filename to stat

:return: a `paramiko.sftp_attr.SFTPAttributes` object containing
attributes about the given file.
"""
return self.sftp.lstat(path)

def getcwd(self):
"""
Return the current working directory for this SFTP session, as
Expand Down Expand Up @@ -663,7 +707,7 @@ def isdir(self, path):
if not path:
return False
try:
return S_ISDIR(self.sftp.stat(path).st_mode)
return S_ISDIR(self.stat(path).st_mode)
except IOError as exc:
if getattr(exc, 'errno', None) == 2:
# errno=2 means path does not exist: I return False
Expand Down Expand Up @@ -842,7 +886,7 @@ def puttree(self, localpath, remotepath, callback=None, dereference=True, overwr
this_basename = os.path.relpath(path=this_source[0], start=localpath)

try:
self.sftp.stat(os.path.join(remotepath, this_basename))
self.stat(os.path.join(remotepath, this_basename))
except IOError as exc:
import errno
if exc.errno == errno.ENOENT: # Missing file
Expand Down Expand Up @@ -1010,7 +1054,7 @@ def get_attribute(self, path):
"""
from aiida.transports.util import FileAttribute

paramiko_attr = self.sftp.lstat(path)
paramiko_attr = self.lstat(path)
aiida_attr = FileAttribute()
# map the paramiko class into the aiida one
# note that paramiko object contains more informations than the aiida
Expand Down Expand Up @@ -1183,11 +1227,11 @@ def isfile(self, path):
try:
self.logger.debug(
"stat for path '{}' ('{}'): {} [{}]".format(
path, self.sftp.normalize(path), self.sftp.stat(path),
self.sftp.stat(path).st_mode
path, self.normalize(path), self.stat(path),
self.stat(path).st_mode
)
)
return S_ISREG(self.sftp.stat(path).st_mode)
return S_ISREG(self.stat(path).st_mode)
except IOError as exc:
if getattr(exc, 'errno', None) == 2:
# errno=2 means path does not exist: I return False
Expand Down Expand Up @@ -1228,7 +1272,7 @@ def _exec_command_internal(self, command, combine_stderr=False, bufsize=-1): #
else:
command_to_execute = command

self.logger.debug('Command to be executed: {}'.format(command_to_execute))
self.logger.debug('Command to be executed: {}'.format(command_to_execute[:1000]))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the rest of the re-organization and wrapping of methods seems ok to me, but this is the only modification that I am unsure about. Can't you be losing important log info like this? I would also maybe ask for the input of @giovannipizzi and/or @sphuber on this, since they probably know better what to expect inside that command and why it is being logged.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also interested to know why @adegomme thinks this should be cropped.

Copy link
Contributor Author

@adegomme adegomme Sep 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To send a file with the sshonly plugin, the resulting command is equivalent to ssh remote "echo '$(cat localfile)' | cat > remotefile", with the '$(cat localfile)' being a Python read instead, as I never managed to have the cat version behave correctly.
This logger call means that the whole content of the file is printed in the log.
Example when running 'verdi computer test':
09/17/2020 10:17:12 AM <1120861> aiida.transport.SshOnlyTransport: [DEBUG] Command to be executed: cd '/path/aiida' && echo 'Test from '"'"'verdi computer test'"'"' on 2020-09-17T10:17:05.830707' | cat > /path/aiida/tmpbtteknws

So this adds the whole file content to the log output when debug is activated, which is not great. Every input file, structure file, and pseudopotential file gets printed to stdout or the log when debug is on, and when several MB get dumped on you, this can be troublesome.
Cropping is meant to avoid ruining the log and the terminal. 1000 characters is a bit arbitrary (but seemed to be large enough not to affect other messages in my tests), and maybe we could add a note when the message is cropped (simply "...", or "cropped to 1000 chars to maintain readability") to avoid confusion.
Another idea would be to have an optional parameter on _exec_command_internal to disable this log message, but you would not see any file upload log in this case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks for the explanation. In any case, even if we keep this, we should definitely comment this, because it is not obvious at all, especially given that the actual use case comes from a subclass that lives in another plugin package. @giovannipizzi do you see a way around this? If we make the cropping size an optional argument to the function, at least SshOnly can override the copy file method to call _exec_command_internal to pass max_log_size=1000 or something to that effect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After discussing with @sphuber I think a good solution is to have a class attribute, e.g. _MAX_EXEC_COMMAND_LOG_SIZE=None in this base class.
Here the logic would not crop if self._MAX_EXEC_COMMAND_LOG_SIZE is None, otherwise it would crop to this length (command_to_execute[:self._MAX_EXEC_COMMAND_LOG_SIZE]).

In this way the current behaviour is unchanged, and in your subclass @adegomme you can just set _MAX_EXEC_COMMAND_LOG_SIZE to the value you want.

@adegomme what do you think? Are you OK to do this small change? After that, it's ready to merge for me.


# Note: The default shell will eat one level of escaping, while
# 'bash -l -c ...' will eat another. Thus, we need to escape again.
Expand Down Expand Up @@ -1309,6 +1353,15 @@ def gotocomputer_command(self, remotedir):
)
return cmd

def _symlink(self, source, dest):
"""
Create a symbolic link by delegating to ``self.sftp.symlink``.

Wraps the SFTP symlink call without breaking the API.

:param source: source of link
:param dest: link to create
"""
self.sftp.symlink(source, dest)

def symlink(self, remotesource, remotedestination):
"""
Create a symbolic link between the remote source and the remote
Expand All @@ -1330,17 +1383,17 @@ def symlink(self, remotesource, remotedestination):
for this_source in self.glob(source):
# create the name of the link: take the last part of the path
this_dest = os.path.join(remotedestination, os.path.split(this_source)[-1])
self.sftp.symlink(this_source, this_dest)
self._symlink(this_source, this_dest)
else:
self.sftp.symlink(source, dest)
self._symlink(source, dest)

def path_exists(self, path):
"""
Check if path exists
"""
import errno
try:
self.sftp.stat(path)
self.stat(path)
except IOError as exc:
if exc.errno == errno.ENOENT:
return False
Expand Down