From acd5532d0fb755bcb26eee76744bc77e50ca8fe3 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 30 Aug 2024 12:01:52 +0200 Subject: [PATCH 01/10] sftp: Fix downloading files Fixes: #341 Signed-off-by: Jakub Jelen --- src/pylibsshext/sftp.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pylibsshext/sftp.pyx b/src/pylibsshext/sftp.pyx index 6331c529d..98077a00b 100644 --- a/src/pylibsshext/sftp.pyx +++ b/src/pylibsshext/sftp.pyx @@ -100,7 +100,7 @@ cdef class SFTP: raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - with open(local_file, 'wb+') as f: + with open(local_file, 'ab') as f: bytes_written = f.write(read_buffer[:file_data]) if bytes_written and file_data != bytes_written: sftp.sftp_close(rf) From 4516fab71b7ecdb3ae1fc3ef9b4bef133c7385b7 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 30 Aug 2024 11:20:00 +0200 Subject: [PATCH 02/10] tests: Reproducer for #341 Signed-off-by: Jakub Jelen --- tests/unit/sftp_test.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index fc70c4512..88add8a56 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -2,6 +2,8 @@ """Tests suite for sftp.""" +import random +import string import uuid import pytest @@ -63,3 +65,29 @@ def test_get(dst_path, src_path, sftp_session, transmit_payload): """Check that SFTP file download works.""" sftp_session.get(str(src_path), str(dst_path)) assert dst_path.read_bytes() == transmit_payload + + +@pytest.fixture +def large_payload(): + """Generate a large 1025 byte (1024 + 1B) test payload.""" + payload_len = 1024 + 1 + random_bytes = [ord(random.choice(string.printable)) for _ in range(payload_len)] + return bytes(random_bytes) + + +@pytest.fixture +def src_path_large(tmp_path, large_payload): + """Return a remote path to a 1025 byte-sized file. + + The pylibssh chunk size is 1024 so the test needs a file that would + execute at least two loops. + """ + path = tmp_path / 'large.txt' + path.write_bytes(large_payload) + return path + + +def test_get_large(dst_path, src_path_large, sftp_session, large_payload): + """Check that SFTP can download large file.""" + sftp_session.get(str(src_path_large), str(dst_path)) + assert dst_path.read_bytes() == large_payload From 0e4b3c084963cb5313951baa14618de7efabe8f1 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 30 Aug 2024 11:23:57 +0200 Subject: [PATCH 03/10] tests: Symmetric reproducer for upload Signed-off-by: Jakub Jelen --- tests/unit/sftp_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index 88add8a56..1572c0d66 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -87,6 +87,12 @@ def src_path_large(tmp_path, large_payload): return path +def test_put_large(dst_path, src_path_large, sftp_session, large_payload): + """Check that SFTP can upload large file.""" + sftp_session.put(str(src_path_large), str(dst_path)) + assert dst_path.read_bytes() == large_payload + + def test_get_large(dst_path, src_path_large, sftp_session, large_payload): """Check that SFTP can download large file.""" sftp_session.get(str(src_path_large), str(dst_path)) From beb8e461aaed31a4d18ea5a539cc67f23807ebc7 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 15 Nov 2024 14:08:48 +0100 Subject: [PATCH 04/10] sftp: Fix overwriting existing files while downloading Thanks @kucharskim for the report and testing! Signed-off-by: Jakub Jelen --- src/pylibsshext/sftp.pyx | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/pylibsshext/sftp.pyx b/src/pylibsshext/sftp.pyx index 98077a00b..0b42ff8f4 100644 --- a/src/pylibsshext/sftp.pyx +++ b/src/pylibsshext/sftp.pyx @@ -89,18 +89,19 @@ cdef class SFTP: rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_RDONLY, sftp.S_IRWXU) if rf is NULL: - raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - - while True: - file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * 1024) - if file_data == 0: - break - elif file_data < 0: - sftp.sftp_close(rf) - raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" - % (remote_file, self._get_sftp_error_str())) - - with open(local_file, 'ab') as f: + raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" + % (remote_file, self._get_sftp_error_str())) + + with open(local_file, 'wb') as f: + while True: + file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * 1024) + if file_data == 0: + break + elif file_data < 0: + sftp.sftp_close(rf) + raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" + % (remote_file, self._get_sftp_error_str())) + bytes_written = f.write(read_buffer[:file_data]) if bytes_written and file_data != bytes_written: sftp.sftp_close(rf) From 708ab289d113677893559884bffff7b1ee71269a Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 15 Nov 2024 14:09:43 +0100 Subject: [PATCH 05/10] tests: Reproduer for overriding existing files Signed-off-by: Jakub Jelen --- tests/unit/sftp_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index 1572c0d66..93446bd9f 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -50,6 +50,22 @@ def dst_path(file_paths_pair): return path +@pytest.fixture +def other_payload(): + """Generate a binary test payload.""" + uuid_name = uuid.uuid4() + return 'Original content: {name!s}'.format(name=uuid_name).encode() + + +@pytest.fixture +def dst_exists_path(file_paths_pair, other_payload): + """Return a data destination path.""" + path = file_paths_pair[1] + path.write_bytes(other_payload) + assert path.exists() + return path + + def test_make_sftp(sftp_session): """Smoke-test SFTP instance creation.""" assert sftp_session @@ -67,6 +83,12 @@ def test_get(dst_path, src_path, sftp_session, transmit_payload): assert dst_path.read_bytes() == transmit_payload +def test_get_existing(dst_exists_path, src_path, sftp_session, transmit_payload): + """Check that SFTP file download works when target file exists.""" + sftp_session.get(str(src_path), str(dst_exists_path)) + assert dst_exists_path.read_bytes() == transmit_payload + + @pytest.fixture def large_payload(): """Generate a large 1025 byte (1024 + 1B) test payload.""" From 2bf0fb30d782f15ccb711c629347b7da1de7f04c Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 15 Nov 2024 14:11:09 +0100 Subject: [PATCH 06/10] tests: Symetric test for upload to override existing file Signed-off-by: Jakub Jelen --- tests/unit/sftp_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index 93446bd9f..1822522dc 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -89,6 +89,12 @@ def test_get_existing(dst_exists_path, src_path, sftp_session, transmit_payload) assert dst_exists_path.read_bytes() == transmit_payload +def test_put_existing(dst_exists_path, src_path, sftp_session, transmit_payload): + """Check that SFTP file download works when target file exists.""" + sftp_session.put(str(src_path), str(dst_exists_path)) + assert dst_exists_path.read_bytes() == transmit_payload + + @pytest.fixture def large_payload(): """Generate a large 1025 byte (1024 + 1B) test payload.""" From e219a995e0cd0bb0cfae44318ca345a0fff15328 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Fri, 30 Aug 2024 13:21:44 +0200 Subject: [PATCH 07/10] Add history fragment Signed-off-by: Jakub Jelen --- docs/changelog-fragments/638.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changelog-fragments/638.bugfix.rst diff --git a/docs/changelog-fragments/638.bugfix.rst b/docs/changelog-fragments/638.bugfix.rst new file mode 100644 index 000000000..4c4849c70 --- /dev/null +++ b/docs/changelog-fragments/638.bugfix.rst @@ -0,0 +1 @@ +Fixed large sftp reads and proper overriding existing files -- by :user:`Jakuje`. From 0e9c2fd64e8d8bf8606152e1db3d00a462823686 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Tue, 19 Nov 2024 11:27:46 +0100 Subject: [PATCH 08/10] sftp: Increase SFTP chunk size to specification-recommended 32kB Signed-off-by: Jakub Jelen --- src/pylibsshext/includes/libssh.pxd | 4 ++ src/pylibsshext/includes/sftp.pxd | 32 ++++++++++++++- src/pylibsshext/sftp.pyx | 60 +++++++++++++++++++---------- 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/pylibsshext/includes/libssh.pxd b/src/pylibsshext/includes/libssh.pxd index 3219f1cd9..673d7eef1 100644 --- a/src/pylibsshext/includes/libssh.pxd +++ b/src/pylibsshext/includes/libssh.pxd @@ -27,6 +27,10 @@ cdef extern from "libssh/libssh.h" nogil: pass ctypedef ssh_session_struct* ssh_session + cdef struct ssh_string_struct: + pass + ctypedef ssh_string_struct* ssh_string + cdef struct ssh_key_struct: pass ctypedef ssh_key_struct* ssh_key diff --git a/src/pylibsshext/includes/sftp.pxd b/src/pylibsshext/includes/sftp.pxd index 9d3db9310..94376578d 100644 --- a/src/pylibsshext/includes/sftp.pxd +++ b/src/pylibsshext/includes/sftp.pxd @@ -17,7 +17,9 @@ # from posix.types cimport mode_t -from pylibsshext.includes.libssh cimport ssh_channel, ssh_session +from libc cimport stdint + +from pylibsshext.includes.libssh cimport ssh_channel, ssh_session, ssh_string cdef extern from "libssh/sftp.h" nogil: @@ -30,6 +32,31 @@ cdef extern from "libssh/sftp.h" nogil: pass ctypedef sftp_file_struct * sftp_file + struct sftp_attributes_struct: + char *name + char *longname + stdint.uint32_t flags + stdint.uint8_t type + stdint.uint64_t size + stdint.uint32_t uid + stdint.uint32_t gid + char *owner + char *group + stdint.uint32_t permissions + stdint.uint64_t atime64 + stdint.uint32_t atime + stdint.uint32_t atime_nseconds + stdint.uint64_t createtime + stdint.uint32_t createtime_nseconds + stdint.uint64_t mtime64 + stdint.uint32_t mtime + stdint.uint32_t mtime_nseconds + ssh_string acl + stdint.uint32_t extended_count + ssh_string extended_type + ssh_string extended_data + ctypedef sftp_attributes_struct * sftp_attributes + cdef int SSH_FX_OK cdef int SSH_FX_EOF cdef int SSH_FX_NO_SUCH_FILE @@ -55,5 +82,8 @@ cdef extern from "libssh/sftp.h" nogil: ssize_t sftp_read(sftp_file file, const void *buf, size_t count) int sftp_get_error(sftp_session sftp) + sftp_attributes sftp_stat(sftp_session session, const char *path) + + cdef extern from "sys/stat.h" nogil: cdef int S_IRWXU diff --git a/src/pylibsshext/sftp.pyx b/src/pylibsshext/sftp.pyx index 0b42ff8f4..555f0d2ad 100644 --- a/src/pylibsshext/sftp.pyx +++ b/src/pylibsshext/sftp.pyx @@ -18,11 +18,15 @@ from posix.fcntl cimport O_CREAT, O_RDONLY, O_TRUNC, O_WRONLY from cpython.bytes cimport PyBytes_AS_STRING +from cpython.mem cimport PyMem_Free, PyMem_Malloc from pylibsshext.errors cimport LibsshSFTPException from pylibsshext.session cimport get_libssh_session +SFTP_MAX_CHUNK = 32_768 # 32kB + + MSG_MAP = { sftp.SSH_FX_OK: "No error", sftp.SSH_FX_EOF: "End-of-file encountered", @@ -63,7 +67,7 @@ cdef class SFTP: rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_WRONLY | O_CREAT | O_TRUNC, sftp.S_IRWXU) if rf is NULL: raise LibsshSFTPException("Opening remote file [%s] for write failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - buffer = f.read(1024) + buffer = f.read(SFTP_MAX_CHUNK) while buffer != b"": length = len(buffer) @@ -76,39 +80,55 @@ cdef class SFTP: self._get_sftp_error_str(), ) ) - buffer = f.read(1024) + buffer = f.read(SFTP_MAX_CHUNK) sftp.sftp_close(rf) def get(self, remote_file, local_file): cdef sftp.sftp_file rf - cdef char read_buffer[1024] + cdef char *read_buffer = NULL + cdef sftp.sftp_attributes attrs remote_file_b = remote_file if isinstance(remote_file_b, unicode): remote_file_b = remote_file.encode("utf-8") + attrs = sftp.sftp_stat(self._libssh_sftp_session, remote_file_b) + if attrs is NULL: + raise LibsshSFTPException("Failed to stat the remote file [%s]. Error: [%s]" + % (remote_file, self._get_sftp_error_str())) + file_size = attrs.size + rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_RDONLY, sftp.S_IRWXU) if rf is NULL: raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - with open(local_file, 'wb') as f: - while True: - file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * 1024) - if file_data == 0: - break - elif file_data < 0: - sftp.sftp_close(rf) - raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" - % (remote_file, self._get_sftp_error_str())) - - bytes_written = f.write(read_buffer[:file_data]) - if bytes_written and file_data != bytes_written: - sftp.sftp_close(rf) - raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]" - " does not match number of bytes [%s] written to local file [%s]" - " due to error [%s]" - % (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str())) + try: + with open(local_file, 'wb') as f: + buffer_size = min(SFTP_MAX_CHUNK, file_size) + read_buffer = PyMem_Malloc(buffer_size) + if read_buffer is NULL: + raise LibsshSFTPException("Memory allocation error") + + while True: + file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * buffer_size) + if file_data == 0: + break + elif file_data < 0: + sftp.sftp_close(rf) + raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" + % (remote_file, self._get_sftp_error_str())) + + bytes_written = f.write(read_buffer[:file_data]) + if bytes_written and file_data != bytes_written: + sftp.sftp_close(rf) + raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]" + " does not match number of bytes [%s] written to local file [%s]" + " due to error [%s]" + % (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str())) + finally: + if read_buffer is not NULL: + PyMem_Free(read_buffer) sftp.sftp_close(rf) def close(self): From 70a90cabf0e591764592a12fa89c931a4be94000 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Tue, 19 Nov 2024 11:30:49 +0100 Subject: [PATCH 09/10] tests: Adjust the SFTP tests to new chunk size Signed-off-by: Jakub Jelen --- tests/unit/sftp_test.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index 1822522dc..7a179e109 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -97,17 +97,19 @@ def test_put_existing(dst_exists_path, src_path, sftp_session, transmit_payload) @pytest.fixture def large_payload(): - """Generate a large 1025 byte (1024 + 1B) test payload.""" - payload_len = 1024 + 1 - random_bytes = [ord(random.choice(string.printable)) for _ in range(payload_len)] - return bytes(random_bytes) + """Generate a large 32769 byte (32kB + 1B) test payload.""" + random_char_kilobyte = [ord(random.choice(string.printable)) for _ in range(1024)] + full_bytes_number = 32 + a_32kB_chunk = bytes(random_char_kilobyte * full_bytes_number) + the_last_byte = random.choice(random_char_kilobyte).to_bytes(length=1, byteorder='big') + return a_32kB_chunk + the_last_byte @pytest.fixture def src_path_large(tmp_path, large_payload): - """Return a remote path to a 1025 byte-sized file. + """Return a remote path to a 32769 byte-sized file. - The pylibssh chunk size is 1024 so the test needs a file that would + The pylibssh chunk size is 32769 so the test needs a file that would execute at least two loops. """ path = tmp_path / 'large.txt' From 353c4518c19537a48232afd8debaa6bd46532778 Mon Sep 17 00:00:00 2001 From: Jakub Jelen Date: Tue, 19 Nov 2024 12:53:43 +0100 Subject: [PATCH 10/10] Add changelog fragment Signed-off-by: Jakub Jelen --- docs/changelog-fragments/664.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changelog-fragments/664.bugfix.rst diff --git a/docs/changelog-fragments/664.bugfix.rst b/docs/changelog-fragments/664.bugfix.rst new file mode 100644 index 000000000..cd12b2562 --- /dev/null +++ b/docs/changelog-fragments/664.bugfix.rst @@ -0,0 +1 @@ +Improved performance of SFTP transfers by using larger transfer chunks -- by :user:`Jakuje`.