Skip to content

Commit

Permalink
Merge pull request #123 from goldmann/gh-122-dyplicate-when-symlinks
Browse files Browse the repository at this point in the history
Files added when parent directory is a symlink
  • Loading branch information
goldmann authored Sep 1, 2016
2 parents 54cfdd1 + 143bb55 commit e94e94c
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 24 deletions.
58 changes: 41 additions & 17 deletions docker_squash/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def _marker_files(self, tar, members):

return marker_files

def _add_markers(self, markers, tar, files_in_layers):
def _add_markers(self, markers, tar, files_in_layers, added_symlinks):
"""
This method is responsible for adding back all markers that were not
added to the squashed layer AND files they refer to can be found in layers
Expand All @@ -503,11 +503,17 @@ def _add_markers(self, markers, tar, files_in_layers):
for marker, marker_file in six.iteritems(markers):
actual_file = marker.name.replace('.wh.', '')
normalized_file = self._normalize_path(actual_file)

should_be_added_back = False

if self._file_should_be_skipped(normalized_file, added_symlinks):
self.log.debug(
"Skipping '%s' marker file, this file is on a symlink path" % normalized_file)
continue

if normalized_file in tar_files:
self.log.debug(
"Skipping '%s' marker file, this file was added earlier for some reason..." % marker.name)
"Skipping '%s' marker file, this file was added earlier for some reason..." % normalized_file)
continue

if files_in_layers:
Expand Down Expand Up @@ -541,16 +547,18 @@ def _normalize_path(self, path):
def _add_hardlinks(self, squashed_tar, squashed_files, to_skip, skipped_hard_links):
for layer, hardlinks_in_layer in enumerate(skipped_hard_links):
# We need to start from 1, that's why we bump it here
current_layer = layer+1
current_layer = layer + 1
for member in six.itervalues(hardlinks_in_layer):
normalized_name = self._normalize_path(member.name)
normalized_linkname = self._normalize_path(member.linkname)

# Find out if the name is on the list of files to skip - if it is - get the layer number
# where it was found
layer_skip_name = self._file_should_be_skipped(normalized_name, to_skip)
layer_skip_name = self._file_should_be_skipped(
normalized_name, to_skip)
# Do the same for linkname
layer_skip_linkname = self._file_should_be_skipped(normalized_linkname, to_skip)
layer_skip_linkname = self._file_should_be_skipped(
normalized_linkname, to_skip)

# We need to check if we should skip adding back the hard link
# This can happen in the following situations:
Expand All @@ -573,7 +581,8 @@ def _add_file(self, member, content, squashed_tar, squashed_files, to_skip):
normalized_name = self._normalize_path(member.name)

if normalized_name in squashed_files:
self.log.debug("Skipping file '%s' because it is already squashed" % normalized_name)
self.log.debug(
"Skipping file '%s' because it is already squashed" % normalized_name)
return

if self._file_should_be_skipped(normalized_name, to_skip):
Expand All @@ -592,9 +601,10 @@ def _add_file(self, member, content, squashed_tar, squashed_files, to_skip):
squashed_files.append(normalized_name)

def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links):
added_symlinks = []
for layer, symlinks_in_layer in enumerate(skipped_sym_links):
# We need to start from 1, that's why we bump it here
current_layer = layer+1
current_layer = layer + 1
for member in six.itervalues(symlinks_in_layer):

# Handling symlinks. This is similar to hard links with one
Expand All @@ -606,16 +616,24 @@ def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links

# File is already in squashed files, skipping
if normalized_name in squashed_files:
self.log.debug("Found a symbolic link '%s' which is already squashed, skipping" % (normalized_name))
self.log.debug(
"Found a symbolic link '%s' which is already squashed, skipping" % (normalized_name))
continue

if self._file_should_be_skipped(normalized_name, added_symlinks):
self.log.debug(
"Found a symbolic link '%s' which is on a path to previously squashed symlink, skipping" % (normalized_name))
continue
# Find out if the name is on the list of files to skip - if it is - get the layer number
# where it was found
layer_skip_name = self._file_should_be_skipped(normalized_name, to_skip)
layer_skip_name = self._file_should_be_skipped(
normalized_name, to_skip)
# Do the same for linkname
layer_skip_linkname = self._file_should_be_skipped(normalized_linkname, to_skip)
layer_skip_linkname = self._file_should_be_skipped(
normalized_linkname, to_skip)

# If name or linkname was found in the lists of files to be skipped or it's not found in the squashed files
# If name or linkname was found in the lists of files to be
# skipped or it's not found in the squashed files
if layer_skip_name and current_layer > layer_skip_name or layer_skip_linkname and current_layer > layer_skip_linkname:
self.log.debug("Found a symbolic link '%s' to a file which is marked to be skipped: '%s', skipping link too" % (
normalized_name, normalized_linkname))
Expand All @@ -624,11 +642,13 @@ def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links
self.log.debug("Adding symbolic link '%s' pointing to '%s' back..." % (
normalized_name, normalized_linkname))

to_skip.append([normalized_name])
added_symlinks.append([normalized_name])

squashed_files.append(normalized_name)
squashed_tar.addfile(member)

return added_symlinks

def _squash_layers(self, layers_to_squash, layers_to_move):
self.log.info("Starting squashing...")

Expand Down Expand Up @@ -744,20 +764,24 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
if member.isfile():
content = layer_tar.extractfile(member)

self._add_file(member, content, squashed_tar, squashed_files, to_skip)
self._add_file(member, content,
squashed_tar, squashed_files, to_skip)

skipped_hard_links.append(skipped_hard_link_files)
skipped_files.append(skipped_files_in_layer)

self._add_hardlinks(squashed_tar, squashed_files, to_skip, skipped_hard_links)
self._add_symlinks(squashed_tar, squashed_files, to_skip, skipped_sym_links)
self._add_hardlinks(squashed_tar, squashed_files,
to_skip, skipped_hard_links)
added_symlinks = self._add_symlinks(
squashed_tar, squashed_files, to_skip, skipped_sym_links)

for layer in skipped_files:
for member, content in six.itervalues(layer):
self._add_file(member, content, squashed_tar, squashed_files, to_skip)
self._add_file(member, content, squashed_tar,
squashed_files, added_symlinks)

if files_in_layers_to_move:
self._add_markers(skipped_markers, squashed_tar,
files_in_layers_to_move)
files_in_layers_to_move, added_symlinks)

self.log.info("Squashing finished!")
30 changes: 28 additions & 2 deletions tests/test_integ_squash.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def __enter__(self):
f = BytesIO(self.dockerfile.encode('utf-8'))
for line in self.docker.build(fileobj=f, tag=self.tag, rm=True):
try:
print(json.loads(line)["stream"].strip())
print(json.loads(line.decode("utf-8"))["stream"].strip())
except:
print(line)

Expand Down Expand Up @@ -909,7 +909,6 @@ def test_should_handle_symlinks_to_directory(self):

with self.Image(dockerfile) as image:
with self.SquashedImage(image, 3, numeric=True) as squashed_image:

with self.Container(squashed_image) as container:
container.assertFileExists('data-template')
container.assertFileExists('data-template/tmp')
Expand All @@ -918,6 +917,33 @@ def test_should_handle_symlinks_to_directory(self):
container.assertFileExists('tmp/dir')
container.assertFileDoesNotExist('tmp/dir/file')

# https://github.com/goldmann/docker-squash/issues/122
def test_should_not_add_duplicate_files(self):
dockerfile = '''
FROM {}
RUN mkdir -p /etc/systemd/system/multi-user.target.wants
RUN mkdir -p /etc/systemd/system/default.target.wants
RUN touch /etc/systemd/system/multi-user.target.wants/remote-fs.target
RUN touch /etc/systemd/system/default.target.wants/remote-fs.target
# End of preparations, going to squash from here
RUN find /etc/systemd/system/* '!' -name '*.wants' | xargs rm -rvf
RUN rmdir -v /etc/systemd/system/multi-user.target.wants && mkdir /etc/systemd/system/container-ipa.target.wants && ln -s /etc/systemd/system/container-ipa.target.wants /etc/systemd/system/multi-user.target.wants
RUN ln -s /etc/group /etc/systemd/system/default.target
RUN ln -s /etc/group /etc/systemd/system/container-ipa.target.wants/ipa-server-configure-first.service
RUN echo "/etc/systemd/system" > /etc/volume-data-list
RUN set -e ; cd / ; mkdir /data-template ; cat /etc/volume-data-list | while read i ; do echo $i ; if [ -e $i ] ; then tar cf - .$i | ( cd /data-template && tar xf - ) ; fi ; mkdir -p $( dirname $i ) ; if [ "$i" == /var/log/ ] ; then mv /var/log /var/log-removed ; else rm -rf $i ; fi ; ln -sf /data$i $i ; done
'''.format(TestIntegSquash.BUSYBOX_IMAGE)

with self.Image(dockerfile) as image:
with self.SquashedImage(image, 6, numeric=True, output_path="tox.tar") as squashed_image:
with self.Container(squashed_image) as container:
container.assertFileExists('data-template/etc/systemd/system/container-ipa.target.wants')
container.assertFileExists('data-template/etc/systemd/system/default.target.wants')
container.assertFileExists('data-template/etc/systemd/system/default.target')
container.assertFileExists('data-template/etc/systemd/system/multi-user.target.wants')
container.assertFileExists('data-template/etc/systemd/system/container-ipa.target.wants/ipa-server-configure-first.service')
container.assertFileExists('etc/systemd/system')


class NumericValues(IntegSquash):
@classmethod
Expand Down
28 changes: 23 additions & 5 deletions tests/test_unit_v1_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def setUp(self):
self.squash = Image(self.log, self.docker_client, self.image, None)

def test_should_not_fail_with_empty_list_of_markers_to_add(self):
self.squash._add_markers({}, None, None)
self.squash._add_markers({}, None, None, [])

def test_should_add_all_marker_files_to_empty_tar(self):
tar = mock.Mock()
Expand All @@ -236,14 +236,32 @@ def test_should_add_all_marker_files_to_empty_tar(self):
type(marker_1).name = mock.PropertyMock(return_value='.wh.marker_1')

markers = {marker_1: 'file'}
self.squash._add_markers(markers, tar, {})
self.squash._add_markers(markers, tar, {}, [])

self.assertTrue(len(tar.addfile.mock_calls) == 1)
tar_info, marker_file = tar.addfile.call_args[0]
self.assertIsInstance(tar_info, tarfile.TarInfo)
self.assertTrue(marker_file == 'file')
self.assertTrue(tar_info.isfile())

def test_should_add_all_marker_files_to_empty_tar_besides_what_should_be_skipped(self):
tar = mock.Mock()
tar.getnames.return_value = []

marker_1 = mock.Mock()
type(marker_1).name = mock.PropertyMock(return_value='.wh.marker_1')
marker_2 = mock.Mock()
type(marker_2).name = mock.PropertyMock(return_value='.wh.marker_2')

markers = {marker_1: 'file1', marker_2: 'file2'}
self.squash._add_markers(markers, tar, {'1234layerdid': ['/marker_1', '/marker_2']}, [['/marker_1']])

self.assertEqual(len(tar.addfile.mock_calls), 1)
tar_info, marker_file = tar.addfile.call_args[0]
self.assertIsInstance(tar_info, tarfile.TarInfo)
self.assertTrue(marker_file == 'file2')
self.assertTrue(tar_info.isfile())

def test_should_skip_a_marker_file_if_file_is_in_unsquashed_layers(self):
tar = mock.Mock()
# List of files in the squashed tar
Expand All @@ -257,7 +275,7 @@ def test_should_skip_a_marker_file_if_file_is_in_unsquashed_layers(self):
markers = {marker_1: 'marker_1', marker_2: 'marker_2'}
# List of files in all layers to be moved
files_in_moved_layers = {'1234layerdid': ['/some/file', '/marker_2']}
self.squash._add_markers(markers, tar, files_in_moved_layers)
self.squash._add_markers(markers, tar, files_in_moved_layers, [])

self.assertEqual(len(tar.addfile.mock_calls), 1)
tar_info, marker_file = tar.addfile.call_args[0]
Expand All @@ -275,7 +293,7 @@ def test_should_not_add_any_marker_files(self):
type(marker_2).name = mock.PropertyMock(return_value='.wh.marker_2')

markers = {marker_1: 'file1', marker_2: 'file2'}
self.squash._add_markers(markers, tar, {'1234layerdid': ['some/file', 'marker_1', 'marker_2']})
self.squash._add_markers(markers, tar, {'1234layerdid': ['some/file', 'marker_1', 'marker_2']}, [])

self.assertTrue(len(tar.addfile.mock_calls) == 0)

Expand All @@ -293,7 +311,7 @@ def test_should_add_marker_file_when_tar_has_prefixed_entries(self):
markers = {marker_1: 'filecontent1', marker_2: 'filecontent2'}

# List of layers to move (and files in these layers), already normalized
self.squash._add_markers(markers, tar, {'1234layerdid': ['/some/file', '/other/file', '/stuff']})
self.squash._add_markers(markers, tar, {'1234layerdid': ['/some/file', '/other/file', '/stuff']}, [])

self.assertEqual(len(tar.addfile.mock_calls), 1)
tar_info, marker_file = tar.addfile.call_args[0]
Expand Down

0 comments on commit e94e94c

Please sign in to comment.