Skip to content

Commit

Permalink
Revert "Implement ar-file parsing in python (#8681)" (#8744)
Browse files Browse the repository at this point in the history
This reverts commit 1d198e4.

Apparently this caused a failure on the Mac builder:
https://ci.chromium.org/p/emscripten-releases/builders/ci/mac/b8911649373078606944

```
cache:INFO:  - ok
Traceback (most recent call last):
  File "/b/s/w/ir/k/install/emscripten/emcc.py", line 3391, in <module>
    sys.exit(run(sys.argv))
  File "/b/s/w/ir/k/install/emscripten/emcc.py", line 1894, in run
    final = shared.Building.link(linker_inputs, DEFAULT_FINAL, force_archive_contents=force_archive_contents, just_calculate=just_calculate)
  File "/b/s/w/ir/k/install/emscripten/tools/shared.py", line 1940, in link
    Building.read_link_inputs([x for x in files if not x.startswith('-')])
  File "/b/s/w/ir/k/install/emscripten/tools/shared.py", line 1721, in read_link_inputs
    object_names_in_archives = pool.map(extract_archive_contents, archive_names)
  File "/b/s/w/ir/cipd_bin_packages/lib/python2.7/multiprocessing/pool.py", line 253, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/b/s/w/ir/cipd_bin_packages/lib/python2.7/multiprocessing/pool.py", line 572, in get
    raise self._value
IOError: [Errno 2] No such file or directory: u'/b/s/w/ir/tmp/t/emscripten_temp_oIE5H7_archive_contents/#1/12'
```
  • Loading branch information
sbc100 authored Jun 5, 2019
1 parent 0f3fcfa commit fc186bd
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 224 deletions.
89 changes: 84 additions & 5 deletions emar.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,98 @@

"""Archive helper script
This script is a simple wrapper around llvm-ar. It used to have special
handling for duplicate basenames in order to allow bitcode linking process to
read such files. This is now handled by using tools/arfile.py to read archives.
This script acts as a frontend replacement for `ar`. See emcc.
This is needed because, unlike a traditional linker, emscripten can't handle
archive with duplicate member names. This is because emscripten extracts
archive to a temporary location and duplicate filenames will clobber each
other in this case.
"""

# TODO(sbc): Implement `ar x` within emscripten, in python, to avoid this issue
# and delete this file.

from __future__ import print_function
import hashlib
import os
import shutil
import sys

from tools.toolchain_profiler import ToolchainProfiler
from tools import shared
from tools.response_file import substitute_response_files, create_response_file

if __name__ == '__main__':
ToolchainProfiler.record_process_start()


#
# Main run() function
#
def run():
newargs = [shared.LLVM_AR] + sys.argv[1:]
return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
args = substitute_response_files(sys.argv)
newargs = [shared.LLVM_AR] + args[1:]

to_delete = []

# The 3-argument form of ar doesn't involve other files. For example
# 'ar x libfoo.a'.
if len(newargs) > 3:
cmd = newargs[1]
if 'r' in cmd:
# We are adding files to the archive.
# Normally the output file is then arg 2, except in the case where the
# a or b modifiers are used, in which case it's arg 3.
if 'a' in cmd or 'b' in cmd:
out_arg_index = 3
else:
out_arg_index = 2

contents = set()
if os.path.exists(newargs[out_arg_index]):
cmd = [shared.LLVM_AR, 't', newargs[out_arg_index]]
output = shared.check_call(cmd, stdout=shared.PIPE).stdout
contents.update(output.split('\n'))

# Add a hash to colliding basename, to make them unique.
for j in range(out_arg_index + 1, len(newargs)):
orig_name = newargs[j]
full_name = os.path.abspath(orig_name)
dirname = os.path.dirname(full_name)
basename = os.path.basename(full_name)
if basename not in contents:
contents.add(basename)
continue
h = hashlib.md5(full_name.encode('utf-8')).hexdigest()[:8]
parts = basename.split('.')
parts[0] += '_' + h
newname = '.'.join(parts)
full_newname = os.path.join(dirname, newname)
assert not os.path.exists(full_newname)
try:
shutil.copyfile(orig_name, full_newname)
newargs[j] = full_newname
to_delete.append(full_newname)
contents.add(newname)
except:
# it is ok to fail here, we just don't get hashing
contents.add(basename)
pass

if shared.DEBUG:
print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)

response_filename = create_response_file(newargs[3:], shared.get_emscripten_temp_dir())
to_delete += [response_filename]
newargs = newargs[:3] + ['@' + response_filename]

if shared.DEBUG:
print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)

try:
return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
finally:
for d in to_delete:
shared.try_delete(d)


if __name__ == '__main__':
Expand Down
14 changes: 8 additions & 6 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import hashlib
import json
import os
import random
import re
import shutil
import sys
Expand Down Expand Up @@ -5163,25 +5164,26 @@ def test_iostream_and_determinism(self):
return 0;
}
'''
num = 3
num = 5

def test():
print('(iteration)')
time.sleep(1.0)
time.sleep(random.random() / (10 * num)) # add some timing nondeterminism here, not that we need it, but whatever
self.do_run(src, 'hello world\n77.\n')
ret = open('src.cpp.o.js', 'rb').read()
if self.get_setting('WASM') and not self.get_setting('WASM2JS'):
ret += open('src.cpp.o.wasm', 'rb').read()
return ret

builds = [test() for i in range(num)]
print([len(b) for b in builds])
print(list(map(len, builds)))
uniques = set(builds)
if len(uniques) != 1:
for i, unique in enumerate(uniques):
i = 0
for unique in uniques:
open('unique_' + str(i) + '.js', 'wb').write(unique)
# builds must be deterministic, see unique_N.js
self.assertEqual(len(uniques), 1)
i += 1
assert 0, 'builds must be deterministic, see unique_X.js'

def test_stdvec(self):
self.do_run_in_out_file_test('tests', 'core', 'test_stdvec')
Expand Down
37 changes: 32 additions & 5 deletions tests/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -1450,10 +1450,16 @@ def test_archive_duplicate_basenames(self):
''')
run_process([PYTHON, EMCC, os.path.join('b', 'common.c'), '-c', '-o', os.path.join('b', 'common.o')])

try_delete('libdup.a')
run_process([PYTHON, EMAR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
self.assertEqual(text.count('common.o'), 2)
try_delete('liba.a')
run_process([PYTHON, EMAR, 'rc', 'liba.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])

# Verify that archive contains basenames with hashes to avoid duplication
text = run_process([PYTHON, EMAR, 't', 'liba.a'], stdout=PIPE).stdout
self.assertEqual(text.count('common.o'), 1)
self.assertContained('common_', text)
for line in text.split('\n'):
# should not have huge hash names
self.assertLess(len(line), 20, line)

create_test_file('main.c', r'''
void a(void);
Expand All @@ -1463,9 +1469,30 @@ def test_archive_duplicate_basenames(self):
b();
}
''')
run_process([PYTHON, EMCC, 'main.c', '-L.', '-ldup'])
err = run_process([PYTHON, EMCC, 'main.c', '-L.', '-la'], stderr=PIPE).stderr
self.assertNotIn('archive file contains duplicate entries', err)
self.assertContained('a\nb...\n', run_js('a.out.js'))

# Using llvm-ar directly should cause duplicate basenames
try_delete('libdup.a')
run_process([LLVM_AR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
assert text.count('common.o') == 2, text

# With fastcomp we don't support duplicate members so this should generate
# a warning. With the wasm backend (lld) this is fully supported.
cmd = [PYTHON, EMCC, 'main.c', '-L.', '-ldup']
if self.is_wasm_backend():
run_process(cmd)
self.assertContained('a\nb...\n', run_js('a.out.js'))
else:
err = self.expect_fail(cmd)
self.assertIn('libdup.a: archive file contains duplicate entries', err)
self.assertIn('error: undefined symbol: a', err)
# others are not duplicates - the hashing keeps them separate
self.assertEqual(err.count('duplicate: '), 1)
self.assertContained('a\nb...\n', run_js('a.out.js'))

def test_export_from_archive(self):
export_name = 'this_is_an_entry_point'
full_export_name = '_' + export_name
Expand Down
191 changes: 0 additions & 191 deletions tools/arfile.py

This file was deleted.

Loading

0 comments on commit fc186bd

Please sign in to comment.