Skip to content

Commit

Permalink
Replacing C++ with Rust
Browse files Browse the repository at this point in the history
Expose rust merge and add intersection
Update cbindgen, uncomment more code
Scaffolding for error catching
Add rust bindings as a submodule
exception handling working
Implement abundances
Use __eq__ instead of __richcmp__
max hash should be an int
Avoid init_once too many times, remove third-party
avoid get_mins in add_many calls
read mins buffer instead of item by item
Use buffers for abunds too
  • Loading branch information
luizirber committed Dec 7, 2018
1 parent 4aab62f commit 4b6bf18
Show file tree
Hide file tree
Showing 22 changed files with 343 additions and 1,057 deletions.
1 change: 0 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@ omit =
doc/conf.py
setup.py
tests/*
third-party/smhasher/MurmurHash3.cc
.tox/*
benchmarks/*
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ dist
build
sourmash.egg-info
.ipynb_checkpoints
_minhash.so
.cache
*.so
.coverage
sourmash_lib/_minhash.cpp
sourmash/_minhash.cpp
.tox
.eggs
rust/target
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "rust"]
path = rust
url = https://github.com/luizirber/sourmash-rust
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ matrix:
env:
- TOX_ENV=py37
install:
- curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
- export PATH="$HOME/.cargo/bin:$PATH"
- rustc -V
- pip install tox
- sudo snap install ipfs
script:
Expand Down
9 changes: 8 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
include LICENSE Makefile Dockerfile LICENSE Makefile README.md requirements.txt
include index.ipynb
include sourmash VERSION
recursive-include sourmash_lib *
recursive-include sourmash *
recursive-include third-party *.cc *.h
recursive-include rust *.rs *.toml *.h
prune rust/target/debug
prune rust/target/release
exclude rust/.git
prune .eggs
global-exclude *.rlib
global-exclude *.orig
global-exclude *.pyc
global-exclude *.so
global-exclude *.git/
2 changes: 1 addition & 1 deletion benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import unicode_literals

from screed.fasta import fasta_iter
from sourmash_lib._minhash import MinHash
from sourmash_lib.minhash import MinHash
from tests.sourmash_tst_utils import get_test_data


Expand Down
1 change: 1 addition & 0 deletions rust
Submodule rust added at 3925d5
68 changes: 34 additions & 34 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,39 @@
from __future__ import print_function
import sys
from setuptools import setup, find_packages
from setuptools import Extension
import os
from setuptools import setup, find_packages
import sys


# SOURMASH_DEBUG=1 in the environment selects the cargo debug profile.
DEBUG_BUILD = os.environ.get("SOURMASH_DEBUG") == '1'


def build_native(spec):
    """Register the Rust build and its CFFI module on a milksnake spec.

    Runs ``cargo build --lib`` in ``./rust`` (adding ``--release`` unless
    DEBUG_BUILD is set) and exposes the resulting ``sourmash`` dylib and
    ``sourmash.h`` header as the ``sourmash._lowlevel`` module.
    """
    profile = 'debug' if DEBUG_BUILD else 'release'

    cargo_cmd = ['cargo', 'build', '--lib']
    if profile == 'release':
        cargo_cmd.append('--release')

    build = spec.add_external_build(cmd=cargo_cmd, path='./rust')

    # NODELETE is only requested on macOS.
    rtld_flags = ['NOW']
    if sys.platform == 'darwin':
        rtld_flags.append('NODELETE')

    # Both lookups are deferred: they are passed as callables, not results.
    def locate_dylib():
        return build.find_dylib('sourmash', in_path='target/%s' % profile)

    def locate_header():
        return build.find_header('sourmash.h', in_path='include')

    spec.add_cffi_module(
        module_path='sourmash._lowlevel',
        dylib=locate_dylib,
        header_filename=locate_header,
        rtld_flags=rtld_flags,
    )

# Retrieve VERSION from sourmash/VERSION, located relative to this file.
thisdir = os.path.dirname(__file__)
# Use a context manager so the file handle is closed as soon as it is read.
with open(os.path.join(thisdir, 'sourmash', 'VERSION')) as version_file:
    VERSION = version_file.read().strip()

EXTRA_COMPILE_ARGS = ['-std=c++11', '-pedantic']
EXTRA_LINK_ARGS=[]

CLASSIFIERS = [
"Environment :: Console",
"Environment :: MacOS X",
Expand All @@ -20,7 +42,7 @@
"Natural Language :: English",
"Operating System :: POSIX :: Linux",
"Operating System :: MacOS :: MacOS X",
"Programming Language :: C++",
"Programming Language :: Rust",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
Expand All @@ -29,19 +51,6 @@

CLASSIFIERS.append("Development Status :: 5 - Production/Stable")

if sys.platform == 'darwin': # Mac OS X?
# force 64bit only builds
EXTRA_COMPILE_ARGS.extend(['-arch', 'x86_64', '-mmacosx-version-min=10.7',
'-stdlib=libc++'])

else: # ...likely Linux
if os.environ.get('SOURMASH_COVERAGE'):
print('Turning on coverage analysis.')
EXTRA_COMPILE_ARGS.extend(['-g', '--coverage', '-lgcov'])
EXTRA_LINK_ARGS.extend(['--coverage', '-lgcov'])
else:
EXTRA_COMPILE_ARGS.append('-O3')

with open('README.md', 'r') as readme:
LONG_DESCRIPTION = readme.read()

Expand All @@ -57,31 +66,22 @@
"author_email": "[email protected]",
"license": "BSD 3-clause",
"packages": find_packages(),
"zip_safe": False,
"platforms": "any",
"entry_points": {'console_scripts': [
'sourmash = sourmash.__main__:main'
]
},
"ext_modules": [Extension("sourmash._minhash",
sources=["sourmash/_minhash.pyx",
"third-party/smhasher/MurmurHash3.cc"],
depends=["sourmash/kmer_min_hash.hh"],
include_dirs=["./sourmash",
"./third-party/smhasher/"],
language="c++",
extra_compile_args=EXTRA_COMPILE_ARGS,
extra_link_args=EXTRA_LINK_ARGS)],
"install_requires": ["screed>=0.9", "ijson", "khmer>=2.1"],
"setup_requires": ['Cython>=0.25.2', "setuptools>=38.6.0"],
"install_requires": ["screed>=0.9", "ijson", "khmer>=2.1", 'milksnake'],
"setup_requires": ["setuptools>=38.6.0", "milksnake"],
"extras_require": {
'test' : ['pytest', 'pytest-cov', 'numpy', 'matplotlib', 'scipy','recommonmark'],
'demo' : ['jupyter', 'jupyter_client', 'ipython'],
'doc' : ['sphinx'],
'10x': ['pathos', 'bamnostic>=0.9.2'],
},
"include_package_data": True,
"package_data": {
"sourmash": ['*.pxd']
},
"milksnake_tasks": [build_native],
"classifiers": CLASSIFIERS
}

Expand Down
5 changes: 4 additions & 1 deletion sourmash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import math
import os

from ._minhash import (MinHash, get_minhash_default_seed, get_minhash_max_hash)
from ._lowlevel import ffi, lib
ffi.init_once(lib.sourmash_init, 'init')

from .minhash import (MinHash, get_minhash_default_seed, get_minhash_max_hash)
DEFAULT_SEED = get_minhash_default_seed()
MAX_HASH = get_minhash_max_hash()

Expand Down
24 changes: 24 additions & 0 deletions sourmash/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys


# True when running under a Python 2 interpreter.
PY2 = sys.version_info[0] == 2

if not PY2:
    # Python 3: the builtin types already have the names we need.
    text_type = str
    int_types = (int,)
    string_types = (str,)
    range_type = range
    NUL = 0

    def itervalues(x):
        """Return the values view of mapping *x*."""
        return x.values()

    def implements_to_string(cls):
        """Return *cls* untouched; no string-method shim is applied."""
        return cls
else:
    # Python 2: map the names onto the py2-only builtins.
    text_type = unicode
    int_types = (int, long)
    string_types = (str, unicode)
    range_type = xrange
    NUL = '\x00'

    def itervalues(x):
        """Return an iterator over the values of mapping *x*."""
        return x.itervalues()

    def implements_to_string(cls):
        """Class decorator: expose __str__ as __unicode__, and make
        __str__ return the UTF-8 encoded form of it."""
        cls.__unicode__ = cls.__str__
        cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
        return cls
58 changes: 0 additions & 58 deletions sourmash/_minhash.pxd

This file was deleted.

44 changes: 44 additions & 0 deletions sourmash/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from ._compat import implements_to_string
from ._lowlevel import lib


__all__ = ['SourmashError']
exceptions_by_code = {}


@implements_to_string
class SourmashError(Exception):
    """Base class for sourmash errors.

    Attributes:
        code: class-level numeric error code; ``None`` on this base class.
        message: the text passed to the constructor.
        rust_info: optional extra detail appended to the string form;
            initialised to ``None``.
    """
    code = None

    def __init__(self, msg):
        # Forward msg to Exception so that ``args`` and ``repr()`` carry
        # it, and so that copy/pickle can rebuild the instance (they call
        # the class with ``args``, and ``msg`` is a required argument).
        Exception.__init__(self, msg)
        self.message = msg
        self.rust_info = None

    def __str__(self):
        """Return the message, followed by ``rust_info`` when it is set."""
        rv = self.message
        if self.rust_info is not None:
            return u'%s\n\n%s' % (rv, self.rust_info)
        return rv


def _make_exceptions():
    """Populate ``exceptions_by_code`` from the ``SOURMASH_ERROR_CODE_*``
    attributes found on ``lib``.

    Codes in the range 100..10000 (inclusive) are mapped to ValueError.
    Every other code gets its own SourmashError subclass, named after the
    constant in CamelCase, published in this module's globals and added
    to ``__all__``.
    """
    prefix = 'SOURMASH_ERROR_CODE_'

    for const_name in dir(lib):
        if not const_name.startswith(prefix):
            continue

        code = getattr(lib, const_name)

        if 100 <= code <= 10000:
            exceptions_by_code[code] = ValueError
            continue

        class Exc(SourmashError):
            pass

        # e.g. a constant suffix FOO_BAR yields a class named FooBar
        Exc.__name__ = const_name[len(prefix):].title().replace('_', '')
        Exc.code = code

        globals()[Exc.__name__] = Exc
        exceptions_by_code[code] = Exc
        __all__.append(Exc.__name__)


_make_exceptions()
Loading

0 comments on commit 4b6bf18

Please sign in to comment.