ValueError: buffer source array is read-only #206

Closed
alimanfoo opened this issue Sep 11, 2018 · 1 comment · Fixed by #208

@alimanfoo (Contributor)

When using scikit-allel with the dask distributed scheduler on a Pangeo-like cluster with adaptive scaling, the following error can occur during a computation:

import gcsfs
import zarr
import allel

# Open the Ag1000G phase 2 callset from Google Cloud Storage (anonymous, read-only).
fs = gcsfs.GCSFileSystem(project='malariagen-jupyterhub', token='anon', access='read_only')
gcs_path = 'ag1000g-release/phase2/AR1/variation/main/zarr2/ag1000g.phase2.ar1'
store = gcsfs.mapping.GCSMap(gcs_path, gcs=fs, check=False, create=False)
callset = zarr.Group(store=store)

# Wrap the genotype data for chromosome arm 3R as a dask-backed genotype array.
chrom = '3R'
gtz = callset[chrom]['calldata/genotype']
gtda = allel.GenotypeDaskArray(gtz)

# Count alleles; the error is raised when the result is computed.
result = gtda.count_alleles(max_allele=3)
ac = result.compute()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<timed exec> in <module>()

/opt/conda/lib/python3.6/site-packages/allel/model/dask.py in compute(self, **kwargs)
    692 
    693     def compute(self, **kwargs):
--> 694         out = super(AlleleCountsDaskArray, self).compute(**kwargs)
    695         return AlleleCountsArray(out)
    696 

/opt/conda/lib/python3.6/site-packages/allel/model/dask.py in compute(self, **kwargs)
    110 
    111     def compute(self, **kwargs):
--> 112         return self.values.compute(**kwargs)
    113 
    114 

/opt/conda/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
    154         dask.base.compute
    155         """
--> 156         (result,) = compute(self, traverse=False, **kwargs)
    157         return result
    158 

/opt/conda/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
    393     keys = [x.__dask_keys__() for x in collections]
    394     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 395     results = schedule(dsk, keys, **kwargs)
    396     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    397 

/opt/conda/lib/python3.6/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, **kwargs)
   2198             try:
   2199                 results = self.gather(packed, asynchronous=asynchronous,
-> 2200                                       direct=direct)
   2201             finally:
   2202                 for f in futures.values():

/opt/conda/lib/python3.6/site-packages/distributed/client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
   1567             return self.sync(self._gather, futures, errors=errors,
   1568                              direct=direct, local_worker=local_worker,
-> 1569                              asynchronous=asynchronous)
   1570 
   1571     @gen.coroutine

/opt/conda/lib/python3.6/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
    643             return future
    644         else:
--> 645             return sync(self.loop, func, *args, **kwargs)
    646 
    647     def __repr__(self):

/opt/conda/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
    275             e.wait(10)
    276     if error[0]:
--> 277         six.reraise(*error[0])
    278     else:
    279         return result[0]

/opt/conda/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
    691             if value.__traceback__ is not tb:
    692                 raise value.with_traceback(tb)
--> 693             raise value
    694         finally:
    695             value = None

/opt/conda/lib/python3.6/site-packages/distributed/utils.py in f()
    260             if timeout is not None:
    261                 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262             result[0] = yield future
    263         except Exception as exc:
    264             error[0] = sys.exc_info()

/opt/conda/lib/python3.6/site-packages/tornado/gen.py in run(self)
   1097 
   1098                     try:
-> 1099                         value = future.result()
   1100                     except Exception:
   1101                         self.had_exception = True

/opt/conda/lib/python3.6/site-packages/tornado/gen.py in run(self)
   1105                     if exc_info is not None:
   1106                         try:
-> 1107                             yielded = self.gen.throw(*exc_info)
   1108                         finally:
   1109                             # Break up a reference to itself

/opt/conda/lib/python3.6/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
   1443                             six.reraise(type(exception),
   1444                                         exception,
-> 1445                                         traceback)
   1446                     if errors == 'skip':
   1447                         bad_keys.add(key)

/opt/conda/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
    690                 value = tp()
    691             if value.__traceback__ is not tb:
--> 692                 raise value.with_traceback(tb)
    693             raise value
    694         finally:

/opt/conda/lib/python3.6/site-packages/allel/model/dask.py in f()
    378             def f(block):
    379                 gb = GenotypeArray(block)
--> 380                 return gb.count_alleles(max_allele=max_allele)[:, None, :]
    381 
    382             # map blocks and reduce

/opt/conda/lib/python3.6/site-packages/allel/model/ndarray.py in count_alleles()
   1830         # use optimisations
   1831         if subpop is None and self.mask is None:
-> 1832             ac = genotype_array_count_alleles(self.values, max_allele)
   1833         elif subpop is None:
   1834             ac = genotype_array_count_alleles_masked(

allel/opt/model.pyx in allel.opt.model.genotype_array_count_alleles()

/opt/conda/lib/python3.6/site-packages/allel/opt/model.cpython-36m-x86_64-linux-gnu.so in View.MemoryView.memoryview_cwrapper()

/opt/conda/lib/python3.6/site-packages/allel/opt/model.cpython-36m-x86_64-linux-gnu.so in View.MemoryView.memoryview.__cinit__()

ValueError: buffer source array is read-only

Note this only occurs when using adaptive scaling, and it happens during or shortly after the cluster scales up, so I'm guessing it has something to do with data being moved between nodes.
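
If that's right, a plausible mechanism is that arrays deserialized after a worker-to-worker transfer end up backed by immutable bytes buffers. A minimal sketch (my own illustration, not scikit-allel or distributed code) of how such an array becomes read-only, and why the Cython kernel then rejects it:

import numpy as np

# Simulate an array being serialized for a network transfer and then
# deserialized on another worker without copying.
original = np.arange(6, dtype='i1')
payload = original.tobytes()                    # immutable bytes object
received = np.frombuffer(payload, dtype='i1')   # zero-copy view over the bytes

print(received.flags.writeable)  # False

# A Cython function taking a non-const typed memoryview (as in
# allel/opt/model.pyx above) requires a writable buffer, so passing such
# an array raises "ValueError: buffer source array is read-only".
# Since Cython 0.28, declaring the argument as a const memoryview makes
# read-only input acceptable.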

@alimanfoo (Contributor, issue author)

xref dask/distributed#1978
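
In the meantime, a possible workaround might be to force each block to be copied into a fresh, writable array before the allele-counting kernel runs. A sketch, untested, reusing gtz from the reproducer above and assuming GenotypeDaskArray accepts a dask array directly:

import numpy as np
import allel

# Hypothetical workaround: copy every block so the Cython kernel always
# sees a writable buffer, even for chunks that arrived from other workers.
gtda = allel.GenotypeDaskArray(gtz)
gtda_writable = allel.GenotypeDaskArray(gtda.values.map_blocks(np.copy))
ac = gtda_writable.count_alleles(max_allele=3).compute()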
