You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I tried to use MultiFit project for Persian language (with fa abbreviation)… when it’s used with fa_2 folder, I can run the trainer, but for fa_100 which is a larger dataset it hangs for many hours on a line and doesn’t proceed.
The code below shows where it hangs when I interrupt the execution with Ctrl+C, it’s on label_for_lm!!!
~/multifit/notebooks/ml.py in
58
59
—> 60 exp.pretrain_lm.train_(wiki_dataset)
61
62
~/multifit/multifit/training.py in train_(self, dataset_or_path, tokenizer, **train_config)
273 else:
274 tokenizer = self.arch.new_tokenizer()
–> 275
276 dataset = self.set_dataset(dataset_or_path, tokenizer)
277 print(“get_learner call”)
~/multifit/multifit/datasets/dataset.py in load_lm_databunch(self, bs, bptt, limit)
207 bptt=bptt,
208 bs=bs,
–> 209 limit=limit)
210
211 with (self.cache_path / “itos.pkl”).open(‘wb’) as f:
~/multifit/multifit/datasets/dataset.py in load_n_cache_databunch(self, name, bunch_class, data_loader, bs, limit, **args)
260 train_df = train_df[:limit]
261 valid_df = valid_df[:limit]
–> 262 databunch = self.databunch_from_df(bunch_class, train_df, valid_df, **args)
263 print(“name:”, name)
264 databunch.save(name)
~/multifit/multifit/datasets/dataset.py in databunch_from_df(self, bunch_class, train_df, valid_df, **args)
275 mark_fields=True,
276 text_cols=list(train_df.columns.values)[1:],
–> 277 **args)
278 return databunch
279
~/multifit/fastai_contrib/text_data.py in make_data_bunch_from_df(cls, path, train_df, valid_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, chunksize, max_vocab, min_freq, mark_fields, include_bos, include_eos, processor, **kwargs)
152 if cls == TextLMDataBunch:
153 print(“cls == TextLMDataBunch”)
–> 154 src = src.label_for_lm()
155 else:
156 print(“else of cls == TextLMDataBunch”)
~/miniconda3/lib/python3.7/site-packages/fastai/data_block.py in _inner(*args, **kwargs)
482 self.valid = fv(*args, from_item_lists=True, **kwargs)
483 self.class = LabelLists
–> 484 self.process()
485 return self
486 return _inner
~/miniconda3/lib/python3.7/site-packages/fastai/data_block.py in process(self)
536 “Process the inner datasets.”
537 xp,yp = self.get_processors()
–> 538 for ds,n in zip(self.lists, [‘train’,‘valid’,‘test’]): ds.process(xp, yp, name=n)
539 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
540 for ds in self.lists:
~/miniconda3/lib/python3.7/site-packages/fastai/data_block.py in process(self, xp, yp, name, max_warn_items)
716 p.warns = []
717 self.x,self.y = self.x[~filt],self.y[~filt]
–> 718 self.x.process(xp)
719 return self
720
~/miniconda3/lib/python3.7/site-packages/fastai/data_block.py in process(self, processor)
82 if processor is not None: self.processor = processor
83 self.processor = listify(self.processor)
—> 84 for p in self.processor: p.process(self)
85 return self
86
~/multifit/fastai_contrib/text_data.py in process(self, ds)
122 class SPProcessor2(SPProcessor):
123 def process(self, ds):
–> 124 super().process(ds)
125 ds.vocab.sp_model = self.sp_model
126 ds.vocab.sp_vocab = self.sp_vocab
~/miniconda3/lib/python3.7/site-packages/fastai/text/data.py in process(self, ds)
472 else:
473 with ProcessPoolExecutor(self.n_cpus) as e:
–> 474 ds.items = np.array(sum(e.map(self._encode_batch, partition_by_cores(ds.items, self.n_cpus)), []))
475 ds.vocab = self.vocab
476
~/miniconda3/lib/python3.7/concurrent/futures/process.py in _chain_from_iterable_of_lists(iterable)
481 careful not to keep references to yielded objects.
482 “”"
–> 483 for element in iterable:
484 element.reverse()
485 while element:
~/miniconda3/lib/python3.7/concurrent/futures/_base.py in result_iterator()
596 # Careful not to keep a reference to the popped future
597 if timeout is None:
–> 598 yield fs.pop().result()
599 else:
600 yield fs.pop().result(end_time - time.monotonic())
~/miniconda3/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
428 return self.__get_result()
429
–> 430 self._condition.wait(timeout)
431
432 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
~/miniconda3/lib/python3.7/threading.py in wait(self, timeout)
294 try: # restore state no matter what (e.g., KeyboardInterrupt)
295 if timeout is None:
–> 296 waiter.acquire()
297 gotit = True
298 else:
KeyboardInterrupt:
When I check the cpu usage, it forks to many process and all of them takes 99% of cpu… Should I wait and something is done or it just hanged there?
I also set
defaults.cpus=1
before
src = src.label_for_lm() //training file
and even I changed the above code to
src = src.label_for_lm().databunch(bs=64)
but none of these tricks works…
I doubt if it’s really does anything
The text was updated successfully, but these errors were encountered:
I tried to use MultiFit project for Persian language (with fa abbreviation)… when it’s used with fa_2 folder, I can run the trainer, but for fa_100 which is a larger dataset it hangs for many hours on a line and doesn’t proceed.
The code below shows where it hangs when I interrupt the execution with Ctrl+C, it’s on label_for_lm!!!
When I check the cpu usage, it forks to many process and all of them takes 99% of cpu… Should I wait and something is done or it just hanged there?
I also set
defaults.cpus=1
before
src = src.label_for_lm() //training file
and even I changed the above code to
src = src.label_for_lm().databunch(bs=64)
but none of these tricks works…
I doubt if it’s really does anything
The text was updated successfully, but these errors were encountered: