From ba9c57606728d59bbeee11625a3d39e793a59153 Mon Sep 17 00:00:00 2001 From: Chris Woodward Date: Fri, 19 Jun 2020 20:10:44 -0400 Subject: [PATCH] Updated savers object creation to bulk save --- pyGeno/backends/arangodb/configuration.py | 2 +- pyGeno/backends/arangodb/savers.py | 26 ++++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pyGeno/backends/arangodb/configuration.py b/pyGeno/backends/arangodb/configuration.py index 6702253..4a2b66e 100644 --- a/pyGeno/backends/arangodb/configuration.py +++ b/pyGeno/backends/arangodb/configuration.py @@ -55,7 +55,7 @@ def prompt_setup(self): args = { "username": "root", "password": "root", - "arangoURL": "http://localhost:8529/" + "arangoURL": "http://127.0.0.1:8529/" } print("## ArangoDB backend setup") for key, value in args.items(): diff --git a/pyGeno/backends/arangodb/savers.py b/pyGeno/backends/arangodb/savers.py index f10bf72..0b022fc 100644 --- a/pyGeno/backends/arangodb/savers.py +++ b/pyGeno/backends/arangodb/savers.py @@ -1,7 +1,7 @@ from ..backend_abs import GenomeSaver_ABS from pyGeno.configuration import system_message from .objects import schemas - +import pyArango class GenomeSaver(GenomeSaver_ABS): """ Saves genome into database @@ -14,7 +14,7 @@ def __init__(self, database_configuration): def init_db(self): for col in schemas.ALL_COLLECTIONS: colname = col.__name__ - if colname not in self.db: + if not self.db.hasCollection(str(colname)): self.db.createCollection(colname) else : print("TRUNCATING (temporary for tests, should be removed)", colname) @@ -22,16 +22,22 @@ def init_db(self): def create_objects(self): for colname, objs in self.data.items(): + i = 0 + c = 0 + docs = [] + doc = {} for key, obj in objs.items(): - try : - doc = self.db[colname][key] - except : - doc = self.db[colname].createDocument() - doc["_key"] = key - doc.set(obj) + doc = self.db[colname].createDocument_(obj) + doc['_key'] = key if colname in self.accepted_contigs: doc["contig"] = self.get_subsequence(doc["seqname"], doc["start"], doc["end"]) - doc.save() + docs.append(doc) + i = i + 1 + c = c + 1 + if i == len(objs) or i == 1000 or c + i == len(objs): + self.db[colname].bulkSave(docs, onDuplicate='ignore') + docs = [] + i = 0 def create_links(self): def _get_collection(from_type, to_type): @@ -56,4 +62,4 @@ def _get_collection(from_type, to_type): def save(self) : self.init_db() self.create_objects() - self.create_links() + #self.create_links()