Skip to content

Commit

Permalink
Merge branch 'master' of github.com:brain-life/warehouse
Browse files Browse the repository at this point in the history
  • Loading branch information
soichih committed Sep 26, 2018
2 parents 538a133 + e9cf77a commit a8942e8
Show file tree
Hide file tree
Showing 42 changed files with 878 additions and 575 deletions.
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
# Brainlife Warehouse

Please see http://www.brainlife.io/warehouse/
Brainlife warehouse provides most of the web UI hosted under https://brainlife.io and the API services that are unique to Brainlife.

touched
![architecture](https://docs.google.com/drawings/d/e/2PACX-1vSbxpvxhckYT5rUJReexZdbaL4xZpMDiebDP-yQAxrcy1VwKCAHYQQTWE8mMQ4lBgQg9qpcZcZmaEr1/pub?w=960&h=551)

For more information, please read [Brainlife Doc](https://brain-life.github.io/docs/)

For Warehouse API doc, please read [Warehouse API Doc](https://brain-life.github.io/warehouse/apidoc/)

### Authors
- Soichi Hayashi ([email protected])

### Project directors
- Franco Pestilli ([email protected])

### Funding
[![NSF-BCS-1734853](https://img.shields.io/badge/NSF_BCS-1734853-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1734853)
[![NSF-BCS-1636893](https://img.shields.io/badge/NSF_BCS-1636893-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1636893)
58 changes: 26 additions & 32 deletions api/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ exports.archive_task = function(task, dataset, files_override, auth, cb) {
});
}

//now start feeding the writestream
//now start feeding the writestream (/tmp/archive-XXX/thing)
request({
url: config.amaretti.api+"/task/download/"+task._id,
qs: {
Expand All @@ -157,10 +157,15 @@ exports.archive_task = function(task, dataset, files_override, auth, cb) {
if (err.file) dataset.desc = "Expected output " + (err.file.filename||err.file.dirname) + " not found";
else dataset.desc = "Failed to store all files under tmpdir";
dataset.status = "failed";
return dataset.save(cb);
//return dataset.save(cb);
dataset.save(_err=>{
if(_err) logger.error(_err); //ignore..?
cb(dataset.desc);
});
return;
}

logger.debug(filenames);
//logger.debug(filenames);

//all items stored under tmpdir! call cb, but then asynchronously copy content to the storage
var storage = config.storage_default();
Expand All @@ -180,6 +185,9 @@ exports.archive_task = function(task, dataset, files_override, auth, cb) {
}
});

logger.debug("streaming to storage");
tar.stdout.pipe(writestream);

//TODO - I am not sure if all writestream returns a file object (pkgcloud does.. but this makes it a bit less generic)
//maybe I should run system.stat()?
//writestream.on('success', file=>{
Expand All @@ -198,9 +206,6 @@ exports.archive_task = function(task, dataset, files_override, auth, cb) {
cb(err); //return error from streaming which is more interesting
});
});

logger.debug("streaming to storage");
tar.stdout.pipe(writestream);
});
});
});
Expand Down Expand Up @@ -272,18 +277,14 @@ exports.compose_app_datacite_metadata = function(app) {
return;
}
//TODO - add <nameIdentifier nameIdentifierScheme="ORCID">12312312131</nameIdentifier>
creators.push(`<creator>
<creatorName>${xmlescape(contact.fullname)}</creatorName>
</creator>`);
creators.push(`<creator><creatorName>${xmlescape(contact.fullname)}</creatorName></creator>`);
});

let contributors = [];
app.contributors.forEach(contact=>{
if(app.contributors) app.contributors.forEach(contact=>{
//contributorType can be ..
//Value \'Contributor\' is not facet-valid with respect to enumeration \'[ContactPerson, DataCollector, DataCurator, DataManager, Distributor, Editor, HostingInstitution, Other, Producer, ProjectLeader, ProjectManager, ProjectMember, RegistrationAgency, RegistrationAuthority, RelatedPerson, ResearchGroup, RightsHolder, Researcher, Sponsor, Supervisor, WorkPackageLeader]\'. It must be a value from the enumeration.'
contributors.push(`<contributor contributorType="Other">
<contributorName>${xmlescape(contact.name)}</contributorName>
</contributor>`);
contributors.push(`<contributor contributorType="Other"><contributorName>${xmlescape(contact.name)}</contributorName></contributor>`);
});

let subjects = []; //aka "keyword"
Expand All @@ -295,13 +296,13 @@ exports.compose_app_datacite_metadata = function(app) {
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
<identifier identifierType="DOI">${app.doi}</identifier>
<creators>
${creators.join("\n")}
${creators.join('\n')}
</creators>
<contributors>
${contributors.join("\n")}
${contributors.join('\n')}
</contributors>
<subjects>
${subjects.join("\n")}
${subjects.join('\n')}
</subjects>
<titles>
<title>${xmlescape(app.name)}</title>
Expand All @@ -324,9 +325,6 @@ exports.compose_pub_datacite_metadata = function(pub) {
let year = pub.create_date.getFullYear();
let publication_year = "<publicationYear>"+year+"</publicationYear>";

//creators
//let creators = cached_contacts[pub.user_id];

//in case author is empty.. let's use submitter as author..
//TODO - we need to make author required field
if(pub.authors.length == 0) pub.authors.push(pub.user_id);
Expand All @@ -339,9 +337,7 @@ exports.compose_pub_datacite_metadata = function(pub) {
return;
}
//TODO - add <nameIdentifier nameIdentifierScheme="ORCID">12312312131</nameIdentifier>
creators.push(`<creator>
<creatorName>${xmlescape(contact.fullname)}</creatorName>
</creator>`);
creators.push(`<creator><creatorName>${xmlescape(contact.fullname)}</creatorName></creator>`);

});

Expand All @@ -356,9 +352,7 @@ exports.compose_pub_datacite_metadata = function(pub) {

//contributorType can be ..
//Value \'Contributor\' is not facet-valid with respect to enumeration \'[ContactPerson, DataCollector, DataCurator, DataManager, Distributor, Editor, HostingInstitution, Other, Producer, ProjectLeader, ProjectManager, ProjectMember, RegistrationAgency, RegistrationAuthority, RelatedPerson, ResearchGroup, RightsHolder, Researcher, Sponsor, Supervisor, WorkPackageLeader]\'. It must be a value from the enumeration.'
contributors.push(`<contributor contributorType="Other">
<contributorName>${xmlescape(contact.fullname)}</contributorName>
</contributor>`);
contributors.push(`<contributor contributorType="Other"><contributorName>${xmlescape(contact.fullname)}</contributorName></contributor>`);

});

Expand All @@ -371,13 +365,13 @@ exports.compose_pub_datacite_metadata = function(pub) {
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
<identifier identifierType="DOI">${pub.doi}</identifier>
<creators>
${creators.join("\n")}
${creators.join('\n')}
</creators>
<contributors>
${contributors.join("\n")}
${contributors.join('\n')}
</contributors>
<subjects>
${subjects.join("\n")}
${subjects.join('\n')}
</subjects>
<titles>
<title>${xmlescape(pub.name)}</title>
Expand Down Expand Up @@ -434,8 +428,7 @@ exports.doi_put_url = function(doi, url, cb) {
}

let cached_contacts = {};
function cache_contact() {
logger.info("caching auth profiles");
exports.cache_contact = function(cb) {
request({
url: config.auth.api+"/profile", json: true,
qs: {
Expand All @@ -449,11 +442,12 @@ function cache_contact() {
body.profiles.forEach(profile=>{
cached_contacts[profile.id.toString()] = profile;
});
if(cb) cb();
}
});
}
cache_contact();
setInterval(cache_contact, 1000*60*30); //cache every 30 minutes
exports.cache_contact();
setInterval(exports.cache_contact, 1000*60*30); //cache every 30 minutes

exports.deref_contact = function(id) {
return cached_contacts[id];
Expand Down
3 changes: 1 addition & 2 deletions api/config/index.js.sample
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ exports.amaretti = {
//jwt used to query things from workflow service as admin
jwt: fs.readFileSync(__dirname+'/amaretti.jwt', 'ascii').trim(),
}
exports.wf = exports.amaretti; //deprecated
exports.wf = exports.amaretti; //deprecated (use amaretti)

exports.auth = {
api: "https://dev1.soichi.us/api/auth",
Expand All @@ -42,7 +42,6 @@ exports.warehouse = {
api: "https://dev1.soichi.us/api/warehouse",

//base url
//url: "https://localhost:8080",
url: "https://localhost.brainlife.io", //to test datacite

//used to issue warehouse token to allow dataset download
Expand Down
18 changes: 11 additions & 7 deletions api/controllers/dataset.js
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,11 @@ router.get('/inventory', jwt({secret: config.express.pubkey, credentialsRequired
//{removed: false, project: mongoose.Types.ObjectId("592dcc5b0188fd1eecf7b4ec")},
]
})
.group({_id: {"subject": "$meta.subject", "datatype": "$datatype", "datatype_tags": "$datatype_tags"},
count: {$sum: 1}, size: {$sum: "$size"} })
.group({_id: {
"subject": "$meta.subject",
"datatype": "$datatype",
"datatype_tags": "$datatype_tags"
}, count: {$sum: 1}, size: {$sum: "$size"} })
.sort({"_id.subject":1})
.exec((err, stats)=>{
if(err) return next(err);
Expand Down Expand Up @@ -241,14 +244,14 @@ router.get('/prov/:id', (req, res, next)=>{
if(task.service == "soichih/sca-product-raw" || task.service == "soichih/sca-service-noop") { //TODO might change in the future
if(defer) {
add_node(defer.node);
edges.push(defer.edge);
if(defer.edge.to != defer.edge.from) edges.push(defer.edge);
}
if(dataset.prov.subdir) load_product_raw(to, dataset.prov.subdir, cb);
else load_product_raw(to, dataset._id, cb);
} else if(task.service && task.service.indexOf("brain-life/validator-") === 0) {
if(defer) {
add_node(defer.node);
edges.push(defer.edge);
if(defer.edge.to != defer.edge.from) edges.push(defer.edge);
}
cb(); //ignore validator
} else {
Expand All @@ -275,7 +278,7 @@ router.get('/prov/:id', (req, res, next)=>{
var found = false;
var from = "dataset."+dataset_id;
var found = edges.find(e=>(e.from == from && e.to == to));
if(!found) edges.push({ from, to, arrows: "to", });
if(to != from && !found) edges.push({ from, to, arrows: "to", });
return cb();
}
datasets_analyzed.push(dataset_id.toString());
Expand Down Expand Up @@ -474,7 +477,7 @@ router.post('/', jwt({secret: config.express.pubkey}), (req, res, cb)=>{
if(err) return next(err);
dataset = _dataset;
logger.debug("created dataset record......................", dataset.toObject());
res.json(dataset); //now respond back to the caller - but processing has just begun
//res.json(dataset);
next(err);
});
},
Expand All @@ -486,7 +489,8 @@ router.post('/', jwt({secret: config.express.pubkey}), (req, res, cb)=>{

], err=>{
if(err) return cb(err);
else logger.debug("all done archiving");
logger.debug("all done archiving");
res.json(dataset);
});
});

Expand Down
Loading

0 comments on commit a8942e8

Please sign in to comment.