Skip to content

Commit

Permalink
prep for release
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Nov 27, 2019
1 parent c59d8a4 commit 91c30df
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 9 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
v0.2.7 (dev)
v0.2.7
======
+ new subcommand `ancestry` to predict ancestry using a simple neural network on the somalier
sketches. Creates an interactive HTML output and a text file.
+ fix for "Argument list too long" on huge cohorts (#37)
+ sub-sample .pairs.tsv output for huge cohorts -- only for unrelated samples.
+ better sub-sampling of html output

v0.2.6
======
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ to add/remove samples or adjust a pedigree file and re-run iteratively.
For example, to add the **(n + 1)th** sample, just run `somalier extract` on the new sample and then re-use
the already extracted data from the `n` original samples.

For *huge* sample-sets, if you run into a bash error for *argument list too long*, you can pass the somalier files as quoted
glob strings like: `"/path/to/set-a/*.somalier" "/path/to/set-b/*.somalier"`.

## Usage

The usage is also described above. Briefly, run:
Expand Down
2 changes: 1 addition & 1 deletion src/somalier.nim
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ proc main() =
"extract": pair(f:extract_main, description: "extract genotype-like information for a single sample from VCF/BAM/CRAM."),
"relate": pair(f:rel_main, description: "aggregate `extract`ed information and calculate relatedness among samples."),
"ancestry": pair(f:ancestry_main, description: "perform ancestry prediction on a set of samples, given a set of labeled samples"),
"depthview": pair(f:depth_main, description: "plot per-chromosome depth for each sample for quick quality-control"),
#"depthview": pair(f:depth_main, description: "plot per-chromosome depth for each sample for quick quality-control"),
"find-sites": pair(f:findsites_main, description: "create a new sites.vcf.gz file from a population VCF (this is rarely needed)."),
}.toOrderedTable

Expand Down
10 changes: 9 additions & 1 deletion src/somalierpkg/ancestry.nim
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ type ForHtml = ref object
probs: seq[float32] # probability of maximum prediction
ancestry_label: string

proc subset(T: var Tensor[float32], Q: var Tensor[float32], labels: var Tensor[int]) =
  ## Debugging aid: writes the shape of `T`, then `Q`, then `labels` to
  ## stdout, one per line. The arguments are only read, never modified,
  ## even though they are declared `var`.
  echo(T.shape)
  echo(Q.shape)
  echo(labels.shape)

proc ancestry_main*() =

var argv = commandLineParams()
Expand Down Expand Up @@ -117,13 +122,17 @@ proc ancestry_main*() =
vec[j] = ac.ab(5).alts.float32
query_mat[i] = vec


var
nPCs = parseInt(opts.n_pcs)
T = train_mat.toTensor()
Q = query_mat.toTensor()
Y = int_labels.toTensor() #.astype(float32)#.unsqueeze(0).transpose
t0 = cpuTime()
res = T.pca(nPCs) #, center=true) #, n_power_iters=4)

#subset(T, Q, Y)

stderr.write_line &"[somalier] time for dimensionality reduction to shape {res.projected.shape}: {cpuTime() - t0:.2f} seconds"

let
Expand Down Expand Up @@ -187,7 +196,6 @@ proc ancestry_main*() =
let t_probs = model.forward(X).value.softmax #.argmax(axis=1).squeeze

let
Q = query_mat.toTensor()
q_proj = Q * res.components
q_probs = model.forward(ctx.variable q_proj).value.softmax
q_pred = q_probs.argmax(axis=1).squeeze
Expand Down
12 changes: 6 additions & 6 deletions src/somalierpkg/results.html
Original file line number Diff line number Diff line change
Expand Up @@ -157,18 +157,18 @@ <h5>Sample Depth Metrics</h5>
var sample_data = <SAMPLE_JSON>
var input = <INPUT_JSON>

var colors = ['rgba(55,126,184,0.7)', 'rgba(228,26,28,0.7)', 'rgba(77,175,74,0.7)', 'rgba(152,78,163,0.7)', 'rgba(255,127,0,0.7)', 'rgba(166,86,40,0.7)', 'rgba(247,129,191,0.7)']
var colors = ['rgba(55,126,184,0.7)', 'rgba(228,26,28,0.7)', 'rgba(152,78,163,0.7)', 'rgba(255,127,0,0.7)', 'rgba(166,86,40,0.7)', 'rgba(247,129,191,0.7)', 'rgba(77,175,74,0.7)',]
var size
if (sample_data.length > 700) {
size = 6
size = 8
} else if (sample_data.length > 200) {
size = 7
size = 9
} else if (sample_data.length > 50) {
size = 8
} else if (sample_data.length > 20) {
size = 10
} else {
} else if (sample_data.length > 20) {
size = 12
} else {
size = 15
}

function set_xy_data_by_group(input, metric, is_x) {
Expand Down

0 comments on commit 91c30df

Please sign in to comment.