Skip to content

Commit

Permalink
use new features in vcfpp.h >= v0.5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Sep 13, 2024
1 parent a4907a0 commit 76ed8cb
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 23 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: vcfppR
Title: Rapid Manipulation of the Variant Call Format (VCF)
Version: 0.4.8
Version: 0.5.0
Authors@R: c(
person("Zilong", "Li", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5859-2078")),
Expand Down
3 changes: 3 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ heterozygosity <- function(vcffile, region = "", samples = "-", pass = FALSE, qu
#' @field string Return the raw string of current variant including newline
#' @field line Return the raw string of current variant without newline
#' @field output Init an output object for streaming out the variants to another vcf
#' @field updateSamples update samples name in the output VCF
#' \itemize{
#' \item Parameter: s - A comma-seperated string for new samples names}
#' @field write Streaming out current variant the output vcf
#' @field close Close the connection to the output vcf
#' @field setCHR Modify the CHR of current variant \itemize{ \item Parameter: s - A string for CHR}
Expand Down
4 changes: 4 additions & 0 deletions man/vcfreader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 21 additions & 11 deletions src/vcf-reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ using namespace std;
//' @field string Return the raw string of current variant including newline
//' @field line Return the raw string of current variant without newline
//' @field output Init an output object for streaming out the variants to another vcf
//' @field updateSamples update samples name in the output VCF
//' \itemize{
//' \item Parameter: s - A comma-seperated string for new samples names}
//' @field write Streaming out current variant the output vcf
//' @field close Close the connection to the output vcf
//' @field setCHR Modify the CHR of current variant \itemize{ \item Parameter: s - A string for CHR}
Expand Down Expand Up @@ -285,6 +288,10 @@ class vcfreader {
var.resetHeader(bw.header);
modifiable = true;
}
inline void updateSamples(const std::string& s) {
if (!modifiable) { modify(); }
bw.header.updateSamples(s);
}
inline void write() {
if (writable) {
bw.writeRecord(var);
Expand All @@ -300,21 +307,23 @@ class vcfreader {
}
}

inline void setCHR(std::string s) { var.setCHR(s.c_str()); }
inline void setID(std::string s) { var.setID(s.c_str()); }
inline void setCHR(const std::string& s) { var.setCHR(s); }
inline void setID(const std::string& s) { var.setID(s); }
inline void setPOS(int pos) { var.setPOS(pos); }
inline void setRefAlt(const std::string& s) { var.setRefAlt(s.c_str()); }
inline bool setInfoInt(std::string tag, int v) { return var.setINFO(tag, v); }
inline bool setInfoFloat(std::string tag, double v) { return var.setINFO(tag, v); }
inline bool setInfoStr(std::string tag, const std::string& s) { return var.setINFO(tag, s); }
inline bool setFormatInt(std::string tag, const vector<int>& v) {
inline void setRefAlt(const std::string& s) { var.setRefAlt(s); }
inline bool setInfoInt(const std::string& tag, int v) { return var.setINFO(tag, v); }
inline bool setInfoFloat(const std::string& tag, double v) { return var.setINFO(tag, v); }
inline bool setInfoStr(const std::string& tag, const std::string& s) {
return var.setINFO(tag, s);
}
inline bool setFormatInt(const std::string& tag, const vector<int>& v) {
return var.setFORMAT(tag, v);
}
inline bool setFormatFloat(std::string tag, const vector<double>& v) {
inline bool setFormatFloat(const std::string& tag, const vector<double>& v) {
vector<float> f(v.begin(), v.end());
return var.setFORMAT(tag, f);
}
inline bool setFormatStr(std::string tag, const std::string& s) {
inline bool setFormatStr(const std::string& tag, const std::string& s) {
if (s.length() % nsamples() != 0) {
Rcpp::Rcout << "the length of s must be divisable by nsamples()";
return false;
Expand All @@ -334,8 +343,8 @@ class vcfreader {
var.setPhasing(c);
}

inline void rmInfoTag(std::string s) { var.removeINFO(s); }
inline void rmFormatTag(std::string s) { var.removeFORMAT(s); }
inline void rmInfoTag(const std::string& s) { var.removeINFO(s); }
inline void rmFormatTag(const std::string& s) { var.removeFORMAT(s); }
inline void addINFO(const std::string& id, const std::string& number, const std::string& type,
const std::string& desc) {
if (!writable) {
Expand Down Expand Up @@ -415,6 +424,7 @@ RCPP_MODULE(vcfreader) {
.method("string", &vcfreader::string)
.method("line", &vcfreader::line)
.method("output", &vcfreader::output)
.method("updateSamples", &vcfreader::updateSamples)
.method("write", &vcfreader::write)
.method("close", &vcfreader::close)
.method("setCHR", &vcfreader::setCHR)
Expand Down
79 changes: 68 additions & 11 deletions src/vcfpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* @file https://github.com/Zilong-Li/vcfpp/vcfpp.h
* @author Zilong Li
* @email [email protected]
* @version v0.4.1
* @version v0.5.1
* @breif a single C++ file for manipulating VCF
* Copyright (C) 2022-2023.The use of this code is governed by the LICENSE file.
******************************************************************************/
Expand Down Expand Up @@ -371,9 +371,36 @@ class BcfHeader
bcf_hdr_remove(hdr, BCF_HL_FLT, tag.c_str());
}

/**
* @brief update the sample id in the header
* @param samples a comma-separated string for multiple new samples
* @note this only update the samples name in a given sequential order
* */
inline void updateSamples(const std::string & samples) const
{
int sid = 0; // index of the sample to modify
for(auto s : split_string(samples, ","))
{
if(sid < nSamples())
{
hdr->samples[sid] = strdup(s.c_str());
sid++;
}
else
{
# if defined(VERBOSE)
std::cerr << "you provided too many samples";
# endif
break;
}
}
int ret = bcf_hdr_sync(hdr);
if(ret != 0) throw std::runtime_error("could not modify " + samples + " in the header\n");
}

/**
* @brief explicitly set samples to be extracted
* @param samples samples to include or exclude as a comma-separated string
* @param samples samples to include or exclude as a comma-separated string
* */
inline void setSamples(const std::string & samples) const
{
Expand Down Expand Up @@ -1231,9 +1258,9 @@ class BcfRecord
}

/** @brief modify CHROM value */
inline void setCHR(const char * chr)
inline void setCHR(const std::string & s)
{
line->rid = bcf_hdr_name2id(header->hdr, chr);
line->rid = bcf_hdr_name2id(header->hdr, s.c_str());
}

/** @brief modify position given 1-based value */
Expand All @@ -1243,15 +1270,34 @@ class BcfRecord
}

/** @brief update ID */
inline void setID(const char * s)
inline void setID(const std::string & s)
{
bcf_update_id(header->hdr, line.get(), s);
bcf_update_id(header->hdr, line.get(), s.c_str());
}

/** @brief set REF and ALT alleles given a string seperated by comma */
inline void setRefAlt(const char * alleles_string)
inline void setRefAlt(const std::string & s)
{
bcf_update_alleles_str(header->hdr, line.get(), alleles_string);
bcf_update_alleles_str(header->hdr, line.get(), s.c_str());
}

/** @brief modify the QUAL value */
inline void setQUAL(float q)
{
line->qual = q;
}

/** @brief modify the QUAL value */
inline void setQUAL(char q)
{
bcf_float_set_missing(line->qual);
}

/** @brief modify the FILTER value */
inline void setFILTER(const std::string & s)
{
int32_t tmpi = bcf_hdr_id2int(header->hdr, BCF_DT_ID, s.c_str());
bcf_add_filter(header->hdr, line.get(), tmpi);
}

/** @brief return 0-base start of the variant (can be any type) */
Expand Down Expand Up @@ -1750,12 +1796,23 @@ class BcfWriter
hp = &h;
}

/// copy header of given VCF
void copyHeader(const std::string & vcffile)
/// copy header of given VCF and restrict on samples. if samples=="", FORMAT removed and only sites left
void copyHeader(const std::string & vcffile, std::string samples = "-")
{
htsFile * fp2 = hts_open(vcffile.c_str(), "r");
if(!fp2) throw std::invalid_argument("I/O error: input file is invalid");
header.hdr = bcf_hdr_read(fp2);
if(samples == "")
{ // site-only
bcf_hdr_t * hfull = bcf_hdr_read(fp2);
header.hdr = bcf_hdr_subset(hfull, 0, 0, 0);
bcf_hdr_remove(header.hdr, BCF_HL_FMT, NULL);
bcf_hdr_destroy(hfull);
}
else
{
header.hdr = bcf_hdr_read(fp2);
header.setSamples(samples);
}
hts_close(fp2);
initalHeader(header);
}
Expand Down
17 changes: 17 additions & 0 deletions tests/testthat/test-vcf-reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,20 @@ test_that("can set genotypes for single sample", {
expect_true(vcf$gt==2)

})

test_that("can change samples name and set genotypes for single sample", {

br <- vcfreader$new(svfile, "", "HG00096")
br$variant()
br$genotypes(F)
br$setGenotypes(c(1L,1L))
outfile <- paste0(tempfile(), ".vcf.gz")
br$output(outfile)
br$updateSamples("ZZZZZ")
br$write()
br$close()

vcf <- vcftable(outfile)
expect_true(vcf$gt==2)
expect_true(vcf$samples=="ZZZZZ")
})

0 comments on commit 76ed8cb

Please sign in to comment.