Skip to content

Commit

Permalink
support bigd extract and reduce mem usage
Browse files Browse the repository at this point in the history
  • Loading branch information
Miachol committed Apr 29, 2020
1 parent d502ce6 commit f52c2cf
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 30 deletions.
48 changes: 22 additions & 26 deletions cmd/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ import (

"github.com/openbiox/ligo/extract"
"github.com/openbiox/ligo/flag"
cio "github.com/openbiox/ligo/io"
"github.com/openbiox/ligo/stringo"
"github.com/spf13/cobra"
)

var stdin []byte
var keyWords []string
var cleanArgs []string
var keyWordsPat string

func parseStdin(cmd *cobra.Command) {
var err error
Expand All @@ -43,67 +45,61 @@ func simpleExtr(cmd *cobra.Command, args []string) {
keyWords = stringo.StrSplit(string(keyWordsArr), "\r\n|\n|\r|\t", 10000000)
}
keyWords = removeDuplicatesAndEmpty(keyWords)
keyWordsPat = strings.Join(keyWords, "|")
parseStdin(cmd)
var wg sync.WaitGroup
sem := make(chan struct{}, RootClis.Thread)

if len(stdin) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
defer fmt.Println(string(*parseJSON(stdin)))
defer fmt.Println(string(*parseJSON(stdin, "")))
}()
RootClis.HelpFlags = false
}
if RootClis.ListFile != "" {
cleanArgs = append(cleanArgs, cio.ReadLines(RootClis.ListFile)...)
}
if len(cleanArgs) > 0 {
for _, v := range cleanArgs {
wg.Add(1)
go func(v string) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
var input []byte
var con *os.File
var err error
if con, err = os.Open(v); err != nil {
log.Warnln(err)
return
}
if input, err = ioutil.ReadAll(con); err != nil {
log.Warnln(err)
return
}
defer fmt.Println(string(*parseJSON(input)))
defer fmt.Println(string(*parseJSON(nil, v)))
}(v)
}
RootClis.HelpFlags = false
}
wg.Wait()
}

// NOTE(review): this span is a rendered diff of parseJSON. Removed and added
// lines are interleaved without +/- markers, so it is not compilable as shown.
// The two-argument signature is presumably the post-commit one — confirm
// against the repository.
//
// parseJSON selects an extractor by RootClis.Mode ("pubmed", "sra", "bigd",
// anything else falls through to the plain extractor), passes it infile, a
// pointer to dat, a pointer to the keyword pattern, RootClis.CallCor and
// RootClis.Thread, and returns a pointer to the result encoded with
// json.MarshalIndent. In the variant that takes infile it returns nil when dat
// is empty and infile is "". Errors returned by the extract.* calls and by
// json.MarshalIndent are discarded.
func parseJSON(dat []byte) *[]byte {
func parseJSON(dat []byte, infile string) *[]byte {
var sraFields []extract.SraFields
var pubMedFields []extract.PubmedFields
var keyWordsPat string
if RootClis.Mode == "pubmed" && len(dat) > 0 {
keyWordsPat = strings.Join(keyWords, "|")
pubMedFields, _ = extract.GetSimplePubmedFields("", &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
// Nothing to parse: neither in-memory data nor an input file name was given.
if len(dat) == 0 && infile == "" {
return nil
}
if RootClis.Mode == "pubmed" {
pubMedFields, _ = extract.GetSimplePubmedFields(infile, &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
dat2, _ := json.MarshalIndent(pubMedFields, "", " ")
return &dat2
} else if RootClis.Mode == "sra" && len(dat) > 0 {
keyWordsPat = strings.Join(keyWords, "|")
sraFields, _ = extract.GetSimpleSraFields("", &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
} else if RootClis.Mode == "sra" {
sraFields, _ = extract.GetSimpleSraFields(infile, &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
dat2, _ := json.MarshalIndent(sraFields, "", " ")
return &dat2
} else if len(dat) > 0 {
keyWordsPat = strings.Join(keyWords, "|")
obj, _ := extract.GetPlainFields("", &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
} else if RootClis.Mode == "bigd" {
articleFields, _ := extract.GetBigdFields(infile, &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
dat2, _ := json.MarshalIndent(articleFields, "", " ")
return &dat2
} else {
// Default mode: any Mode other than "pubmed", "sra" or "bigd" uses the plain extractor.
obj, _ := extract.GetPlainFields(infile, &dat, &keyWordsPat, RootClis.CallCor, RootClis.Thread)
dat2, _ := json.MarshalIndent(obj, "", " ")
return &dat2
}
// NOTE(review): this trailing return looks like it belongs to the pre-commit
// version, since every branch of the chain ending in a bare else returns — confirm.
return nil
}

func init() {
Expand Down
4 changes: 2 additions & 2 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ type RootClisT struct {
CallCor bool

// type
Mode string

Mode string
ListFile string
HelpFlags bool
}

Expand Down
1 change: 1 addition & 0 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ var wd string

func setGlobalFlag(cmd *cobra.Command) {
wd, _ = os.Getwd()
cmd.PersistentFlags().StringVarP(&(RootClis.ListFile), "list-file", "l", "", "A file contains file names for extract.")
cmd.PersistentFlags().IntVarP(&(RootClis.Verbose), "verbose", "", 1, "verbose level(0:no output, 1: basic level, 2: with env info")
cmd.PersistentFlags().StringVarP(&(RootClis.TaskID), "task-id", "k", stringo.RandString(15), "task ID (default is random).")
cmd.PersistentFlags().StringVarP(&(RootClis.LogDir), "log-dir", "", path.Join(wd, "_log"), "log dir.")
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ module github.com/openanno/bioextr
go 1.13

require (
github.com/openbiox/ligo v0.0.0-20200425100435-2a85b8d3c803
github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect
github.com/openbiox/ligo v0.0.0-20200429130206-02bc2092b3ff
github.com/sirupsen/logrus v1.5.0
github.com/spf13/cobra v1.0.0
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.4.0 // indirect
golang.org/x/sys v0.0.0-20200428200454-593003d681fa // indirect
)

0 comments on commit f52c2cf

Please sign in to comment.