-
Notifications
You must be signed in to change notification settings - Fork 0
/
common_utils.jl
59 lines (49 loc) · 1.98 KB
/
common_utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
using Dates, DataFramesMeta
using DfUtils
"""
    get_date_symbol(fname::AbstractString) -> Tuple{String,String}

Extract `(date, symbol)` from a path whose file name has the form
`<symbol>_<date><anything>`, e.g. `"dir/000001_20200102.csv.gz"` yields
`("20200102", "000001")`.

Only the last path component is inspected. The symbol is the text before the
first underscore; the date is the first 8 characters of the text that follows it.

# Throws
- `ArgumentError` if the file name contains no underscore, or if fewer than
  8 characters follow the first underscore.
"""
function get_date_symbol(fname::AbstractString)
    name = basename(fname)
    parts = split(name, "_")
    length(parts) >= 2 || throw(
        ArgumentError("expected a file name like <symbol>_<date>..., got \"$(name)\""),
    )
    symbol, rest = parts[1], parts[2]
    length(rest) >= 8 || throw(
        ArgumentError("expected at least 8 date characters after '_' in \"$(name)\""),
    )
    # `first` counts characters, so this cannot land in the middle of a code point.
    return (String(first(rest, 8)), String(symbol))
end
"""
    get_in_file(args, dt)

Build the input path `<idir>/<dt>/<symbol>_<dt>.csv.gz`, where `idir` and
`symbol` are read from `args.task`.
"""
function get_in_file(args, dt)
    task = args.task
    fname = "$(task.symbol)_$(dt).csv.gz"
    return joinpath(task.idir, dt, fname)
end
"""
    get_out_file(args, dt)

Build the output path for date `dt` under `args.out_root`.

- `args.one_file == false`: `<out_root>/<dt>/<symbol>_<dt>.csv`
- `args.one_file == true` and `args.snapshot_time` is `nothing`: `<out_root>/<symbol>.csv`
- `args.one_file == true` and `args.snapshot_time` is set: `<out_root>/<dt>.csv`

`.gz` is appended to the file name when `args.compress` is true. The symbol is
read from `args.task.symbol`.
"""
function get_out_file(args, dt)
    # Per-date layout: one sub-directory per date.
    if !args.one_file
        gz = args.compress ? ".gz" : ""
        return joinpath(args.out_root, dt, "$(args.task.symbol)_$(dt).csv$(gz)")
    end

    # Single-file layout: the file is named after the symbol unless a snapshot
    # time is configured, in which case it is named after the date.
    stem = isnothing(args.snapshot_time) ? args.task.symbol : dt
    gz = args.compress ? ".gz" : ""
    return joinpath(args.out_root, "$(stem).csv$(gz)")
end
"""
    unix2datetime_adj(x)

Convert `x` to a `DateTime` and shift it forward by 8 hours.

`x` is divided by `1e6` before being handed to `unix2datetime` (which takes
seconds since the Unix epoch), i.e. `x` is treated as microseconds.
"""
function unix2datetime_adj(x)
    seconds = x / 1e6
    # NOTE(review): the fixed +8h looks like a UTC -> UTC+8 local-time shift — confirm.
    return unix2datetime(seconds) + Hour(8)
end
##
"""
    read_input_df(date, args)

Load the input CSV for `date` (path from `get_in_file(args, date)`) into a
`DataFrame`, parsing the `Timestamp` and `AppSeq` columns as `Int`.

When `args.hdf_root` is not `nothing`, the matching
`<hdf_root>/<date>/<symbol>_<date>.hdf` file is also loaded through `from_hdf`.
A fixed subset of columns from its first and third return values is
de-duplicated on `AppSeq` and left-joined onto the CSV data on `AppSeq`. Every
row that then contains a `missing` in any column is dropped.

Returns the resulting `DataFrame`.
"""
function read_input_df(date, args)
    csv_path = get_in_file(args, date)
    column_types = Dict(:Timestamp => Int, :AppSeq => Int)
    df_raw = DataFrame(CSV.File(csv_path; ntasks=1, types=column_types))

    # Nothing to merge in when no HDF root is configured.
    isnothing(args.hdf_root) && return df_raw

    hdf_path = joinpath(args.hdf_root, date, "$(args.task.symbol)_$(date).hdf")
    # NOTE(review): `from_hdf` is not defined in this file (presumably from DfUtils);
    # its first and third return values are used as the X and Y tables — confirm.
    X, _, Y = from_hdf(
        hdf_path;
        feature_names=args.hdf_features,
        label_names=args.hdf_labels,
        insert_ts=false,
    )

    x_cols = [:AppSeq, :CumAmount, :CumVolume, :CumBuyTurnover, :CumSellTurnover]
    y_cols = [:AppSeq, :FirstBidPrice, :FirstAskPrice]
    select!(X, x_cols)
    select!(Y, y_cols)

    # Keep one row per AppSeq so the left joins cannot multiply rows.
    unique!(X, :AppSeq)
    unique!(Y, :AppSeq)

    leftjoin!(df_raw, X; on=:AppSeq)
    leftjoin!(df_raw, Y; on=:AppSeq)

    # Drops rows with a missing value in ANY column, not only the joined ones.
    dropmissing!(df_raw)
    return df_raw
end
##
"""
    _get_intraday_time(dt::AbstractVector{DateTime}) -> Vector{Time}

Map each timestamp to a `Time` measuring its offset from 09:30, with an extra
90 minutes removed from every timestamp whose hour is greater than 12.

For example 09:30 -> 00:00, 11:30 -> 02:00, 13:00 -> 02:00, 15:00 -> 04:00.

Accepts any `AbstractVector{DateTime}` (plain vectors, views, ...). Timestamps
with hour == 12 are not adjusted, and timestamps earlier than 09:30 wrap around
to the end of the previous day because only the time-of-day part is kept.
"""
function _get_intraday_time(dt::AbstractVector{DateTime})
    # NOTE(review): the 90-minute shift after hour 12 and the 09:30 origin look like
    # a midday trading break and the session open — confirm with the data source.
    return Time.(ifelse.(hour.(dt) .> 12, dt .- Minute(90), dt) .- Hour(9) .- Minute(30))
end
"""
    preprocess_raw!(df, symbol)

Prepare a raw table in place:

1. replace `df.Timestamp` with the result of `unix2datetime_adj` applied element-wise;
2. add an `IntradayTime` column computed by `_get_intraday_time`;
3. insert, starting at column position 3, a `Code` column filled with `symbol`
   and a `Date` column holding the calendar date of each timestamp.

Returns the value of the final `insertcols!` call.
"""
function preprocess_raw!(df, symbol)
    timestamps = unix2datetime_adj.(df.Timestamp)
    df[!, :Timestamp] = timestamps
    df[!, :IntradayTime] = _get_intraday_time(timestamps)
    dates = Date.(timestamps)
    return insertcols!(df, 3, :Code => symbol, :Date => dates)
end
"""
    get_intraday_time!(args, df)

Compute the adjusted timestamps of `df.Timestamp` (via `unix2datetime_adj`) and
store on `args.dt_cols` a two-column `DataFrame`: `Date` with those timestamps
and `IntradayTime` with the result of `_get_intraday_time`. `df` itself is not
modified. Returns the newly built `DataFrame`.
"""
function get_intraday_time!(args, df)
    stamps = unix2datetime_adj.(df.Timestamp)
    intraday = _get_intraday_time(stamps)
    # NOTE(review): `:Date` holds full DateTime values here, whereas preprocess_raw!
    # stores `Date.(...)` under the same column name — confirm this is intended.
    cols = DataFrame(:Date => stamps, :IntradayTime => intraday)
    args.dt_cols = cols
    return cols
end