using CSV
using ZipFile
using Dates
using DataFrames
using DataSets
using Underscores

# Define utility functions

# Load one zipped CSV blob into a DataFrame.
#
# `csv_blob` is opened as an `IO` stream and read as a zip archive that must
# contain exactly one entry (`only` throws otherwise). The entry is read fully
# into memory and parsed with `CSV.read`, using explicitly supplied column names
# ("pair", "timestamp", "bid", "ask") and parsing `timestamp` as a `DateTime`
# with the given date format. The base name of the blob is logged before loading.
#
# Returns the value produced by `CSV.read`.
function load_truefx_csv(csv_blob)
    @info "Loading $(basename(csv_blob))"
    column_names = ["pair", "timestamp", "bid", "ask"]
    return open(IO, csv_blob) do stream
        archive = ZipFile.Reader(stream)
        # The archive is expected to wrap a single CSV file.
        entry = only(archive.files)
        raw_bytes = read(entry)
        CSV.read(raw_bytes, DataFrame;
                 header=column_names,
                 dateformat=dateformat"yyyymmdd H:M:S.s",
                 types=Dict(:timestamp => DateTime))
    end
end

# Compute open-high-low-close of the `bid_or_ask` column (`:bid` by default).
#
# The `:timestamp` column is first replaced by `grouping` applied to it (the
# default floors timestamps to whole days); rows are then grouped by
# `[:pair, :timestamp]` and each group is summarised into the columns
# `:low`, `:high`, `:open` and `:close`.
#
# NOTE: `:open` and `:close` are the first and last values of each group in row
# order, so the result is only meaningful if `data` is ordered by time within
# each pair.
function compute_ohlc(data, bid_or_ask=:bid, grouping=t->floor.(t, Dates.Day(1)))
    # Overwrite each timestamp with the bucket it belongs to.
    bucketed = transform(data, :timestamp => grouping => :timestamp)
    per_bucket = groupby(bucketed, [:pair, :timestamp])
    return combine(per_bucket,
                   bid_or_ask => minimum => :low,
                   bid_or_ask => maximum => :high,
                   bid_or_ask => first => :open,
                   bid_or_ask => last => :close)
end

#-------------------------------------------------------------------------------
# Script section

# Open the "currency_data" dataset as a tree and reduce every entry to its
# daily bid OHLC table.
ohlc_per_file = open(BlobTree, dataset("currency_data")) do tree
    by_day = t -> floor.(t, Dates.Day(1))
    map(blob -> compute_ohlc(load_truefx_csv(blob), :bid, by_day), tree)
end

# Stack the per-file tables into a single table.
ohlc_data = reduce(vcat, ohlc_per_file)

# Write the combined table to disk and record its path in the environment.
# NOTE(review): RESULTS_FILE is presumably read by whatever launches this
# script — confirm against the job runner.
CSV.write("ohlc_data.csv", ohlc_data)
ENV["RESULTS_FILE"] = "ohlc_data.csv"

