import os
from glob import glob

import numpy as np
import pandas as pd

# Labels for the columns of each annotation matrix, in column order.
class_identifiers = ["INST", "WW", "BR", "TMP", "VOC", "ST", "Fl", "Ob", "Cl", "Bn", "Hn", "Tpt", "Fe", "Ma", "Vn", "Va", "Vc", "Db"]

# Duration of one annotation frame in seconds.
# NOTE(review): presumably hop length (512 samples) / sample rate (22050 Hz) -- confirm.
FRAME_PERIOD = 512.0 / 22050.0

INPUT_PATTERN = "ann_audio_instruments_npz/*.npz"
OUTPUT_DIR = "ann_audio_instruments_csv"


def activations_to_segments(activation, frame_period=FRAME_PERIOD):
    """Return the ``(start, end)`` times of every run of active frames.

    Args:
        activation: 1-D sequence of per-frame values; any non-zero value
            counts as active.
        frame_period: Duration of one frame in seconds.

    Returns:
        A list of ``(start, end)`` float tuples in seconds, in temporal order.
        ``start`` is the time of the first active frame of a run and ``end``
        is the time of the first frame after the run, so a run that lasts
        until the final frame ends at ``len(activation) * frame_period``.
    """
    active = np.asarray(activation) != 0
    if active.size == 0:
        return []
    # Pad with inactive frames on both sides so that runs touching either
    # boundary still produce both a rising and a falling edge.
    padded = np.concatenate(([False], active, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    # Edges alternate rising/falling: even positions open a run, odd close it.
    starts, ends = edges[0::2], edges[1::2]
    return [
        (float(s * frame_period), float(e * frame_period))
        for s, e in zip(starts, ends)
    ]


def annotations_to_dataframe(ann_arr, identifiers=None, frame_period=FRAME_PERIOD):
    """Convert a frame-wise annotation matrix into a table of segments.

    Args:
        ann_arr: 2-D array-like of shape ``(n_frames, n_classes)`` with one
            column per entry of ``identifiers``.
        identifiers: Column labels; defaults to ``class_identifiers``.
        frame_period: Duration of one frame in seconds.

    Returns:
        A ``pandas.DataFrame`` with columns ``start``, ``end`` and
        ``instrument``, sorted by ``start`` and then ``instrument``. The frame
        is empty (but keeps its columns) when no frame is active.

    Raises:
        ValueError: If ``ann_arr`` is not 2-D or its column count does not
            match the number of identifiers.
    """
    if identifiers is None:
        identifiers = class_identifiers
    ann_arr = np.asarray(ann_arr)
    if ann_arr.ndim != 2 or ann_arr.shape[1] != len(identifiers):
        raise ValueError(
            f"expected an array of shape (n_frames, {len(identifiers)}), "
            f"got {ann_arr.shape}"
        )
    records = [
        {"start": start, "end": end, "instrument": label}
        for col, label in enumerate(identifiers)
        for start, end in activations_to_segments(ann_arr[:, col], frame_period)
    ]
    # Passing the columns explicitly keeps the header when there are no
    # segments at all (pd.concat on an empty list would raise instead).
    segments = pd.DataFrame(records, columns=["start", "end", "instrument"])
    return segments.sort_values(["start", "instrument"])


npz_paths = glob(INPUT_PATTERN)
if npz_paths:
    # Only create the output directory when there is something to write.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

for f in npz_paths:
    # NpzFile keeps the archive open until closed; the array itself is fully
    # read on access, so it stays valid after the context exits.
    with np.load(f) as npz_file:
        ann_arr = npz_file["arr_0"]
    ann_df = annotations_to_dataframe(ann_arr)
    # Strip only the final extension and use the OS-aware basename so that
    # names containing dots and non-"/" separators are handled correctly.
    stem = os.path.splitext(os.path.basename(f))[0]
    output_path = OUTPUT_DIR + "/" + stem + ".csv"
    ann_df.to_csv(output_path, sep=";", header=True, index=False)
