Skip to content

Commit

Permalink
fix merge omni
Browse files Browse the repository at this point in the history
  • Loading branch information
Sceki committed Nov 24, 2024
1 parent fe0e342 commit 1487cc6
Showing 1 changed file with 29 additions and 22 deletions.
51 changes: 29 additions & 22 deletions scripts/input_data_prep/merge_omni_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pyfiglet import Figlet
from termcolor import colored
from tqdm import tqdm

import pandas as pd
import os
Expand All @@ -32,37 +33,43 @@ def merge_omni():
parser = argparse.ArgumentParser(description='Merging Omniweb Data', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('--input_dir', type=str, default='../../data/omniweb_data', help='Input directory of Omniweb data')
parser.add_argument('--output_dir', type=str, default='../../data/merged_datasets', help='Output directory of processed data')

opt = parser.parse_args()

#create output directory if it does not exist
os.makedirs(opt.output_dir, exist_ok=True)

filenames=os.listdir(opt.input_dir)
filenames_magnetic_field=[os.path.join(opt.input_dir,f) for f in filenames if f.endswith('.csv') and f.startswith('magnetic_field')]
filenames_solar_wind=[os.path.join(opt.input_dir,f) for f in filenames if f.endswith('.csv') and f.startswith('solar_wind')]
filenames_indices=[os.path.join(opt.input_dir,f) for f in filenames if f.endswith('.csv') and f.startswith('indices')]
#first check if it exists:
if os.path.exists(os.path.join(opt.input_dir,'merged_omni_magnetic_field.csv')):
print('merged_omni_magnetic_field.csv already exists, skipping')
else:
final_df_magnetic_field=create_df(filenames_magnetic_field)
final_df_magnetic_field.sort_values('all__dates_datetime__',inplace=True)
file_path=os.path.join(opt.input_dir,'merged_omni_magnetic_field.csv')
final_df_magnetic_field.to_csv(file_path,index=False)
print(f' OMNI magnetic field merged dataframe created at: {file_path}')
del final_df_magnetic_field
#first check if it exists:
if os.path.exists(os.path.join(opt.input_dir,'merged_omni_solar_wind.csv')):
print('merged_omni_solar_wind.csv already exists, skipping')
else:
final_df_solar_wind=create_df(filenames_solar_wind)
final_df_solar_wind.sort_values('all__dates_datetime__',inplace=True)
file_path=os.path.join(opt.input_dir,'merged_omni_solar_wind.csv')
final_df_solar_wind.to_csv(file_path,index=False)
print(f' OMNI Solar Wind merged dataframe created at: {file_path}')
del final_df_solar_wind

final_df_magnetic_field=create_df(filenames_magnetic_field)
final_df_magnetic_field.sort_values('all__dates_datetime__',inplace=True)
file_path=os.path.join(opt.input_dir,'merged_omni_magnetic_field.csv')
final_df_magnetic_field.to_csv(file_path,index=False)
print(f' OMNI magnetic field merged dataframe created at: {file_path}')
del final_df_magnetic_field

final_df_solar_wind=create_df(filenames_solar_wind)
final_df_solar_wind.sort_values('all__dates_datetime__',inplace=True)
file_path=os.path.join(opt.input_dir,'merged_omni_solar_wind.csv')
final_df_solar_wind.to_csv(file_path,index=False)
print(f' OMNI Solar Wind merged dataframe created at: {file_path}')
del final_df_solar_wind

#first check if it exists:
if os.path.exists(os.path.join(opt.input_dir,'merged_omni_indices.csv')):
print('merged_omni_indices.csv already exists, skipping')
else:

final_df_indices=create_df(filenames_indices)
final_df_indices.sort_values('all__dates_datetime__',inplace=True)
file_path=opt.path.join(opt.output_dir,'merged_omni_indices.csv')
final_df_indices.to_csv(file_path,index=False)
final_df_indices=create_df(filenames_indices)
final_df_indices.sort_values('all__dates_datetime__',inplace=True)
file_path=os.path.join(opt.input_dir,'merged_omni_indices.csv')
final_df_indices.to_csv(file_path,index=False)
print(f' OMNI Indices merged dataframe created at: {file_path}')

if __name__ == "__main__":
Expand Down

0 comments on commit 1487cc6

Please sign in to comment.