# Source code for mhkit.wave.io

import pandas as pd
import numpy as np


def read_NDBC_file(file_name, missing_values=['MM', 9999, 999, 99]):
    """
    Reads a NDBC wave buoy data file (from https://www.ndbc.noaa.gov).

    Realtime and historical data files can be loaded with this function.

    Note: With realtime data, missing data is denoted by "MM".  With
    historical data, missing data is denoted using a variable number of
    # 9's, depending on the data type (for example: 9999.0 999.0 99.0).
    'N/A' is automatically converted to missing data.

    Data values are converted to float/int when possible. Column names
    are also converted to float/int when possible (this is useful when
    column names are frequency).

    Parameters
    ------------
    file_name : string
        Name of NDBC wave buoy data file
    missing_values : list of values
        List of values that denote missing data

    Returns
    ---------
    data : pandas DataFrame
        Data indexed by datetime with columns named according to header row
    metadata : dict or None
        Dictionary with {column name: units} key value pairs when the
        NDBC file contains unit information, otherwise None is returned
    """
    assert isinstance(file_name, str), 'file_name must be of type str'
    assert isinstance(missing_values, list), \
        'missing_values must be of type list'

    # Read the first two rows: potential header names and potential units
    with open(file_name, "r") as f:
        header = f.readline().rstrip().split()
        units = f.readline().rstrip().split()

    # If the first line is commented, remove the comment sign #
    if header[0].startswith("#"):
        header[0] = header[0][1:]
        header_commented = True
    else:
        header_commented = False

    # If the second line is commented, it holds unit information
    units_exist = units[0].startswith("#")

    # Check if the time stamp contains minutes, and create the list of
    # column names that make up the date
    if header[4] == 'mm':
        parse_vals = header[0:5]
        date_format = '%Y %m %d %H %M'
        units = units[5:]  # remove date columns from units
    else:
        parse_vals = header[0:4]
        date_format = '%Y %m %d %H'
        units = units[4:]  # remove date columns from units

    # If the first line is commented, manually feed in the column names;
    # otherwise the first row can be used as the header
    if header_commented:
        data = pd.read_csv(file_name, sep=r'\s+', header=None,
                           names=header, comment="#")
    else:
        data = pd.read_csv(file_name, sep=r'\s+', header=0, comment="#")

    # Build the datetime index from the date columns, then drop them.
    # (Combining columns via nested parse_dates lists was removed from
    # pandas 2.x, so the columns are joined and parsed explicitly here.)
    date_str = data[parse_vals].astype(str).agg(' '.join, axis=1)
    data.index = pd.to_datetime(date_str, format=date_format).values
    data = data.drop(columns=parse_vals)

    # If there was a row of units, convert it to a dictionary
    if units_exist:
        metadata = {column: unit for column, unit in
                    zip(data.columns, units)}
    else:
        metadata = None

    # Replace string sentinels (e.g. 'MM' in realtime files) first so the
    # affected columns can still be converted to numeric data below
    data = data.replace(missing_values, np.nan)

    # Convert columns to numeric data if possible, otherwise leave as
    # string (errors='ignore' is deprecated, so use explicit try/except)
    for column in data.columns:
        try:
            data[column] = pd.to_numeric(data[column])
        except (ValueError, TypeError):
            pass

    # Replace numeric sentinels (e.g. 9999.0 999.0 99.0 in historical
    # files) after the numeric conversion
    data = data.replace(missing_values, np.nan)

    # Convert column names to float if possible (handles frequency
    # headers); if any name is non-numeric, leave all names as strings
    try:
        data.columns = [float(column) for column in data.columns]
    except ValueError:
        pass

    return data, metadata