DataStream Class#

This tutorial demonstrates the usage of the DataStream class, which provides methods for analyzing time-series data.

It provides the following features:
  • Trimming: Identifies steady-state regions in data.

  • Statistical Analysis: Computes mean, standard deviation, confidence intervals, and cumulative statistics.

  • Stationarity Testing: Uses the Augmented Dickey-Fuller test.

  • Effective Sample Size (ESS): Estimates the independent sample size.

  • Optimal Window Size: Determines the best window for data smoothing.

Import DataStream

import quends as qnds

GX Data Analysis#

Analysis on GX Data

# Specify the file paths
csv_file_path = "gx/tprim_2_0.out.csv"
csv2_file_path = "gx/ensemble/tprim_2_5_a.out.csv"

# Load the data from CSV files
data_stream_csv = qnds.from_csv(csv_file_path)
data_stream_gx = qnds.from_csv(csv2_file_path)

# Display the first few rows of the GX data
data_stream_gx.head()
time Phi2_t Phi2_kxt Phi2_kyt Phi2_kxkyt Phi2_zt Apar2_t Apar2_kxt Apar2_kyt Apar2_kxkyt Apar2_zt Phi2_zonal_t Phi2_zonal_kxt Phi2_zonal_zt Wg_st Wg_kxst Wg_kyst Wg_kxkyst Wg_zst Wg_lmst Wphi_st Wphi_kxst Wphi_kyst Wphi_kxkyst Wphi_zst Wapar_st Wapar_kxst Wapar_kyst Wapar_kxkyst Wapar_zst HeatFlux_st HeatFlux_kxst HeatFlux_kyst HeatFlux_kxkyst HeatFlux_zst HeatFluxES_st HeatFluxES_kxst HeatFluxES_kyst HeatFluxES_kxkyst HeatFluxES_zst HeatFluxApar_st HeatFluxApar_kxst HeatFluxApar_kyst HeatFluxApar_kxkyst HeatFluxApar_zst HeatFluxBpar_st HeatFluxBpar_kxst HeatFluxBpar_kyst HeatFluxBpar_kxkyst HeatFluxBpar_zst ParticleFlux_st ParticleFlux_kxst ParticleFlux_kyst ParticleFlux_kxkyst ParticleFlux_zst TurbulentHeating_st TurbulentHeating_kxst TurbulentHeating_kyst TurbulentHeating_kxkyst TurbulentHeating_zst
0 0.020072 0.000322 1.014122e-06 5.556982e-15 1.283244e-17 0.000004 0.0 0.0 0.0 0.0 0.0 5.556983e-15 1.283244e-17 8.195788e-17 0.001261 0.000010 2.157422e-14 7.842924e-17 0.000018 1.260900e-03 0.000090 4.728780e-07 2.252245e-15 7.758021e-18 8.470064e-07 0.0 0.0 0.0 0.0 0.0 0.000003 2.114464e-08 0.000000e+00 0.0 3.988991e-08 0.000003 2.114464e-08 0.000000e+00 0.0 3.988991e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -4.513479e-11 -1.238711e-14 0.000000e+00 0.0 -1.328687e-12
1 2.027322 0.000236 9.462429e-07 1.035192e-05 3.763393e-17 0.000004 0.0 0.0 0.0 0.0 0.0 1.477131e-10 3.763393e-17 9.182713e-17 0.001496 0.000009 2.018144e-05 4.068222e-16 0.000019 5.423472e-08 0.000064 4.470815e-07 1.587569e-06 1.941193e-17 9.091424e-07 0.0 0.0 0.0 0.0 0.0 0.000154 1.935043e-08 3.436235e-10 0.0 4.392872e-08 0.000154 1.935043e-08 3.436235e-10 0.0 4.392872e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.326710e-12 -1.907196e-13 -2.302137e-12 0.0 -1.325463e-12
2 4.034571 0.000198 1.138735e-06 9.805337e-06 1.168341e-16 0.000005 0.0 0.0 0.0 0.0 0.0 3.704297e-10 1.168341e-16 1.050660e-16 0.002205 0.000011 1.770925e-05 7.266167e-16 0.000019 1.252036e-08 0.000053 5.322058e-07 1.374501e-06 6.295199e-17 9.924838e-07 0.0 0.0 0.0 0.0 0.0 0.000196 2.208835e-08 1.294924e-09 0.0 4.903913e-08 0.000196 2.208835e-08 1.294924e-09 0.0 4.903913e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -4.022337e-12 -2.190608e-13 -1.575239e-12 0.0 -4.890709e-13
3 6.041821 0.000206 1.060234e-06 9.191237e-06 2.735980e-16 0.000005 0.0 0.0 0.0 0.0 0.0 4.042708e-10 2.735980e-16 1.229009e-16 0.002964 0.000010 1.881524e-05 1.689178e-15 0.000020 2.801751e-09 0.000053 4.835276e-07 1.562852e-06 1.470741e-16 1.104677e-06 0.0 0.0 0.0 0.0 0.0 0.000236 2.078288e-08 2.700831e-09 0.0 5.548262e-08 0.000236 2.078288e-08 2.700831e-09 0.0 5.548262e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.022327e-11 -3.670521e-13 -1.553083e-13 0.0 -8.325619e-13
4 8.049070 0.000245 1.066248e-06 1.006626e-05 1.340230e-16 0.000005 0.0 0.0 0.0 0.0 0.0 7.481377e-10 1.340230e-16 1.443730e-16 0.003818 0.000009 1.914822e-05 7.813427e-16 0.000021 4.485645e-10 0.000061 4.882390e-07 1.435011e-06 7.265576e-17 1.238737e-06 0.0 0.0 0.0 0.0 0.0 0.000298 2.149553e-08 5.369387e-09 0.0 6.278837e-08 0.000298 2.149553e-08 5.369387e-09 0.0 6.278837e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.445467e-11 -5.255198e-14 2.001172e-12 0.0 -6.776345e-13


Get available variables

data_stream_gx.variables()
Index(['time', 'Phi2_t', 'Phi2_kxt', 'Phi2_kyt', 'Phi2_kxkyt', 'Phi2_zt',
       'Apar2_t', 'Apar2_kxt', 'Apar2_kyt', 'Apar2_kxkyt', 'Apar2_zt',
       'Phi2_zonal_t', 'Phi2_zonal_kxt', 'Phi2_zonal_zt', 'Wg_st', 'Wg_kxst',
       'Wg_kyst', 'Wg_kxkyst', 'Wg_zst', 'Wg_lmst', 'Wphi_st', 'Wphi_kxst',
       'Wphi_kyst', 'Wphi_kxkyst', 'Wphi_zst', 'Wapar_st', 'Wapar_kxst',
       'Wapar_kyst', 'Wapar_kxkyst', 'Wapar_zst', 'HeatFlux_st',
       'HeatFlux_kxst', 'HeatFlux_kyst', 'HeatFlux_kxkyst', 'HeatFlux_zst',
       'HeatFluxES_st', 'HeatFluxES_kxst', 'HeatFluxES_kyst',
       'HeatFluxES_kxkyst', 'HeatFluxES_zst', 'HeatFluxApar_st',
       'HeatFluxApar_kxst', 'HeatFluxApar_kyst', 'HeatFluxApar_kxkyst',
       'HeatFluxApar_zst', 'HeatFluxBpar_st', 'HeatFluxBpar_kxst',
       'HeatFluxBpar_kyst', 'HeatFluxBpar_kxkyst', 'HeatFluxBpar_zst',
       'ParticleFlux_st', 'ParticleFlux_kxst', 'ParticleFlux_kyst',
       'ParticleFlux_kxkyst', 'ParticleFlux_zst', 'TurbulentHeating_st',
       'TurbulentHeating_kxst', 'TurbulentHeating_kyst',
       'TurbulentHeating_kxkyst', 'TurbulentHeating_zst'],
      dtype='object')

Get the number of rows in the GX data

len(data_stream_gx)
201

Stationary Check#

# Check if a single column is stationary
data_stream_gx.is_stationary("HeatFlux_st")

# Check if multiple columns are stationary
data_stream_gx.is_stationary(["HeatFlux_st", "Wg_st", "Phi2_t"])
{'HeatFlux_st': True, 'Wg_st': True, 'Phi2_t': False}

Trimming data to obtain the steady-state portion#

Trim the data based on standard deviation method

# Returns: a DataStream holding the trimmed (steady-state) portion of the data
trimmed = data_stream_gx.trim(column_name="HeatFlux_st", batch_size=50, method="std")

# Print first 5 rows of dataframe
trimmed.head()
time HeatFlux_st
0 158.592772 8.508736
1 160.600022 8.699987
2 162.607271 8.852156
3 164.614520 8.883341
4 166.621770 8.713289


Trim the data based on rolling variance method

trimmed = data_stream_gx.trim(
    column_name="HeatFlux_st", batch_size=50, method="rolling_variance", threshold=0.10
)

# Gather results
trimmed.head()
time Phi2_t Phi2_kxt Phi2_kyt Phi2_kxkyt Phi2_zt Apar2_t Apar2_kxt Apar2_kyt Apar2_kxkyt Apar2_zt Phi2_zonal_t Phi2_zonal_kxt Phi2_zonal_zt Wg_st Wg_kxst Wg_kyst Wg_kxkyst Wg_zst Wg_lmst Wphi_st Wphi_kxst Wphi_kyst Wphi_kxkyst Wphi_zst Wapar_st Wapar_kxst Wapar_kyst Wapar_kxkyst Wapar_zst HeatFlux_st HeatFlux_kxst HeatFlux_kyst HeatFlux_kxkyst HeatFlux_zst HeatFluxES_st HeatFluxES_kxst HeatFluxES_kyst HeatFluxES_kxkyst HeatFluxES_zst HeatFluxApar_st HeatFluxApar_kxst HeatFluxApar_kyst HeatFluxApar_kxkyst HeatFluxApar_zst HeatFluxBpar_st HeatFluxBpar_kxst HeatFluxBpar_kyst HeatFluxBpar_kxkyst HeatFluxBpar_zst ParticleFlux_st ParticleFlux_kxst ParticleFlux_kyst ParticleFlux_kxkyst ParticleFlux_zst TurbulentHeating_st TurbulentHeating_kxst TurbulentHeating_kyst TurbulentHeating_kxkyst TurbulentHeating_zst


Trim the data based on threshold method

trimmed = data_stream_gx.trim(
    column_name="HeatFlux_st", batch_size=50, method="threshold", threshold=0.1
)

# View trimmed data
trimmed.head()
time HeatFlux_st
0 158.592772 8.508736
1 160.600022 8.699987
2 162.607271 8.852156
3 164.614520 8.883341
4 166.621770 8.713289


Effective Sample Size#

Compute Effective Sample Size for specific columns in GX

ess_dict = data_stream_gx.effective_sample_size(column_names=["HeatFlux_st", "Wg_st"])
print(ess_dict)
{'results': {'HeatFlux_st': 24, 'Wg_st': 10}, 'metadata': [{'operation': 'is_stationary', 'options': {'columns': 'HeatFlux_st'}}, {'operation': 'effective_sample_size', 'options': {'column_names': ['HeatFlux_st', 'Wg_st'], 'alpha': 0.05}}]}

Compute Effective Sample Size for the trimmed data

ess_df = trimmed.effective_sample_size()
print(ess_df)
{'results': {'HeatFlux_st': 5}, 'metadata': [{'operation': 'is_stationary', 'options': {'columns': 'HeatFlux_st'}}, {'operation': 'trim', 'options': {'column_name': 'HeatFlux_st', 'batch_size': 50, 'start_time': 0.0, 'method': 'threshold', 'threshold': 0.1, 'robust': True, 'sss_start': 158.59277222661015}}, {'operation': 'effective_sample_size', 'options': {'column_names': None, 'alpha': 0.05}}]}

UQ Analysis#

Compute Statistics on trimmed dataframe

stats = trimmed.compute_statistics(method="sliding")
print(stats)

stats_df = stats["HeatFlux_st"]
{'HeatFlux_st': {'mean': 7.9406914994528615, 'mean_uncertainty': 0.08981775761011032, 'confidence_interval': (7.764648694537045, 8.116734304368677), 'pm_std': (7.850873741842751, 8.030509257062972), 'effective_sample_size': 5, 'window_size': 24}, 'metadata': [{'operation': 'is_stationary', 'options': {'columns': 'HeatFlux_st'}}, {'operation': 'trim', 'options': {'column_name': 'HeatFlux_st', 'batch_size': 50, 'start_time': 0.0, 'method': 'threshold', 'threshold': 0.1, 'robust': True, 'sss_start': 158.59277222661015}}, {'operation': 'effective_sample_size', 'options': {'column_names': None, 'alpha': 0.05}}, {'operation': 'compute_statistics', 'options': {'column_name': None, 'ddof': 1, 'method': 'sliding', 'window_size': None}}]}

The Exporter below displays the information as a DataFrame

exporter = qnds.Exporter()
exporter.display_dataframe(stats_df)
       mean  mean_uncertainty  ...  effective_sample_size  window_size
0  7.940691          0.089818  ...                      5           24
1  7.940691          0.089818  ...                      5           24

[2 rows x 6 columns]

The call below displays the information as JSON

exporter.display_json(stats_df)
{
  "mean": 7.9406914994528615,
  "mean_uncertainty": 0.08981775761011032,
  "confidence_interval": [
    7.764648694537045,
    8.116734304368677
  ],
  "pm_std": [
    7.850873741842751,
    8.030509257062972
  ],
  "effective_sample_size": 5,
  "window_size": 24
}

Other statistical methods#

Calculate the mean with a window size of 10

mean_df = trimmed.mean(window_size=10)
print(mean_df)
{'HeatFlux_st': 7.989677796666666}

Calculate the mean using the sliding method

mean_df = trimmed.mean(method="sliding")
print(mean_df)
{'HeatFlux_st': 7.9406914994528615}

Calculate the mean uncertainty

uq_df = trimmed.mean_uncertainty()
print(uq_df)
{'HeatFlux_st': 0.23525686516667507}

Calculate the mean uncertainty using the sliding method

uq_df = trimmed.mean_uncertainty(method="sliding")
uq_df
{'HeatFlux_st': 0.08981775761011032}

Calculate the confidence interval for the trimmed data

ci_df = trimmed.confidence_interval()
print(ci_df)
{'HeatFlux_st': (7.528574340939983, 8.45078125239335)}

Cumulative Statistics

cumulative = trimmed.cumulative_statistics()
print(cumulative)

cumulative_df = cumulative["HeatFlux_st"]
{'HeatFlux_st': {'cumulative_mean': [8.777007562500001, 8.427817691666668, 8.308147695833334, 8.086430926041666, 7.989677796666667], 'cumulative_uncertainty': [nan, 0.4938290511758112, 0.40607424148898075, 0.553682367211838, 0.5260503426861878], 'standard_error': [nan, 0.3491898708333347, 0.23444707263463616, 0.276841183605919, 0.23525686516667502], 'window_size': 24}, 'metadata': [{'operation': 'is_stationary', 'options': {'columns': 'HeatFlux_st'}}, {'operation': 'trim', 'options': {'column_name': 'HeatFlux_st', 'batch_size': 50, 'start_time': 0.0, 'method': 'threshold', 'threshold': 0.1, 'robust': True, 'sss_start': 158.59277222661015}}, {'operation': 'effective_sample_size', 'options': {'column_names': 'HeatFlux_st', 'alpha': 0.05}}, {'operation': 'cumulative_statistics', 'options': {'column_name': None, 'method': 'non-overlapping', 'window_size': None}}]}

Display Cumulative Statistics as a DataFrame

exporter.display_dataframe(cumulative_df)
   cumulative_mean  cumulative_uncertainty  standard_error  window_size
0         8.777008                     NaN             NaN           24
1         8.427818                0.493829        0.349190           24
2         8.308148                0.406074        0.234447           24
3         8.086431                0.553682        0.276841           24
4         7.989678                0.526050        0.235257           24

CGYRO Data Analysis#

Specify the file paths

csv_file_path = "cgyro/output_nu0_50.csv"
data_stream_cg = qnds.from_csv(csv_file_path)
data_stream_cg.head()
Unnamed: 0 time Q_D/Q_GBD Q_e/Q_GBD
0 0 0.5 0.003355 0.001669
1 1 1.0 0.003314 0.003338
2 2 1.5 0.003160 0.003941
3 3 2.0 0.002480 0.002337
4 4 2.5 0.002004 0.001941


Get the number of rows

len(data_stream_cg)
1748

Trim the data based on standard deviation method

trimmed_ = data_stream_cg.trim(column_name="Q_D/Q_GBD", method="std", robust=True)
# View trimmed data
print(trimmed_)
<quends.base.data_stream.DataStream object at 0x13c7ce510>
trimmed_.head()
time Q_D/Q_GBD
0 208.0 12.974854
1 208.5 13.264263
2 209.0 13.563313
3 209.5 13.815548
4 210.0 14.046638


Check whether the data stream is stationary

data_stream_cg.is_stationary("Q_D/Q_GBD")
{'Q_D/Q_GBD': True}

Plot the DataStream

plotter = qnds.Plotter()
plot = plotter.trace_plot(data_stream_cg, ["Q_D/Q_GBD"])
Time Series Plots for Datastream, Q_D/Q_GBD
plot = plotter.steady_state_automatic_plot(
    data_stream_cg, variables_to_plot=["Q_D/Q_GBD"]
)
Q_D/Q_GBD
plot = plotter.steady_state_plot(data_stream_cg, variables_to_plot=["Q_D/Q_GBD"])
Q_D/Q_GBD
For Q_D/Q_GBD, no manual steady state start provided. Plotting raw signal.

To show additional data use:

addition_info = trimmed.additional_data(method="sliding")
print(addition_info)
{'HeatFlux_st': {'A_est': 0.03170698677588585, 'p_est': 0.5410018913986299, 'n_current': 99, 'current_sem': 0.00263944463499645, 'target_sem': 0.002375500171496805, 'n_target': 120.28580081212739, 'additional_samples': 22, 'window_size': 24}, 'metadata': [{'operation': 'is_stationary', 'options': {'columns': 'HeatFlux_st'}}, {'operation': 'trim', 'options': {'column_name': 'HeatFlux_st', 'batch_size': 50, 'start_time': 0.0, 'method': 'threshold', 'threshold': 0.1, 'robust': True, 'sss_start': 158.59277222661015}}, {'operation': 'effective_sample_size', 'options': {'column_names': 'HeatFlux_st', 'alpha': 0.05}}, {'operation': 'additional_data', 'options': {'column_name': None, 'ddof': 1, 'method': 'sliding', 'window_size': None, 'reduction_factor': 0.1}}]}

To specify a reduction factor:

addition_info = trimmed.additional_data(reduction_factor=0.2)
print(addition_info)
{'HeatFlux_st': {'A_est': 0.03170698677588585, 'p_est': 0.5410018913986299, 'n_current': 99, 'current_sem': 0.00263944463499645, 'target_sem': 0.00211155570799716, 'n_target': 149.54291116020593, 'additional_samples': 51, 'window_size': 24}, 'metadata': [{'operation': 'is_stationary', 'options': {'columns': 'HeatFlux_st'}}, {'operation': 'trim', 'options': {'column_name': 'HeatFlux_st', 'batch_size': 50, 'start_time': 0.0, 'method': 'threshold', 'threshold': 0.1, 'robust': True, 'sss_start': 158.59277222661015}}, {'operation': 'effective_sample_size', 'options': {'column_names': 'HeatFlux_st', 'alpha': 0.05}}, {'operation': 'additional_data', 'options': {'column_name': None, 'ddof': 1, 'method': 'sliding', 'window_size': None, 'reduction_factor': 0.2}}]}

Total running time of the script: (0 minutes 4.791 seconds)

Gallery generated by Sphinx-Gallery