admin管理员组文章数量:1357294
In relation to this post, I have acquired the raw disdrometer data, which includes multiple pairs of Drop Size (0.062 mm, 0.187 mm, etc.) and Counts of raindrops (32 bins). I would like to create a time-series heatmap of the disdrometer data in which the independent variables are Time and Drop Size, while the dependent variable is "log N(d)".
How can I efficiently create this heatmap time series in Python? I did try to do this with the initial code, but I didn't manage to reproduce the sample figure in the previous post, correctly (the visualization). is there a way to adjust the x and y limits, the x and y tickmarks/labels as well? (as they were quite incorrect).
To compute N(d), the formula we use is: N(d) = "Sum of Counts in each time stamp" / (0.54 * 300 * 0.125)
I did try my best to do this with the initial code, but I didn't manage to reproduce the sample figure in the previous post, correctly (the visualization). Like is there a way to adjust/correct the x and y limits? Also the tickmarks/labels in x and y axis were quite incorrect.
The sample disdrometer data can be acquired through here. I'll appreciate a helping hand to this matter.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Input files -----------------------------------------------------------
# Drop-size class label ("0.062 mm") -> CSV with per-timestamp raindrop counts.
# Built from a single list of sizes instead of 23 hand-typed, typo-prone entries.
drop_sizes_mm = [
    0.062, 0.187, 0.312, 0.437, 0.562, 0.687, 0.812, 0.937,
    1.062, 1.187, 1.375, 1.625, 1.875, 2.125, 2.375,
    3.25, 3.75, 4.25, 4.75, 5.5, 6.5, 7.5, 8.5,
]
all_file_paths = {f"{s} mm": f"07292023/{s} mm.csv" for s in drop_sizes_mm}

# --- Constant for the N(D) calculation -------------------------------------
# N(D) = (sum of counts in each time stamp) / (0.54 * 300 * 0.125)
factor = 0.54 * 300 * 0.125

# --- Read and process all datasets -----------------------------------------
combined_data = []
for size, path in all_file_paths.items():
    df = pd.read_csv(path)
    size_mm = float(size.split()[0])     # numeric drop size, e.g. 0.062
    # Index of the 0.25 mm-wide size bin this file belongs to.
    # (The old comment said "1 mm intervals", but the code divides by 0.25.)
    size_bin = int(size_mm // 0.25)
    # Columns 2.. hold the 32 count bins; normalise them into N(D).
    nd_values = df.iloc[:, 2:].astype(float) / factor
    sum_nd = nd_values.sum(axis=1)       # total N(D) per time stamp
    # log10(0) is -inf; map zeros to NaN so they render as gaps, not artifacts.
    log_nd = np.log10(sum_nd.replace(0, np.nan))
    combined_data.append(pd.DataFrame({
        "Time stamp": df["Time stamp"],  # keep the original time format
        "Size_bin": size_bin,
        "log N(D)": log_nd,
    }))

# Combine into a single long-format DataFrame.
heatmap_data = pd.concat(combined_data).reset_index(drop=True)
# Average log N(D) over size classes that fall into the same 0.25 mm bin.
heatmap_binned = (heatmap_data
                  .groupby(["Time stamp", "Size_bin"])["log N(D)"]
                  .mean()
                  .reset_index())
# Pivot to a (size bin x time stamp) matrix for the heatmap.
heatmap_pivot = heatmap_binned.pivot(index="Size_bin", columns="Time stamp",
                                     values="log N(D)")

# --- Plot ------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(12, 6))
# NOTE: robust=True was removed - it is ignored whenever vmin/vmax are given.
sns.heatmap(heatmap_pivot, cmap="turbo", vmin=0, vmax=2,
            ax=ax, cbar_kws={'pad': 0.1})
ax.set_xlabel("Time Stamp (LST)", fontsize=15)
ax.set_ylabel("Drop Size (mm)", fontsize=15)
ax.invert_yaxis()  # smallest drop size at the bottom

# Heatmap cells are one axis unit wide, so ticks belong at the cell CENTRES
# (i + 0.5), not at the integers - this is what made the labels look wrong.
# The duplicate set_yticklabels call (whose first result was immediately
# overwritten) is gone; rows are labelled 1..n once.
n_rows = heatmap_pivot.shape[0]
ax.set_yticks(np.arange(n_rows) + 0.5)
ax.set_yticklabels(np.arange(1, n_rows + 1))

ax.tick_params(axis='x', which='major', labelsize=15, length=10, width=2,
               top=True, right=True, direction='in')
ax.tick_params(axis='y', which='major', labelsize=15, length=10, width=2,
               top=True, right=True, direction='in', rotation=0)

# --- Colorbar styling ------------------------------------------------------
cbar = ax.collections[0].colorbar
cbar.set_label('log N(D)', fontsize=15)
cbar.ax.minorticks_on()
cbar.ax.tick_params(which='major', labelsize=15, length=10, width=2,
                    top=True, right=True, direction='in')
cbar.ax.tick_params(which='minor', length=5, width=1.5,
                    top=True, right=True, direction='in')
cbar.outline.set_edgecolor('black')
cbar.outline.set_linewidth(2)

ax.grid(ls='--', alpha=0.35)
# seaborn.heatmap hides the spines; turn them back on.  Use `ax` directly
# (plt.gca() would break if another figure were current).
for spine in ax.spines.values():
    spine.set_visible(True)

plt.savefig('Test1.jpg', dpi=300, bbox_inches='tight')
plt.show()
In relation to this post, I have acquired the raw disdrometer data, which includes multiple pairs of Drop Size (0.062 mm, 0.187 mm, etc.) and Counts of raindrops (32 bins). I would like to create a time-series heatmap of the disdrometer data in which the independent variables are Time and Drop Size, while the dependent variable is "log N(d)".
How can I efficiently create this heatmap time series in Python? I did try to do this with the initial code, but I didn't manage to reproduce the sample figure in the previous post, correctly (the visualization). is there a way to adjust the x and y limits, the x and y tickmarks/labels as well? (as they were quite incorrect).
To compute N(d), the formula we use is: N(d) = "Sum of Counts in each time stamp" / (0.54 * 300 * 0.125)
I did try my best to do this with the initial code, but I didn't manage to reproduce the sample figure in the previous post, correctly (the visualization). Like is there a way to adjust/correct the x and y limits? Also the tickmarks/labels in x and y axis were quite incorrect.
The sample disdrometer data can be acquired through here. I'll appreciate a helping hand to this matter.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Input files -----------------------------------------------------------
# Drop-size class label ("0.062 mm") -> CSV with per-timestamp raindrop counts.
# Built from a single list of sizes instead of 23 hand-typed, typo-prone entries.
drop_sizes_mm = [
    0.062, 0.187, 0.312, 0.437, 0.562, 0.687, 0.812, 0.937,
    1.062, 1.187, 1.375, 1.625, 1.875, 2.125, 2.375,
    3.25, 3.75, 4.25, 4.75, 5.5, 6.5, 7.5, 8.5,
]
all_file_paths = {f"{s} mm": f"07292023/{s} mm.csv" for s in drop_sizes_mm}

# --- Constant for the N(D) calculation -------------------------------------
# N(D) = (sum of counts in each time stamp) / (0.54 * 300 * 0.125)
factor = 0.54 * 300 * 0.125

# --- Read and process all datasets -----------------------------------------
combined_data = []
for size, path in all_file_paths.items():
    df = pd.read_csv(path)
    size_mm = float(size.split()[0])     # numeric drop size, e.g. 0.062
    # Index of the 0.25 mm-wide size bin this file belongs to.
    # (The old comment said "1 mm intervals", but the code divides by 0.25.)
    size_bin = int(size_mm // 0.25)
    # Columns 2.. hold the 32 count bins; normalise them into N(D).
    nd_values = df.iloc[:, 2:].astype(float) / factor
    sum_nd = nd_values.sum(axis=1)       # total N(D) per time stamp
    # log10(0) is -inf; map zeros to NaN so they render as gaps, not artifacts.
    log_nd = np.log10(sum_nd.replace(0, np.nan))
    combined_data.append(pd.DataFrame({
        "Time stamp": df["Time stamp"],  # keep the original time format
        "Size_bin": size_bin,
        "log N(D)": log_nd,
    }))

# Combine into a single long-format DataFrame.
heatmap_data = pd.concat(combined_data).reset_index(drop=True)
# Average log N(D) over size classes that fall into the same 0.25 mm bin.
heatmap_binned = (heatmap_data
                  .groupby(["Time stamp", "Size_bin"])["log N(D)"]
                  .mean()
                  .reset_index())
# Pivot to a (size bin x time stamp) matrix for the heatmap.
heatmap_pivot = heatmap_binned.pivot(index="Size_bin", columns="Time stamp",
                                     values="log N(D)")

# --- Plot ------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(12, 6))
# NOTE: robust=True was removed - it is ignored whenever vmin/vmax are given.
sns.heatmap(heatmap_pivot, cmap="turbo", vmin=0, vmax=2,
            ax=ax, cbar_kws={'pad': 0.1})
ax.set_xlabel("Time Stamp (LST)", fontsize=15)
ax.set_ylabel("Drop Size (mm)", fontsize=15)
ax.invert_yaxis()  # smallest drop size at the bottom

# Heatmap cells are one axis unit wide, so ticks belong at the cell CENTRES
# (i + 0.5), not at the integers - this is what made the labels look wrong.
# The duplicate set_yticklabels call (whose first result was immediately
# overwritten) is gone; rows are labelled 1..n once.
n_rows = heatmap_pivot.shape[0]
ax.set_yticks(np.arange(n_rows) + 0.5)
ax.set_yticklabels(np.arange(1, n_rows + 1))

ax.tick_params(axis='x', which='major', labelsize=15, length=10, width=2,
               top=True, right=True, direction='in')
ax.tick_params(axis='y', which='major', labelsize=15, length=10, width=2,
               top=True, right=True, direction='in', rotation=0)

# --- Colorbar styling ------------------------------------------------------
cbar = ax.collections[0].colorbar
cbar.set_label('log N(D)', fontsize=15)
cbar.ax.minorticks_on()
cbar.ax.tick_params(which='major', labelsize=15, length=10, width=2,
                    top=True, right=True, direction='in')
cbar.ax.tick_params(which='minor', length=5, width=1.5,
                    top=True, right=True, direction='in')
cbar.outline.set_edgecolor('black')
cbar.outline.set_linewidth(2)

ax.grid(ls='--', alpha=0.35)
# seaborn.heatmap hides the spines; turn them back on.  Use `ax` directly
# (plt.gca() would break if another figure were current).
for spine in ax.spines.values():
    spine.set_visible(True)

plt.savefig('Test1.jpg', dpi=300, bbox_inches='tight')
plt.show()
Share
Improve this question
edited Mar 29 at 11:16
gboffi
25.1k9 gold badges60 silver badges95 bronze badges
asked Mar 28 at 9:40
CGHACGHA
377 bronze badges
6
|
Show 1 more comment
1 Answer
The image above was obtained by running:
$ python3 raindrops_falling.py [0-9]*.csv
Minimum count*factor = 20.250
Minimum log N(D) = 1.306
$
I'm rather unsure about my procedure to compute logNd
below, but it's your field not mine, isn't it? so please modify that part as you will, you should be a better judge than me.
Eventually, here it is the code (note that I have used neither Pandas nor Seaborne)
$ cat raindrops_falling.py
import numpy as np
import matplotlib.pyplot as plt
import fileinput
import warnings

# Accumulators: `time_stamps` keeps time stamps in first-seen order;
# `sizes` maps drop size (key in µm) -> list of total counts, one per
# time stamp, appended in file order.
time_stamps = []
sizes = {}
# fileinput iterates over the lines of every CSV given on the command line.
for line in fileinput.input(encoding="utf-8"):
    # Each row: time stamp, drop size in mm, then the 32 count bins.
    time, mm, *counts = [elt.strip() for elt in line.strip().split(',')]
    if time[0] == "T" : continue # skip headers
    time = int(time)
    μm = int(float(mm)*1000) # over-thinking the keys of sizes
    # count is a string of digits or ('') the null string
    # int(count or 0) does not fail when count=='' but returns 0
    total_count = sum((int(count or 0) for count in counts))
    # treating new time or new drop size
    if time not in time_stamps: time_stamps.append(time)
    if μm not in sizes: sizes[μm] = []
    # store the total count for this μm and each timestamp
    sizes[μm].append(total_count)

# Axes of the color mesh: time stamps (x) and drop diameters back in mm (y).
time_stamps = np.array(time_stamps)
mm = np.array(list(sizes.keys()))/1000
factor = 0.54 * 300 * 0.125 # = 54 * 3 / 8 = 27 * 3 / 4 = 81/4 = 20.25
# 2-D array, one row per drop size, one column per time stamp.
# NOTE(review): here the counts are MULTIPLIED by `factor`, while the
# question's code divides by it - confirm which the N(D) formula intends.
factored_counts = np.array([counts for counts in sizes.values()])*factor
# some checks
min_fc = (factored_counts[factored_counts>0]).min()
print("""\
Minimum count*factor = %.3f
Minimum log N(D) = %.3f"""%(min_fc, np.log10(min_fc)))
# np.log10 return NaN when the argument is negative,
# we prefer a NaN (no patch) to -Inf for plotting the color mesh
factored_counts[factored_counts==0] = -1
# when np.log10 finds a <0 argument it raises a warning, let's suppress it
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    logNd = np.log10(factored_counts)

# PLOTTING
fig, ax = plt.subplots(layout='constrained')
plt.pcolormesh(time_stamps, mm, logNd, cmap='turbo',
               # vmin=0 → colorbar starts from 0
               # shading='nearest' → patches centered on time, diam
               vmin=0, shading='nearest')
# check also shading='gouraud'
plt.colorbar()
# some little adjustments
plt.yticks(mm, fontsize='xx-small')
plt.xlim(xmax=2000)
plt.grid(lw=0.15, color='#707070')
ax.patch.set_color('#e0e0e0')
fig.patch.set_color('#ddddd0')
# That's all Folks!
plt.show()
$
本文标签: pythonOn making a time series heat mapStack Overflow
版权声明:本文标题:python - On making a time series heat map - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1744045603a2581462.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
07292023/4.25 mm.csv contains suspicious data, because a great deal of the numbers in the second column are not 4.25, as it is in every other file you've posted, but 4.75. – gboffi Commented Mar 29 at 11:07
In the count columns, some zero entries are written as explicit zeros ("0") and some are null strings (""). Is there a semantic difference, or do both notations mean "nothing happened here"? – gboffi Commented Mar 29 at 11:10