数学建模学习合集 | 天气预测

  该数据集提供了澳大利亚多个地点大约 10 年的每日天气观测数据。我们的任务是根据这些数据预测 RainTomorrow(明天是否下雨)。

  import warnings

  # Silence library warnings before the heavy third-party imports run.
  warnings.filterwarnings('ignore')

  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
  import seaborn as sns
  import tensorflow as tf
  from sklearn.metrics import classification_report, confusion_matrix
  from sklearn.metrics import mean_absolute_error, mean_squared_error
  from sklearn.metrics import r2_score
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import MinMaxScaler
  from tensorflow.keras.callbacks import EarlyStopping
  from tensorflow.keras.layers import Activation, Dense, Dropout
  from tensorflow.keras.models import Sequential

  # Load the daily weather observations and preview the first rows.
  data = pd.read_csv('weatherAUS.csv')
  data.head()

  # Summary statistics of the frame.
  data.describe()

  

  # Inspect the dtype of every column.
  data.dtypes

  

  # Parse the date column so that its calendar parts can be extracted.
  data['Date'] = pd.to_datetime(data['Date'])
  data['Date']

  # Store year / month / day-of-month as separate columns.
  data['year'] = data['Date'].dt.year
  data['Month'] = data['Date'].dt.month
  data['day'] = data['Date'].dt.day

  data.head()

  # Drop the timestamp column now that its parts are stored separately.
  data.drop('Date', axis=1, inplace=True)

  data.columns

  

  探索性数据分析(EDA)

  1.数据相关性探索

  # Pairwise correlation of the numeric columns only. The frame still holds
  # object-typed columns at this point, and DataFrame.corr() raises on those
  # in pandas >= 2.0, so they are filtered out explicitly.
  df_corr = data.select_dtypes(include='number').corr()

  print(df_corr)

  # Mask the upper triangle (diagonal included) so each pair is drawn once.
  # The builtin ``bool`` is used because ``np.bool`` was removed in NumPy 1.24.
  mask = np.triu(np.ones_like(df_corr, dtype=bool))

  # The first row and the last column are fully masked by np.triu, so trim
  # them from both the mask and the matrix to avoid an empty band in the plot.
  mask = mask[1:, :-1]
  corr = df_corr.iloc[1:, :-1].copy()

  # Colour map
  cmap = sns.diverging_palette(0, 230, 90, 60, as_cmap=True)

  plt.figure(figsize=(15, 13))
  ax = sns.heatmap(
      corr, square=True, annot=True, fmt='.2f',
      linewidths=5, cmap=cmap, vmin=-1, vmax=1,
      mask=mask,
  )
  ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
  plt.show()

  

  2.是否会下雨

  # Class balance of the target column.
  sns.set(style="darkgrid")
  plt.figure(figsize=(4, 3))
  sns.countplot(x='RainTomorrow', data=data)

  # Class balance of the same-day rain flag.
  plt.figure(figsize=(4, 3))
  sns.countplot(x='RainToday', data=data)

  

  # Contingency table: rows are RainTomorrow, columns are RainToday.
  x = pd.crosstab(data['RainTomorrow'], data['RainToday'])
  x

  # Convert every row to percentages (each RainTomorrow row sums to 100).
  # Dividing along the index works for any number of categories, unlike a
  # hard-coded reshape(2, 1).
  # NOTE(review): this gives the share of RainToday values within each
  # RainTomorrow outcome. To read the table as "chance of rain tomorrow given
  # today's weather", normalise the columns instead:
  # x.div(x.sum(axis=0), axis=1) * 100.
  y = x.div(x.sum(axis=1), axis=0) * 100
  y

  

  我们通过数据可以看出:

  1. 如果今天不下雨,那么明天下雨的机会 = 15%

  2. 如果今天下雨,那么明天下雨的机会 = 46%

  y.plot(kind="bar",figsize=(4,3),color=['#006666','#d279a6']);

  

  3.地理位置与下雨的关系

  x = pd.crosstab(data['Location'], data['RainToday'])
  # Percentage of rainy and non-rainy days for each city (each row sums to 100).
  y = x.div(x.sum(axis=1), axis=0) * 100
  # Sort the cities by their percentage of rainy days.
  y = y.sort_values(by='Yes', ascending=True)

  color = ['#cc6699', '#006699', '#006666', '#862d86', '#ff9966']
  y.Yes.plot(kind="barh", figsize=(15, 20), color=color)

  

  4.湿度和压力对下雨的影响

  data.columns

  

  # Morning vs. afternoon pressure, coloured by the next-day rain label.
  # The trailing semicolon suppresses the returned Axes repr in a notebook.
  plt.figure(figsize=(8, 6))
  sns.scatterplot(data=data, x='Pressure9am', y='Pressure3pm', hue='RainTomorrow');

  # Morning vs. afternoon humidity, coloured by the next-day rain label.
  plt.figure(figsize=(8, 6))
  sns.scatterplot(data=data, x='Humidity9am', y='Humidity3pm', hue='RainTomorrow');

  

  结论:低气压和高湿度会增加第二天下雨的概率,其中下午 3 点的空气湿度影响尤为明显。

  5.气温对下雨的影响

  # Daily maximum vs. minimum temperature, coloured by the next-day rain label.
  # The trailing semicolon suppresses the returned Axes repr in a notebook.
  plt.figure(figsize=(8, 6))
  sns.scatterplot(x='MaxTemp', y='MinTemp', data=data, hue='RainTomorrow');

  

  结论:当一天的最高气温和最低气温接近时,第二天下雨的概率会增加。

  数据预处理

  1.处理缺失值

  # Percentage of missing values in each column.
  data.isnull().sum() / data.shape[0] * 100

  

  # Fill these columns by sampling (with replacement) from each column's own
  # observed values. No random seed is set, so the filled values differ
  # between runs.
  lst = ['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm']
  for col in lst:
      fill_list = data[col].dropna()
      # fillna aligns on the index, so the sampled values are given data's
      # index explicitly instead of relying on a default 0..n-1 index.
      random_fill = pd.Series(
          np.random.choice(fill_list, size=len(data.index)),
          index=data.index,
      )
      data[col] = data[col].fillna(random_fill)

  # Names of the columns whose dtype is object.
  s = (data.dtypes == "object")
  object_cols = list(s[s].index)
  object_cols

  

  # Fill the object-typed columns with their most frequent value;
  # data[i].mode()[0] is the first mode of the column.
  # The result is assigned back rather than using a chained
  # fillna(..., inplace=True), which is deprecated and has no effect on the
  # frame once pandas copy-on-write is enabled.
  # NOTE(review): object_cols presumably includes the RainTomorrow target, so
  # missing labels are imputed as well -- confirm this is intended.
  for i in object_cols:
      data[i] = data[i].fillna(data[i].mode()[0])

  # Names of the float64 columns.
  t = (data.dtypes == "float64")
  num_cols = list(t[t].index)
  num_cols

  # Fill the float columns with their median.
  for i in num_cols:
      data[i] = data[i].fillna(data[i].median())

  # Per-column count of missing values that remain.
  data.isnull().sum()

  

  2.构建数据集

  在处理数据标签时,机器学习或深度学习能识别的标签都是数字类型,分类时用0,1,2....,预测时是浮点数,而大多数数据起始时都不是这种类型,像:“男”和“女”,“是”和“否”,“猫”或“狗”或“人”这类的比较多,因此需要将它们转换为数字类型。

  LabelEncoder:将n个类别编码为0~n-1之间的整数(包含0和n-1),以下是使用LabelEncoder转换标签的实例。

  from sklearn.preprocessing import LabelEncoder

  # Replace every object-typed column with integer class codes.
  # The single encoder is refit for each column, so after the loop it only
  # holds the mapping of the last column processed.
  label_encoder = LabelEncoder()
  for i in object_cols:
      data[i] = label_encoder.fit_transform(data[i])

  # Features: every column except the target and the day-of-month column.
  X = data.drop(['RainTomorrow', 'day'], axis=1).values
  y = data['RainTomorrow'].values

  # Hold out 25% of the rows; the fixed seed makes the split repeatable.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.25, random_state=101
  )

  # Fit the scaler on the training split only, then apply it to both splits.
  scaler = MinMaxScaler()
  scaler.fit(X_train)
  X_train = scaler.transform(X_train)
  X_test = scaler.transform(X_test)

  预测是否会下雨

  1.搭建神经网络

  from tensorflow.keras.optimizers import Adam

  # Fully connected binary classifier: four tanh hidden layers with dropout
  # after the last two, and a single sigmoid output unit.
  model = Sequential()
  model.add(Dense(units=24, activation='tanh'))
  model.add(Dense(units=18, activation='tanh'))
  model.add(Dense(units=23, activation='tanh'))
  model.add(Dropout(0.5))
  model.add(Dense(units=12, activation='tanh'))
  model.add(Dropout(0.2))
  model.add(Dense(units=1, activation='sigmoid'))

  optimizer = Adam(learning_rate=1e-4)

  # ``metrics`` is passed as a list, which is the form compile() expects;
  # a bare string is not accepted by every Keras version.
  model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

  # Stop when val_loss has not improved by at least 0.001 for 25 epochs and
  # restore the weights of the best epoch.
  early_stop = EarlyStopping(
      monitor='val_loss',
      mode='min',
      min_delta=0.001,
      verbose=1,
      patience=25,
      restore_best_weights=True,
  )

  2.模型训练

  # Train for 10 epochs with a batch size of 32. X_test / y_test are passed as
  # the validation data, which is what the val_loss monitored by early_stop is
  # computed on.
  # NOTE(review): early_stop is created with patience=25, so with only
  # 10 epochs it cannot end training early.
  # NOTE(review): the held-out test split doubles as the validation set here;
  # consider a separate validation split if an unbiased test score is needed.
  history=model.fit(x=X_train,y=y_train,validation_data=(X_test,y_test), verbose=1,callbacks=[early_stop],epochs =10,batch_size =32)

  

  3.结果可视化

  from pyecharts.charts import Grid, Line
  import pyecharts.options as opts
  from pyecharts.globals import ThemeType

  # Per-epoch metrics recorded by model.fit().
  loss = history.history['loss']
  val_loss = history.history['val_loss']
  acc = history.history['accuracy']
  val_acc = history.history['val_accuracy']

  # Derive the x axis from the recorded history instead of hard-coding 10, so
  # the charts stay correct if the epoch count changes or training stops early.
  epochs = list(range(len(loss)))

  # Left chart: training and validation loss.
  line_loss = Line()
  line_loss.add_xaxis(epochs)
  line_loss.add_yaxis('loss', loss, label_opts=opts.LabelOpts(is_show=False))
  line_loss.add_yaxis('val_loss', val_loss, label_opts=opts.LabelOpts(is_show=False))
  line_loss.set_global_opts(
      legend_opts=opts.LegendOpts(pos_top='5%', pos_left='20%'),
      tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
  )

  # Right chart: training and validation accuracy.
  line_acc = Line()
  line_acc.add_xaxis(epochs)
  line_acc.add_yaxis('accuracy', acc, label_opts=opts.LabelOpts(is_show=False))
  line_acc.add_yaxis('val_accuracy', val_acc, label_opts=opts.LabelOpts(is_show=False))
  line_acc.set_global_opts(
      title_opts=opts.TitleOpts('模型训练过程效果记录', pos_left='center'),
      legend_opts=opts.LegendOpts(pos_top='5%', pos_left='65%'),
      yaxis_opts=opts.AxisOpts(is_scale=True),
      tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
  )

  # Place the two charts side by side and render them inline in the notebook.
  grid = Grid(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
  grid.add(line_loss, grid_opts=opts.GridOpts(pos_left='5%', pos_right='55%'))
  grid.add(line_acc, grid_opts=opts.GridOpts(pos_left='55%', pos_right='5%'))
  grid.render_notebook()

  

  BONUS TIME

  数学建模资料、视频讲解、历年赛题

  后台回复 【校苑】领取