import numpy as np
import pandas as pd
from pandas_datareader import data
import datetime as dt
数据准备
'''
获取国内股票数据的方式是:“股票代码”+“对应股市”(港股为.hk,A股为.ss)
例如腾讯是港股,代码为:0700.hk
'''
#字典:6家公司的股票
# gafataDict={'谷歌':'GOOG','亚马逊':'AMZN','Facebook':'FB', '苹果':'AAPL','阿里巴巴':'BABA','腾讯':'0700.hk'}
'''
定义函数
函数功能:计算股票涨跌幅=(现在股价-买入价格)/买入价格
输入参数:column是收盘价这一列的数据
返回数据:涨跌幅
'''
def change(column):
    """Compute and report the overall price change of a stock.

    The change is (last price - first price) / first price. A one-line
    summary with the change as a percentage (rounded to two decimals) is
    printed to stdout.

    Args:
        column: Closing prices in chronological order; a pandas Series or
            any array-like accepted by ``numpy.asarray``.

    Returns:
        The relative change as a fraction (0.25 means +25 %).

    Raises:
        ValueError: If ``column`` contains no prices.
    """
    # Read by position: ``column[0]`` on a date-indexed Series is a label
    # lookup, not a positional one.
    prices = np.asarray(column)
    if prices.size == 0:
        raise ValueError('column must contain at least one price')
    # Purchase price (first entry) and current price (last entry)
    buyPrice = prices[0]
    curPrice = prices[-1]
    priceChange = (curPrice - buyPrice) / buyPrice
    # Scale to percent exactly once, identically for every branch
    pct = round(priceChange * 100, 2)
    # Report whether the stock rose, stayed flat or fell
    if priceChange > 0:
        print('股票累计上涨=', pct, '%')
    elif priceChange == 0:
        print('股票无变化=', pct, '%')
    else:
        print('股票累计下跌=', pct, '%')
    return priceChange
'''
三星电子
每日股票价位信息
Open:开盘价
High:最高价
Low:最低价
Close:收盘价
Volume:成交量
因雅虎连接不到,仅以三星作为获取数据示例
'''
# Fetch daily quotes for ticker 005930 (Samsung Electronics) from the 'naver'
# data source, covering 2021-01-01 through 2022-01-01.
sxDf = data.DataReader(
    name='005930',
    data_source='naver',
    start='2021-01-01',
    end='2022-01-01',
)
sxDf.head()
|
Open |
High |
Low |
Close |
Volume |
Date |
|
|
|
|
|
2021-01-04 |
81000 |
84400 |
80200 |
83000 |
38655276 |
2021-01-05 |
81600 |
83900 |
81600 |
83900 |
35335669 |
2021-01-06 |
83300 |
84500 |
82100 |
82200 |
42089013 |
2021-01-07 |
82800 |
84200 |
82700 |
82900 |
32644642 |
2021-01-08 |
83300 |
90000 |
83000 |
88800 |
59013307 |
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 248 entries, 2021-01-04 to 2021-12-30
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Open 248 non-null object
1 High 248 non-null object
2 Low 248 non-null object
3 Close 248 non-null object
4 Volume 248 non-null object
dtypes: object(5)
memory usage: 11.6+ KB
# Cast the first four columns (prices) to float and the last column (volume)
# to int. DataFrame.astype returns a new frame with the requested dtypes;
# assigning converted values through .iloc[:, ...] may instead write into the
# existing object-dtype columns and leave the dtypes unchanged.
sxDf = sxDf.astype({
    **dict.fromkeys(sxDf.columns[:4], 'float'),
    sxDf.columns[-1]: 'int',
})
sxDf.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 248 entries, 2021-01-04 to 2021-12-30
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Open 248 non-null float64
1 High 248 non-null float64
2 Low 248 non-null float64
3 Close 248 non-null float64
4 Volume 248 non-null int32
dtypes: float64(4), int32(1)
memory usage: 10.7 KB
阿里巴巴
# Load the Alibaba quotes from the local Excel workbook, using the 'Date'
# column as the index, and preview the last rows.
AliDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\阿里巴巴2017年股票数据.xlsx',
    index_col='Date',
)
AliDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
175.839996 |
176.660004 |
175.039993 |
176.289993 |
176.289993 |
12524700 |
2017-12-26 |
174.550003 |
175.149994 |
171.729996 |
172.330002 |
172.330002 |
12913800 |
2017-12-27 |
172.289993 |
173.869995 |
171.729996 |
172.970001 |
172.970001 |
10152300 |
2017-12-28 |
173.039993 |
173.529999 |
171.669998 |
172.300003 |
172.300003 |
9508100 |
2017-12-29 |
172.279999 |
173.669998 |
171.199997 |
172.429993 |
172.429993 |
9704600 |
# Print the frame summary: index range, columns, non-null counts and dtypes
AliDf.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2017-01-03 to 2017-12-29
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Open 251 non-null float64
1 High 251 non-null float64
2 Low 251 non-null float64
3 Close 251 non-null float64
4 Adj Close 251 non-null float64
5 Volume 251 non-null int64
dtypes: float64(5), int64(1)
memory usage: 13.7 KB
# Overall change across the loaded period (change() also prints a summary)
AliChange = change(AliDf['Close'])
# Add a column with the cumulative return relative to the first close.
# .iloc[0] reads the first row by position; a plain [0] on a date-indexed
# Series is a label lookup that only works through a deprecated fallback.
Close1 = AliDf['Close'].iloc[0]
AliDf['sum_pct_change'] = (AliDf['Close'] - Close1) / Close1
AliDf['sum_pct_change'].tail()
Date
2017-12-22 0.989729
2017-12-26 0.945034
2017-12-27 0.952257
2017-12-28 0.944695
2017-12-29 0.946162
Name: sum_pct_change, dtype: float64
谷歌
# Load the Google quotes from the local Excel workbook, using the 'Date'
# column as the index, and preview the last rows.
GoogleDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\谷歌2017年股票数据.xlsx',
    index_col='Date',
)
GoogleDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
1061.109985 |
1064.199951 |
1059.439941 |
1060.119995 |
1060.119995 |
755100 |
2017-12-26 |
1058.069946 |
1060.119995 |
1050.199951 |
1056.739990 |
1056.739990 |
760600 |
2017-12-27 |
1057.390015 |
1058.369995 |
1048.050049 |
1049.369995 |
1049.369995 |
1271900 |
2017-12-28 |
1051.599976 |
1054.750000 |
1044.770020 |
1048.140015 |
1048.140015 |
837100 |
2017-12-29 |
1046.719971 |
1049.699951 |
1044.900024 |
1046.400024 |
1046.400024 |
887500 |
# Overall change across the loaded period (change() also prints a summary)
GoogleChange = change(GoogleDf['Close'])
# Add a column with the cumulative return relative to the first close.
# .iloc[0] reads the first row by position; a plain [0] on a date-indexed
# Series is a label lookup that only works through a deprecated fallback.
Close1 = GoogleDf['Close'].iloc[0]
GoogleDf['sum_pct_change'] = (GoogleDf['Close'] - Close1) / Close1
GoogleDf['sum_pct_change'].tail()
Date
2017-12-22 0.348513
2017-12-26 0.344213
2017-12-27 0.334839
2017-12-28 0.333274
2017-12-29 0.331061
Name: sum_pct_change, dtype: float64
苹果
# Load the Apple quotes from the local Excel workbook, using the 'Date'
# column as the index, and preview the last rows.
AppleDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\苹果2017年股票数据.xlsx',
    index_col='Date',
)
AppleDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
174.679993 |
175.419998 |
174.500000 |
175.009995 |
174.299362 |
16349400 |
2017-12-26 |
170.800003 |
171.470001 |
169.679993 |
170.570007 |
169.877396 |
33185500 |
2017-12-27 |
170.100006 |
170.779999 |
169.710007 |
170.600006 |
169.907272 |
21498200 |
2017-12-28 |
171.000000 |
171.850006 |
170.479996 |
171.080002 |
170.385315 |
16480200 |
2017-12-29 |
170.520004 |
170.589996 |
169.220001 |
169.229996 |
168.542831 |
25999900 |
# Overall change across the loaded period (change() also prints a summary)
AppleChange = change(AppleDf['Close'])
# Add a column with the cumulative return relative to the first close.
# .iloc[0] reads the first row by position; a plain [0] on a date-indexed
# Series is a label lookup that only works through a deprecated fallback.
Close1 = AppleDf['Close'].iloc[0]
AppleDf['sum_pct_change'] = (AppleDf['Close'] - Close1) / Close1
AppleDf['sum_pct_change'].tail()
Date
2017-12-22 0.506758
2017-12-26 0.468532
2017-12-27 0.468790
2017-12-28 0.472923
2017-12-29 0.456995
Name: sum_pct_change, dtype: float64
腾讯
# Load the Tencent quotes from the local Excel workbook, using the 'Date'
# column as the index, and preview the last rows.
TencentDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\腾讯2017年股票数据.xlsx',
    index_col='Date',
)
TencentDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
403.799988 |
405.799988 |
400.799988 |
405.799988 |
405.799988 |
16146080 |
2017-12-27 |
405.799988 |
407.799988 |
401.000000 |
401.200012 |
401.200012 |
16680601 |
2017-12-28 |
404.000000 |
408.200012 |
402.200012 |
408.200012 |
408.200012 |
11662053 |
2017-12-29 |
408.000000 |
408.000000 |
403.399994 |
406.000000 |
406.000000 |
16601658 |
2018-01-02 |
406.000000 |
406.000000 |
406.000000 |
406.000000 |
406.000000 |
0 |
# Load the Tencent quotes again from the same workbook ('Date' column as the
# index) and preview the last rows.
TencentDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\腾讯2017年股票数据.xlsx',
    index_col='Date',
)
TencentDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
403.799988 |
405.799988 |
400.799988 |
405.799988 |
405.799988 |
16146080 |
2017-12-27 |
405.799988 |
407.799988 |
401.000000 |
401.200012 |
401.200012 |
16680601 |
2017-12-28 |
404.000000 |
408.200012 |
402.200012 |
408.200012 |
408.200012 |
11662053 |
2017-12-29 |
408.000000 |
408.000000 |
403.399994 |
406.000000 |
406.000000 |
16601658 |
2018-01-02 |
406.000000 |
406.000000 |
406.000000 |
406.000000 |
406.000000 |
0 |
# Overall change across the loaded period (change() also prints a summary)
# NOTE(review): the tail of this frame shows a 2018-01-02 row with zero
# volume; confirm whether it should be excluded from the 2017 figures.
TencentChange = change(TencentDf['Close'])
# Add a column with the cumulative return relative to the first close.
# .iloc[0] reads the first row by position; a plain [0] on a date-indexed
# Series is a label lookup that only works through a deprecated fallback.
Close1 = TencentDf['Close'].iloc[0]
TencentDf['sum_pct_change'] = (TencentDf['Close'] - Close1) / Close1
TencentDf['sum_pct_change'].tail()
Date
2017-12-22 1.142555
2017-12-27 1.118268
2017-12-28 1.155227
2017-12-29 1.143611
2018-01-02 1.143611
Name: sum_pct_change, dtype: float64
亚马逊
# Load the Amazon quotes from the local Excel workbook, using the 'Date'
# column as the index, and preview the last rows.
AmazonDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\亚马逊2017年股票数据.xlsx',
    index_col='Date',
)
AmazonDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
1172.079956 |
1174.619995 |
1167.829956 |
1168.359985 |
1168.359985 |
1585100 |
2017-12-26 |
1168.359985 |
1178.319946 |
1160.550049 |
1176.760010 |
1176.760010 |
2005200 |
2017-12-27 |
1179.910034 |
1187.290039 |
1175.609985 |
1182.260010 |
1182.260010 |
1867200 |
2017-12-28 |
1189.000000 |
1190.099976 |
1184.380005 |
1186.099976 |
1186.099976 |
1841700 |
2017-12-29 |
1182.349976 |
1184.000000 |
1167.500000 |
1169.469971 |
1169.469971 |
2688400 |
# Overall change across the loaded period (change() also prints a summary)
AmazonChange = change(AmazonDf['Close'])
# Add a column with the cumulative return relative to the first close.
# .iloc[0] reads the first row by position; a plain [0] on a date-indexed
# Series is a label lookup that only works through a deprecated fallback.
Close1 = AmazonDf['Close'].iloc[0]
AmazonDf['sum_pct_change'] = (AmazonDf['Close'] - Close1) / Close1
AmazonDf['sum_pct_change'].tail()
Date
2017-12-22 0.550228
2017-12-26 0.561373
2017-12-27 0.568671
2017-12-28 0.573766
2017-12-29 0.551700
Name: sum_pct_change, dtype: float64
Facebook
# Load the Facebook quotes from the local Excel workbook, using the 'Date'
# column as the index, and preview the last rows.
FacebookDf = pd.read_excel(
    io=r'C:\Users\EDY\Desktop\吧哩吧啦\学习\Untitled Folder\Facebook2017年股票数据.xlsx',
    index_col='Date',
)
FacebookDf.tail()
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
Date |
|
|
|
|
|
|
2017-12-22 |
177.139999 |
177.529999 |
176.229996 |
177.199997 |
177.199997 |
8509500 |
2017-12-26 |
176.630005 |
177.000000 |
174.669998 |
175.990005 |
175.990005 |
8897300 |
2017-12-27 |
176.550003 |
178.440002 |
176.259995 |
177.619995 |
177.619995 |
9496100 |
2017-12-28 |
177.949997 |
178.940002 |
177.679993 |
177.919998 |
177.919998 |
12220800 |
2017-12-29 |
178.000000 |
178.850006 |
176.460007 |
176.460007 |
176.460007 |
10261500 |
# Overall change across the loaded period (change() also prints a summary)
FacebookChange = change(FacebookDf['Close'])
# Add a column with the day-over-day return. The first row has no previous
# value to compare against, so its NaN is replaced with 0.
FacebookDf['pct_change'] = (
    FacebookDf['Close']
    .pct_change(periods=1)
    .fillna(value=0)
)
FacebookDf['pct_change'].head()
Date
2017-01-03 0.000000
2017-01-04 0.015660
2017-01-05 0.016682
2017-01-06 0.022707
2017-01-09 0.012074
Name: pct_change, dtype: float64
# Add a column with the cumulative return relative to the first close.
# .iloc[0] reads the first row by position; a plain [0] on a date-indexed
# Series is a label lookup that only works through a deprecated fallback.
Close1 = FacebookDf['Close'].iloc[0]
FacebookDf['sum_pct_change'] = (FacebookDf['Close'] - Close1) / Close1
FacebookDf['sum_pct_change'].tail()
Date
2017-12-22 0.516344
2017-12-26 0.505990
2017-12-27 0.519938
2017-12-28 0.522506
2017-12-29 0.510012
Name: sum_pct_change, dtype: float64
数据可视化
import matplotlib.pyplot as plt
# Scatter plot of trading volume against closing price.
# Draw on an explicitly created Axes: DataFrame.plot opens its own figure when
# no ``ax`` is passed, which left the figure sized here empty.
fig, ax = plt.subplots(figsize=(10, 5))
AliDf.plot(x='Volume', y='Close', kind='scatter', ax=ax)
ax.set_xlabel('成交量')
ax.set_ylabel('股价')
ax.set_title('成交量与股价之间的关系')
plt.show()
<Figure size 720x360 with 0 Axes>
# Pairwise Pearson correlation between all columns; closing price and volume
# come out moderately correlated.
AliDf.corr(method='pearson')
|
Open |
High |
Low |
Close |
Adj Close |
Volume |
sum_pct_change |
Open |
1.000000 |
0.999281 |
0.998798 |
0.998226 |
0.998226 |
0.424686 |
0.998226 |
High |
0.999281 |
1.000000 |
0.998782 |
0.999077 |
0.999077 |
0.432467 |
0.999077 |
Low |
0.998798 |
0.998782 |
1.000000 |
0.999249 |
0.999249 |
0.401456 |
0.999249 |
Close |
0.998226 |
0.999077 |
0.999249 |
1.000000 |
1.000000 |
0.415801 |
1.000000 |
Adj Close |
0.998226 |
0.999077 |
0.999249 |
1.000000 |
1.000000 |
0.415801 |
1.000000 |
Volume |
0.424686 |
0.432467 |
0.401456 |
0.415801 |
0.415801 |
1.000000 |
0.415801 |
sum_pct_change |
0.998226 |
0.999077 |
0.999249 |
1.000000 |
1.000000 |
0.415801 |
1.000000 |
查看各个公司的股价平均值
# --- Data preparation ---
# Mean closing price per company, ordered from highest to lowest.
Close_mean = dict(
    Alibaba=AliDf['Close'].mean(),
    Google=GoogleDf['Close'].mean(),
    Apple=AppleDf['Close'].mean(),
    Tencent=TencentDf['Close'].mean(),
    Amazon=AmazonDf['Close'].mean(),
    Facebook=FacebookDf['Close'].mean(),
)
CloseMeanSer = pd.Series(Close_mean).sort_values(ascending=False)

# --- Bar chart ---
# A Series plot without ``ax`` draws on the current figure created here.
fig = plt.figure(figsize=(10, 5))
CloseMeanSer.plot.bar()
plt.title('2017年各公司股价平均值')
plt.xlabel('公司')
# NOTE(review): the label says USD, but the Tencent quotes (0700.hk) are
# presumably in HKD; confirm before comparing absolute price levels.
plt.ylabel('股价平均值(美元)')
# Y-axis ticks every 100 from 0 to 1000, with horizontal grid lines
plt.yticks(np.arange(0, 1100, 100))
plt.grid(True, axis='y')
plt.show()
亚马逊和谷歌的平均股价很高,远远超过其他4家,但是仅看平均值并不能代表什么,下面从分布和走势方面查看
查看各公司股价分布情况
# --- Data preparation ---
# Put the six closing-price series side by side, aligned on the date index.
CloseCollectDf = pd.concat(
    [
        AliDf['Close'],
        GoogleDf['Close'],
        AppleDf['Close'],
        TencentDf['Close'],
        AmazonDf['Close'],
        FacebookDf['Close'],
    ],
    axis=1,
)
CloseCollectDf.columns = ['Alibaba', 'Google', 'Apple', 'Tencent', 'Amazon', 'Facebook']

# --- Box plots ---
fig = plt.figure(figsize=(20, 10))
fig.suptitle('2017年各公司股价分布', fontsize=18)

# Left half: all six companies on one scale
ax1 = plt.subplot(121)
CloseCollectDf.plot(ax=ax1, kind='box')
ax1.set_xlabel('公司')
ax1.set_ylabel('股价(美元)')
ax1.set_title('2017年各公司股价分布')
ax1.grid(True, axis='y')

# Google and Amazon trade far above the other four, so each group also gets
# its own panel with its own scale.
# Upper right: Google and Amazon
ax2 = plt.subplot(222)
CloseCollectDf[['Google', 'Amazon']].plot(ax=ax2, kind='box')
ax2.set_ylabel('股价(美元)')
ax2.set_title('2017年谷歌和亚马逊股价分布')
ax2.grid(True, axis='y')

# Lower right: the remaining four companies
ax3 = plt.subplot(224)
CloseCollectDf[['Alibaba', 'Apple', 'Tencent', 'Facebook']].plot(ax=ax3, kind='box')
ax3.set_xlabel('公司')
ax3.set_ylabel('股价(美元)')
ax3.set_title('2017年阿里、苹果、腾讯、Facebook股价分布')
ax3.grid(True, axis='y')

plt.show()
从箱型图看,谷歌和亚马逊的股价分布较广,且中位数偏上,腾讯股价最为集中,波动最小,相对稳定。
股价走势对比
# One figure with two stacked panels; constrained_layout=True lets matplotlib
# space the subplots automatically.
fig = plt.figure(figsize=(15, 10), constrained_layout=True)
fig.suptitle('股价走势对比', fontsize=18)

# Upper panel: closing prices
ax1 = plt.subplot(211)
ax1.plot(AliDf.index, AliDf['Close'], label='Alibaba')
ax1.plot(GoogleDf.index, GoogleDf['Close'], label='Google')
ax1.plot(AppleDf.index, AppleDf['Close'], label='Apple')
ax1.plot(TencentDf.index, TencentDf['Close'], label='Tencent')
ax1.plot(AmazonDf.index, AmazonDf['Close'], label='Amazon')
ax1.plot(FacebookDf.index, FacebookDf['Close'], label='Facebook')
ax1.set_xlabel('时间')
ax1.set_ylabel('股价')
ax1.legend(loc='upper left', fontsize=12)
# Label every 10th Alibaba trading day, rotated so the dates do not overlap
plt.xticks(AliDf.index[::10], rotation=45)
plt.yticks(np.arange(0, 1300, step=100))
ax1.grid(True)

# Lower panel: cumulative return relative to each stock's first close
ax2 = plt.subplot(212)
ax2.plot(AliDf.index, AliDf['sum_pct_change'], label='Alibaba')
ax2.plot(GoogleDf.index, GoogleDf['sum_pct_change'], label='Google')
ax2.plot(AppleDf.index, AppleDf['sum_pct_change'], label='Apple')
ax2.plot(TencentDf.index, TencentDf['sum_pct_change'], label='Tencent')
ax2.plot(AmazonDf.index, AmazonDf['sum_pct_change'], label='Amazon')
ax2.plot(FacebookDf.index, FacebookDf['sum_pct_change'], label='Facebook')
ax2.set_xlabel('时间')
ax2.set_ylabel('累计增长率')
ax2.legend(loc='upper left', fontsize=12)
# Same date ticks as the upper panel; y ticks in steps of 0.1
plt.xticks(AliDf.index[::10], rotation=45)
plt.yticks(np.arange(0, 1.2, step=0.1))
ax2.grid(True)

plt.show()
可以看出,在2017年间,亚马逊和谷歌的股价虽然偏高,涨幅却不如阿里巴巴和腾讯。
总结
观察以上图形,可以得出以下结论:
1、2017年谷歌和亚马逊股价偏高,波动较大,但其涨幅并不高;
2、2017年阿里巴巴和腾讯的股价平均值相对较小,股价波动比较小,其涨幅却很高,分别达到了94.62%和114.36%。