import numpy as np
import requests
import pandas as pd
import datetime
import csv, json, sys
import matplotlib.pyplot as plt
def transform_date(date):
    """Convert a Republic of China (Minguo) calendar date string to Gregorian.

    ``date`` must have exactly three ``/``-separated parts (year/month/day).
    The year is shifted by 1911; the month and day parts are passed through
    unchanged. Returns the converted date as a ``year/month/day`` string.
    """
    roc_year, month, day = date.split('/')
    # Minguo year 1 corresponds to Gregorian year 1912, hence the 1911 offset.
    gregorian_year = int(roc_year) + 1911
    return '/'.join([str(gregorian_year), month, day])
def _parse_number(text, cast):
    """Strip thousands-separator commas from *text* and convert it with *cast*."""
    return cast(text.replace(',', ''))


def transform_data(data):
    """Convert one raw daily-trade row from strings to typed values, in place.

    Positions follow the column order used elsewhere in this file:
    0 date, 1 shares, 2 amount, 3 open, 4 high, 5 low, 6 close, 7 change,
    8 turnover.

    Args:
        data: mutable sequence of at least nine strings. It is modified in
            place (earlier fields stay converted if a later one fails).

    Returns:
        The same ``data`` object, with a ``datetime`` at index 0, ints at
        indexes 1, 2 and 8, and floats at indexes 3 to 7.

    Raises:
        ValueError: if any field cannot be parsed.
    """
    # The date arrives in the Minguo calendar; convert to Gregorian first.
    data[0] = datetime.datetime.strptime(transform_date(data[0]), '%Y/%m/%d')
    data[1] = _parse_number(data[1], int)
    data[2] = _parse_number(data[2], int)
    for position in range(3, 7):
        data[position] = _parse_number(data[position], float)
    # The change field is prefixed with +, - or X for up / down / not
    # compared. float() accepts the signs; any X-prefixed value (not only the
    # literal 'X0.00') means "no comparison" and is recorded as 0.0.
    change = data[7].replace(',', '').strip()
    data[7] = 0.0 if change.startswith('X') else float(change)
    data[8] = _parse_number(data[8], int)
    return data
def transform(data):
    """Run ``transform_data`` over every row of *data* and return the results as a list."""
    converted_rows = []
    for row in data:
        converted_rows.append(transform_data(row))
    return converted_rows
def save_data_file(path, stock_date, stock_id, timeout=10):
    """Download one STOCK_DAY report from TWSE and cache it as a JSON file.

    Args:
        path: prefix prepended verbatim to the file name (e.g. ``'json/'``).
        stock_date: value sent as the ``date`` query parameter
            (e.g. ``'20190901'``).
        stock_id: value sent as the ``stockNo`` query parameter
            (e.g. ``'1314'``).
        timeout: seconds to wait for the server before giving up, so that a
            stalled connection cannot block the script indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    import os

    # Example: http://www.twse.com.tw/exchangeReport/STOCK_DAY?date=20190901&stockNo=1314
    url = 'http://www.twse.com.tw/exchangeReport/STOCK_DAY?date=%s&stockNo=%s' % (stock_date, stock_id)
    r = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of caching an error page.
    r.raise_for_status()
    jdata = r.json()

    filename = '%s%s_%s.json' % (path, stock_id, stock_date)
    # Make sure the target directory exists before writing the cache file.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(jdata, json_file)
    return jdata
def get_data_file(path, stock_date, stock_id):
    """Load a cached JSON report and return its ``'data'`` rows, type-converted.

    The file name is built as ``<path><stock_id>_<stock_date>.json``; the rows
    under the ``'data'`` key are passed through ``transform``.
    """
    cache_name = '%s%s_%s.json' % (path, stock_id, stock_date)
    with open(cache_name) as cache_file:
        payload = json.load(cache_file)
    raw_rows = payload['data']
    return transform(raw_rows)
def get_data_result(data, stock_no):
    """Build a date-indexed DataFrame from converted daily-trade rows.

    Args:
        data: sequence of nine-field rows in the order date, shares, amount,
            open, high, low, close, change, turnover. The date values must
            expose a ``month`` attribute once placed in the index. An empty
            sequence yields an empty frame with the full column set.
        stock_no: stock identifier written into every row, so rows from
            different stocks stay distinguishable once combined.

    Returns:
        DataFrame indexed by date (unnamed index) with columns shares, amount,
        open, high, low, close, change, turnover, stockno, month.
    """
    # Source headers: date, traded shares, traded amount, open, high, low,
    # close, price change, number of transactions.
    columns = ['date', 'shares', 'amount', 'open', 'high', 'low', 'close', 'change', 'turnover']
    # Passing the names to the constructor (rather than assigning them
    # afterwards) keeps empty input from failing with a length mismatch.
    s = pd.DataFrame(data, columns=columns)
    s['stockno'] = stock_no  # scalar is broadcast to every row
    # Use the date as the index and drop it as a regular column.
    s2 = s.set_index('date')
    s2.index.name = None
    # Built from a plain list so the column keeps the default integer dtype.
    s2['month'] = [stamp.month for stamp in s2.index]
    return s2
def process_stock():
    """Print the cached daily-trade table and summary statistics per stock/month.

    For every configured stock id and query date, the cached JSON report is
    loaded from ``json/``, turned into a DataFrame, printed, and followed by
    ``describe()`` summaries of the ``amount`` and ``close`` columns.
    """
    path = 'json/'
    date = ['20190901']
    listID = ['2330']
    for stock_id in listID:
        for query_date in date:
            # To (re)download the report instead of using the cache, call
            # save_data_file(path, query_date, stock_id) before loading.
            rows = get_data_file(path, query_date, stock_id)
            result = get_data_result(rows, stock_id)
            print(result)
            # Possible per-month aggregates:
            #   result.groupby('month').close.count()  -> trading days per month
            #   result.groupby('month').shares.sum()   -> shares traded per month
            # describe() covers mean / std / min / max / median in one report.
            for column in ('amount', 'close'):
                dfTotal = result[column]
                print('%s' % (dfTotal.describe()))
# Runs the stock-report version of process_stock defined above; the name is
# bound to a different function by the definition that follows.
process_stock()
def process_stock():
    """Print mean, std, max, min and median of the ``AQI`` column of ``./AQI.csv``."""
    aqi = pd.read_csv(r'./AQI.csv')['AQI']
    # Each statistic is computed right before it is printed, in this order.
    report = (
        ('mean = %s', aqi.mean),
        ('std = %s', aqi.std),
        ('max = %s', aqi.max),
        ('min = %s', aqi.min),
        ('median = %s', aqi.median),
    )
    for template, statistic in report:
        print(template % (statistic()))