2019年9月19日 星期四

pandas查看數據值列的彙總統計



import numpy as np
import requests
import pandas as pd
import datetime

import csv, json, sys
import matplotlib.pyplot as plt

def transform_date(date):
    """Convert a Minguo (ROC) calendar date string to the Gregorian calendar.

    Args:
        date: Text of the form 'YYY/MM/DD' whose year is an ROC year.

    Returns:
        The date as 'YYYY/MM/DD'; the month and day text is passed through
        unchanged.

    Raises:
        ValueError: If the text does not split into exactly three '/'-separated
            parts, or the year part is not an integer.
    """
    roc_year, month, day = date.split('/')
    gregorian_year = int(roc_year) + 1911  # ROC year -> AD year
    return '/'.join((str(gregorian_year), month, day))
   
def transform_data(data):
    """Convert one raw row of strings into typed values, in place.

    The row is mutated position by position and the same list is returned:
    index 0 becomes a ``datetime``, indexes 1, 2 and 8 become ``int``, and
    indexes 3 through 7 become ``float``.

    Args:
        data: Mutable sequence of at least nine strings; index 0 is an ROC
            date ('YYY/MM/DD'), the rest are numbers that may contain
            thousands separators.

    Returns:
        The same ``data`` object with its first nine entries converted.
    """
    def _without_commas(text):
        # Drop thousands separators so int()/float() can parse the number.
        return text.replace(',', '')

    data[0] = datetime.datetime.strptime(transform_date(data[0]), '%Y/%m/%d')
    for position in (1, 2):
        data[position] = int(_without_commas(data[position]))
    for position in (3, 4, 5, 6):
        data[position] = float(_without_commas(data[position]))

    # A leading +, - or X marks up, down or "not compared"; only the literal
    # 'X0.00' is special-cased, and it maps to 0.0.
    change_text = _without_commas(data[7])
    if change_text == 'X0.00':
        data[7] = float(0.0)
    else:
        data[7] = float(change_text)

    data[8] = int(_without_commas(data[8]))
    return data

def transform(data):
    """Apply ``transform_data`` to every row and collect the results.

    Args:
        data: Iterable of raw rows; each row is converted in place.

    Returns:
        A new list holding the (mutated) row objects in their original order.
    """
    return list(map(transform_data, data))



       


def save_data_file(path, stock_date, stock_id, timeout=30):
    """Download one STOCK_DAY report from TWSE and store it as a JSON file.

    Args:
        path: Prefix prepended to the output file name (e.g. 'json/').
        stock_date: Date query value, e.g. '20190901'.
        stock_id: Stock number query value, e.g. '1314'.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON payload that was written to disk.

    Raises:
        requests.exceptions.RequestException: On a connection problem, a
            timeout, or a non-success HTTP status.
    """
    # Example: http://www.twse.com.tw/exchangeReport/STOCK_DAY?date=20190901&stockNo=1314
    url = 'http://www.twse.com.tw/exchangeReport/STOCK_DAY?date=%s&stockNo=%s' % (stock_date, stock_id)
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing or caching an error page.
    response.raise_for_status()
    jdata = response.json()

    filename = '%s%s_%s.json' % (path, stock_id, stock_date)
    with open(filename, 'w') as json_file:
        json.dump(jdata, json_file)
    return jdata
   
def get_data_file(path, stock_date, stock_id):
    """Load a previously saved JSON report and return its typed rows.

    Args:
        path: Prefix prepended to the file name (e.g. 'json/').
        stock_date: Date part of the file name.
        stock_id: Stock number part of the file name.

    Returns:
        The list produced by ``transform`` from the payload's 'data' entry.
    """
    source = '%s%s_%s.json' % (path, stock_id, stock_date)
    with open(source) as handle:
        payload = json.load(handle)
    raw_rows = payload['data']
    return transform(raw_rows)

def get_data_result(data, stock_no):
    """Build a date-indexed DataFrame from typed rows.

    Args:
        data: Sequence of nine-element rows ordered as date, shares, amount,
            open, high, low, close, change, turnover. The date values must
            expose a ``month`` attribute. An empty sequence is accepted and
            yields an empty frame with the same columns.
        stock_no: Value stored in every row of the 'stockno' column, so rows
            from different stocks stay distinguishable once combined.

    Returns:
        A DataFrame indexed by the row dates with the columns shares, amount,
        open, high, low, close, change, turnover, stockno and month.

    Raises:
        ValueError: If the rows do not have exactly nine columns.
    """
    # Source headers: date, shares traded, amount traded, opening price,
    # highest price, lowest price, closing price, price change, transactions.
    columns = ['date', 'shares', 'amount', 'open', 'high', 'low', 'close', 'change', 'turnover']
    # Naming the columns in the constructor also handles empty input, where
    # assigning nine names to a zero-column frame would raise.
    frame = pd.DataFrame(data, columns=columns)
    frame['stockno'] = [stock_no] * len(frame)

    dates = frame['date'].tolist()
    result = frame.drop(columns=['date'])  # the date lives on in the index
    result.index = dates
    result['month'] = [stamp.month for stamp in dates]
    return result

def process_stock():
    """Print cached daily data and summary statistics for each stock and month.

    For every configured stock id and date, the saved JSON file under 'json/'
    is loaded, converted to a DataFrame and printed, followed by the
    ``describe()`` summary of its 'amount' column and then its 'close' column.
    The file must already exist; this function does not download it.
    """
    path = 'json/'
    query_dates = ['20190901']
    stock_ids = ['2330']
    summary_columns = ('amount', 'close')

    for stock_id in stock_ids:
        for query_date in query_dates:
            rows = get_data_file(path, query_date, stock_id)
            result = get_data_result(rows, stock_id)
            print(result)

            # describe() reports count, mean, std, min, quartiles and max.
            for column in summary_columns:
                dfTotal = result[column]
                print('%s' % (dfTotal.describe()))

# Runs the stock summary as soon as this module executes. The call binds to the
# process_stock defined above; the name is re-defined further down the file.
process_stock()

def process_stock():
    """Print basic statistics of the 'AQI' column read from ./AQI.csv.

    Prints, in order, the mean, standard deviation, maximum, minimum and
    median of the column, one 'name = value' line each.
    """
    aqi = pd.read_csv(r'./AQI.csv')['AQI']
    reports = (
        ('mean = %s', aqi.mean),
        ('std = %s', aqi.std),
        ('max = %s', aqi.max),
        ('min = %s', aqi.min),
        ('median = %s', aqi.median),
    )
    for template, statistic in reports:
        print(template % (statistic()))

沒有留言: