goodinfo
#! /usr/bin/python
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import csv
import sys
import pandas as pd
# Python 2 only: `reload` is a builtin there, and `sys.setdefaultencoding`
# is removed from the `sys` module by site.py at interpreter start-up;
# reloading `sys` makes it reachable again.
# The call switches the implicit str <-> unicode conversion codec to UTF-8,
# presumably so the Chinese text handled below can be mixed with byte strings
# without UnicodeDecodeError. NOTE(review): this is a process-wide setting.
reload(sys)
sys.setdefaultencoding('utf-8')
def GetHtmlcode(ID):
    """Fetch the goodinfo.tw stock-detail page for *ID* and parse it.

    Args:
        ID: Stock code as a string (e.g. ``'2324'``); it is appended to the
            ``STOCK_ID=`` query parameter of the detail-page URL.

    Returns:
        A ``BeautifulSoup`` tree built with the ``lxml`` parser from the
        response text after the two ``replace`` clean-ups below.
    """
    source = 'http://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID='
    url = source + ID
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/56.0.2924.87 Safari/537.36'
        ),
    }
    # Request the page of the stock that was actually asked for. The URL was
    # previously built but a hard-coded STOCK_ID=2103 address was fetched
    # instead, so every call returned the same company regardless of ID.
    res = requests.get(url, headers=headers)
    # Decode the body as UTF-8 instead of the encoding requests would guess.
    res.encoding = 'utf-8'
    # NOTE(review): both replace() targets read as a plain space in this copy
    # of the file. Taken literally that also strips the spaces inside tags
    # (e.g. between a tag name and its attributes). The first target may
    # originally have been '&nbsp;' or a non-breaking space -- confirm.
    cleaned = res.text.replace(' ', '').replace(' ', '')
    soup = BeautifulSoup(cleaned, 'lxml')
    return soup
def main():
    """Print the tidied table found under the matching caption for stock 2324.

    Walks every ``<b>`` element selected by ``.head_td b``. For each one whose
    text equals the caption compared below, the element four ``parent`` hops
    up is rendered to a string and parsed with ``pandas.read_html``; the first
    table returned is cleaned up and printed.
    """
    page = GetHtmlcode('2324')
    for caption in page.select('.head_td b'):
        if caption.text != u'合併獲利狀況':
            continue
        # Four levels above the caption -- presumably the enclosing <table>;
        # verify against the page markup.
        container = caption.parent.parent.parent.parent
        tables = pd.read_html(str(container), encoding="UTF-8")
        trimmed = pd.DataFrame(tables[0])[2:]  # keep rows from position 2 on
        # Promote the first remaining row to column labels ...
        trimmed.columns = trimmed.iloc[0]
        # ... and remove that row from the data.
        result = trimmed.drop(trimmed.index[0])
        result.columns.name = ''
        result.index = range(len(result))  # renumber the rows from 0
        print(result)


if __name__ == "__main__":
    main()
#! /usr/bin/python
# -*- coding: utf-8 -*-
import re
import urllib2
import csv
import pandas as pd
def GetHtmlcode(ID):
    """Download the goodinfo.tw stock-detail page for *ID* as text.

    Args:
        ID: Stock code string appended to the ``STOCK_ID=`` query parameter.

    Returns:
        The response body decoded from UTF-8.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Errors raised by ``urllib2.urlopen`` are not caught here.
    """
    source = 'http://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID='
    url = source + ID
    # Browser-like request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'Big5,utf-8;q=0.7,*;q=0.3',
        # Left disabled; presumably because urllib2 does not decompress
        # response bodies on its own.
        # 'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4,ja;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # NOTE(review): this value looks like a redaction placeholder rather
        # than a real cookie -- confirm whether the header is needed at all.
        'Cookie': '427 bytes were stripped',
        # NOTE(review): the URL above uses the bare host "goodinfo.tw" while
        # this header names "www.goodinfo.tw" -- confirm this is intended.
        'Host': 'www.goodinfo.tw',
        'Referer': url,
    }
    # Pass data=None so urllib2 issues a GET. The earlier "" is a non-None
    # data argument, which makes urllib2 send a POST with an empty body.
    req = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(req)
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
def main():
    """Print the tidied table at list position 41 of stock 2103's page.

    The page text from ``GetHtmlcode`` is parsed with ``pandas.read_html``;
    the table at index 41 of the returned list is cleaned up and printed.
    """
    html = GetHtmlcode('2103')
    tables = pd.read_html(html)
    # NOTE(review): 41 is a position in the list of all tables parsed from
    # the page, so it presumably depends on the page layout -- confirm.
    trimmed = pd.DataFrame(tables[41])[2:]  # keep rows from position 2 on
    # Promote the first remaining row to column labels ...
    trimmed.columns = trimmed.iloc[0]
    # ... and remove that row from the data.
    result = trimmed.drop(trimmed.index[0])
    result.columns.name = ''
    result.index = range(len(result))  # renumber the rows from 0
    print(result)


if __name__ == "__main__":
    main()
#! /usr/bin/python
# -*- coding: utf-8 -*-
import re
import urllib2
import csv
import pandas as pd
def GetHtmlcode(ID):
    """Download the goodinfo.tw stock-detail page for *ID* as text.

    Args:
        ID: Stock code string appended to the ``STOCK_ID=`` query parameter.

    Returns:
        The response body decoded from UTF-8.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Errors raised by ``urllib2.urlopen`` are not caught here.
    """
    source = 'http://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID='
    url = source + ID
    # Browser-like request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'Big5,utf-8;q=0.7,*;q=0.3',
        # Left disabled; presumably because urllib2 does not decompress
        # response bodies on its own.
        # 'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4,ja;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # NOTE(review): this value looks like a redaction placeholder rather
        # than a real cookie -- confirm whether the header is needed at all.
        'Cookie': '427 bytes were stripped',
        # NOTE(review): the URL above uses the bare host "goodinfo.tw" while
        # this header names "www.goodinfo.tw" -- confirm this is intended.
        'Host': 'www.goodinfo.tw',
        'Referer': url,
    }
    # Pass data=None so urllib2 issues a GET. The earlier "" is a non-None
    # data argument, which makes urllib2 send a POST with an empty body.
    req = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(req)
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
def main():
    """Report which parsed tables of stock 2103's page start with the caption.

    For every table returned by ``pandas.read_html``, prints the type of the
    cell at row label 0 / column label 0, and prints the table's position in
    the list when that cell equals the caption compared below.
    """
    html = GetHtmlcode('2103')
    tables = pd.read_html(html)
    for position, table in enumerate(tables):
        first_cell = table.loc[0, 0]
        print(type(first_cell))
        # Compare the cell content against the caption text.
        if first_cell == u"獲 利 狀 況 (/)":
            print(position)


if __name__ == "__main__":
    main()