import csv
import time
import urllib2
from urllib2 import HTTPError
from httplib import BadStatusLine, IncompleteRead
from bs4 import BeautifulSoup  # bs4 only; the old `from BeautifulSoup import BeautifulSoup` shadowed this import
# Read the list of URLs from a CSV file.
f = open('url_list_ss.csv', 'rb')
dataReader = csv.reader(f)
# Lists that will hold the extracted results.
data01 = []
data02 = []
data03 = []
data04 = []
data05 = []
for row in dataReader:
    for url in row:
        try:
            res = urllib2.urlopen(url)
            soup = BeautifulSoup(res.read(), from_encoding="utf-8")
            time.sleep(5.0)  # be polite: wait 5 seconds between requests
            titles = soup.findAll("div", {"class": "small-10 columns"})
            views = soup.findAll("div", {"class": "small-2 columns text-right format-views"})
            dates = soup.findAll("time", {"itemprop": "datePublished"})
            # Pair the title / view-count / date elements row by row;
            # the original nested loops took their cross product.
            for title, view, published in zip(titles, views, dates):
                data01.append(url)
                data02.append(''.join(title.findAll(text=True)))
                data03.append(''.join(view.findAll(text=True)))
                data04.append(''.join(published.findAll(text=True)))
                data05.append("contentslist")
        except HTTPError, e:
            print e.code
        except BadStatusLine:
            print "could not fetch"
        except IncompleteRead:
            print "IncompleteRead"
        except IndexError:
            print "IndexError"

f.close()

# Join the result columns into rows.
data = zip(data01, data02, data03, data04, data05)

# Write the CSV output once, after all URLs have been processed;
# the original reopened and rewrote the file inside the loop.
out = open('ss_extract_result.csv', 'wb')
writecsv = csv.writer(out, lineterminator='\n')
writecsv.writerows(data)
out.close()