自动下载google reader里面的星标文章.

1. google reader马上就要关闭了,最后一次看看俺的浏览记录吧

2. 对于以前订阅的资料和星标文章,可以通过takeout功能把链接下载下来,加到别的rss阅读器里面(俺用的是greatnews客户端阅读器)

对于星标的文章,俺自己写了个py script把链接的内容也一起下载下来

#E:\soft\Python27\down_googlereader_starred.py

import urllib,os,sys
import json,string

def getUrlContent(url):
    """Fetch ``url`` and return the raw response body.

    The response object is closed even when reading fails, so an aborted
    download does not leave the underlying connection open.

    Any error raised by ``urllib.urlopen`` or ``read`` propagates to the
    caller.
    """
    fp = urllib.urlopen(url)
    try:
        return fp.read()
    finally:
        fp.close()
    
def ToFname(title):
    """Return ``title`` with characters unsuitable for a file name removed.

    Characters are dropped, not substituted. All other characters, including
    non-ASCII ones, are kept unchanged.
    """
    # Original blacklist plus '*', '"', '\r' and '\t', which Windows also
    # rejects in file names; without them such titles could never be saved.
    rep_chars = '\n\r\t;:%&^|\\/?<>*"'
    fname = title
    for ch in rep_chars:
        fname = fname.replace(ch, '')
    return fname

def GetCfg_latest_id():
    """Return the resume index stored on the first line of ``cfg.txt``.

    Returns 0 when ``cfg.txt`` does not exist in the current directory or
    when its first line is blank, so processing starts from the first item.

    Raises:
        ValueError: if the first line is non-empty but not an integer.
    """
    fname = 'cfg.txt'
    if not os.path.isfile(fname):
        return 0
    # `with` closes the file even if reading fails.
    with open(fname, 'r') as fp:
        first_line = fp.readline().strip()
    # An empty file means no index was ever stored; treat it as a fresh
    # start instead of failing in int('').
    if not first_line:
        return 0
    return int(first_line)
    
def logFile(fname, cont):
    """Write ``cont`` to the file ``fname``, replacing any existing content.

    The file is opened in text mode and is closed even if the write fails.
    """
    with open(fname, 'w') as fp:
        fp.write(cont)
    
def SetCfg_latest_id(id):
    """Store ``id``, formatted as a decimal integer, in ``cfg.txt``.

    The file is rewritten through ``logFile``, so any previous value is
    replaced.
    """
    text = '%d' % (id)
    logFile('cfg.txt', text)
    
# Load the exported list of starred items; `with` closes the file promptly.
with open(r'F:\sw\<foldername>-takeout\Reader\starred.json', 'r') as fp:
    cont = fp.read()
li = json.loads(cont)

# Resume from the index saved by a previous run (0 when none was saved).
latest_id = GetCfg_latest_id()
for item in li['items'][latest_id:]:
    url = item['alternate'][0]['href']
    title = item['title']
    print(title)
    fname = '%.3d.%s.htm' % (latest_id, ToFname(title))
    print(fname)
    try:
        urlcont = getUrlContent(url)
        # Binary mode: the downloaded body is raw bytes and must not go
        # through text-mode newline translation.
        with open(fname, 'wb') as out:
            out.write(urlcont)
    except Exception as e:
        # Best effort: report the failure, leave a placeholder page that
        # records the URL, the error and the intended file name, then move
        # on to the next item.
        print(e)
        urlcont = '%s\n<br>\n%s<br>\n%s' % (url, str(e), fname)
        # 'replace' stops characters outside gb2312 from raising inside
        # this handler, which would abort the whole run.
        urlcont = urlcont.encode('gb2312', 'replace')
        fname = '%.3d.Except.htm' % (latest_id)
        with open(fname, 'wb') as out:
            out.write(urlcont)

    # Save progress after every item so an interrupted run can resume here.
    latest_id = latest_id + 1
    SetCfg_latest_id(latest_id)
View Code

 

Powered by Jekyll and Theme by solid

本站总访问量