カテゴリー
Uncategorized

Blogger: import from XML with images

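Google 様の Blogger ですが、XML から import しても img.src や a.href が指す画像までは取り込んでくれません。どれが post された image なのか判別できないので、まあそんなものと言えばそんなものです。そこで、指定した URI プレフィックスで始まる URL のファイルを Picasa にアップロードし、フィード内の URL をアップロード先の URL に書き換えるスクリプトを書きました。Python 2.7 では動かないかもしれません (mimetypes の返す値が違うため)。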

#!/usr/bin/env python
#-*- coding:utf-8 -*-
import sys, os, re, urllib2, urlparse, mimetypes
#http://code.google.com/p/gdata-python-client/
import atom
import gdata.photos.service
class BloggerImportHelper(object):
    """Re-host files referenced by a Blogger Atom feed as Picasa photos.

    Set ``username``, ``password`` and ``handleUriPrefixes`` on the instance,
    then call it with the XML text of the feed.  Every http(s) URI found in
    an entry's content that starts with one of the configured prefixes is
    downloaded, uploaded through ``gdata.photos.service.PhotosService`` into
    an album titled like the feed, and replaced by the URL of the uploaded
    photo.  All other URIs are left untouched.
    """

    # Upper bound on upload attempts per URI.  A failing upload is retried,
    # but never forever: a permanently broken URI must not hang the import.
    maxUploadAttempts = 3

    def __init__(self):
        self.username = self.password = ''
        self.handleUriPrefixes = []
        # Maps an original URI to the URI that replaces it in the feed.
        self.cache = {}

    def __call__(self, xmlcontent):
        """Convert a feed and return it serialized again.

        Args:
            xmlcontent: XML text of the Atom feed.

        Returns:
            The result of ``feed.ToString()`` after every entry's content
            has been passed through ``processContent``.
        """
        feed = atom.FeedFromString(xmlcontent)
        self.pws = gdata.photos.service.PhotosService()
        self.pws.ClientLogin(self.username, self.password)
        # Reuse an album whose title equals the feed title; create it when
        # no such album exists yet.
        for album in self.pws.GetUserFeed().entry:
            if album.title.text == feed.title.text:
                self.album = album
                break
        else:
            self.album = self.pws.InsertAlbum(feed.title.text, '')
        # Start every conversion with a fresh URI cache.
        self.cache = {}
        for entry in feed.entry:
            # Entries without a content element or with empty text have
            # nothing to rewrite.
            if entry.content is not None and entry.content.text:
                entry.content.text = self.processContent(entry.content.text)
        return feed.ToString()

    def processContent(self, content):
        """Return ``content`` with every http(s) URI passed through processUri.

        A URI is taken to extend up to the next single or double quote.
        """
        return re.sub(
            'https?://[^"\']+',
            lambda match: self.processUri(match.group(0)),
            content)

    def processUri(self, uri):
        """Return the replacement for ``uri``.

        URIs that match none of ``handleUriPrefixes`` are returned unchanged.
        Matching URIs are downloaded and uploaded to the album, and the URL
        of the uploaded photo is returned.  Results are cached, so each URI
        is uploaded at most once per conversion.

        If the upload still fails after ``maxUploadAttempts`` attempts, the
        original URI is kept (and cached) so the rest of the feed can still
        be converted.
        """
        if uri in self.cache:
            return self.cache[uri]
        # str.startswith accepts a tuple of candidates; an empty tuple never
        # matches, so nothing is uploaded when no prefix is configured.
        if not uri.startswith(tuple(self.handleUriPrefixes)):
            return uri
        # rpartition never raises, even when the path contains no '/'.
        basename = urlparse.urlparse(uri).path.rpartition('/')[2]
        contentType = self._guessContentType(basename)
        for _ in range(self.maxUploadAttempts):
            try:
                source = urllib2.urlopen(uri)
                try:
                    photo = self.pws.InsertPhotoSimple(
                        self.album.GetPhotosUri(),
                        basename,
                        '',
                        source,
                        contentType,
                    )
                finally:
                    # Release the HTTP connection whether or not the upload
                    # succeeded.
                    source.close()
            except Exception as e:
                # Best effort: report the failure and try again, up to the
                # attempt limit.  Parenthesized single-argument print gives
                # the same output on Python 2 and Python 3.
                print('%s %s' % (uri, e))
            else:
                self.cache[uri] = photo.media.content[0].url
                return self.cache[uri]
        # Every attempt failed: keep the original URI and remember that, so
        # repeated occurrences are not retried again.
        self.cache[uri] = uri
        return uri

    @staticmethod
    def _guessContentType(basename):
        """Guess a MIME type from the extension of ``basename``.

        Returns None when ``basename`` has no extension or when
        ``mimetypes`` does not know the extension.
        """
        _, dot, extension = basename.rpartition('.')
        if not dot:
            return None
        # Only the extension is handed to mimetypes; the rest of the file
        # name plays no part in the guess.
        return mimetypes.guess_type('mime.' + extension)[0]
if __name__ == '__main__':
    # Script entry point: convert the Blogger Atom feed named on the command
    # line and write the result to 'test.atom' in the current directory.
    converter = BloggerImportHelper()
    # Picasa username and password
    converter.username = 'you@gmail.com'
    converter.password = 'password'
    # URI prefix list to upload
    converter.handleUriPrefixes = [
        'http://localhost/images/',
    ]
    # sys.argv[1] is XML file (Blogger atom feed)
    with open(sys.argv[1], 'rb') as source:
        xmlcontent = source.read()
    # Convert before opening the output, so a failed conversion does not
    # leave a truncated 'test.atom' behind.
    converted = converter(xmlcontent)
    with open('test.atom', 'wb') as target:
        target.write(converted)

コメントを残す

メールアドレスが公開されることはありません。 * が付いている欄は必須項目です