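Google 様の Blogger は、XML から import しても img.src や a.href の参照先 (画像など) までは取り込んでくれません。どれが post された image なのか判別できないので、まあそんなものと言えばそんなものです。そこで、指定した prefix で始まる URI の参照先を Picasa にアップロードし、XML 内の URI を書き換えるスクリプトを用意しました。なお、Python 2.7 では動かないかもしれません (mimetypes の返す値が違うため)。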
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Re-host resources referenced by a Blogger Atom export on Picasa.

The script reads a Blogger Atom feed, uploads every referenced URI that
starts with one of the configured prefixes to a Picasa album named after
the feed, and rewrites the feed so that it points at the uploaded copies.

This is Python 2 code: it depends on ``urllib2``, ``urlparse`` and the
gdata-python-client package.
"""

import mimetypes
import os
import re
import sys
import urllib2
import urlparse

# http://code.google.com/p/gdata-python-client/
import atom
import gdata.photos.service


class BloggerImportHelper(object):
    """Callable that rewrites matching URIs inside a Blogger Atom feed.

    Configure the instance through its attributes, then call it with the
    feed XML; the converted XML is returned.
    """

    # An http(s) URI runs up to (not including) the next quote character.
    _URI_PATTERN = re.compile('https?://[^"\']+')

    def __init__(self):
        # Credentials handed to PhotosService.ClientLogin.
        self.username = self.password = ''
        # Only URIs starting with one of these prefixes are uploaded.
        self.handleUriPrefixes = []
        # Upper bound on upload attempts per URI (at least one is made).
        self.maxAttempts = 3
        # Populated by __call__; initialised here so that the attributes
        # always exist, even if processUri is used on its own.
        self.pws = None
        self.album = None
        self.cache = {}

    def __call__(self, xmlcontent):
        """Convert a feed and return it serialised again.

        Logs in, finds the album whose title equals the feed title (or
        creates it), then rewrites the content of every feed entry.

        Args:
            xmlcontent: the Atom feed as a string.

        Returns:
            The converted feed as produced by ``feed.ToString()``.
        """
        feed = atom.FeedFromString(xmlcontent)
        self.pws = gdata.photos.service.PhotosService()
        self.pws.ClientLogin(self.username, self.password)
        for album in self.pws.GetUserFeed().entry:
            if album.title.text == feed.title.text:
                self.album = album
                break
        else:
            # No album carries the feed title yet.
            self.album = self.pws.InsertAlbum(feed.title.text, '')
        # Each conversion starts with an empty URI -> new URI mapping.
        self.cache = {}
        for entry in feed.entry:
            # Skip entries without a content object or with empty text.
            if entry.content is not None and entry.content.text:
                entry.content.text = self.processContent(entry.content.text)
        return feed.ToString()

    def processContent(self, content):
        """Return *content* with every http(s) URI passed through processUri."""
        return self._URI_PATTERN.sub(
            lambda match: self.processUri(match.group(0)), content)

    def processUri(self, uri):
        """Return the replacement for *uri*.

        URIs that do not start with one of ``handleUriPrefixes`` are
        returned unchanged. Matching URIs are fetched and uploaded to the
        album, and the URL of the uploaded copy is returned. Results are
        remembered, so each distinct URI is processed once per conversion.

        A failed upload is retried, but only up to ``maxAttempts`` times.
        If every attempt fails, or no content type can be guessed from the
        file extension, the URI is left unchanged and a message is printed.
        """
        if uri in self.cache:
            return self.cache[uri]
        # str.startswith accepts a tuple; an empty tuple never matches.
        if not uri.startswith(tuple(self.handleUriPrefixes)):
            return uri

        # [-1] instead of [1]: a path without '/' yields the path itself
        # rather than raising IndexError.
        basename = urlparse.urlparse(uri).path.rsplit('/', 1)[-1]
        contentType = self._guessContentType(basename)
        if contentType is None:
            # Retrying cannot help when the type is unknown.
            print('%s %s' % (uri, 'unknown content type; left unchanged'))
            self.cache[uri] = uri
            return uri

        newUri = uri
        for _ in range(max(1, self.maxAttempts)):
            try:
                newUri = self._upload(uri, basename, contentType)
            except Exception as e:
                # Report the failure and try again (bounded by the loop).
                print('%s %s' % (uri, e))
            else:
                break
        else:
            print('%s %s' % (uri, 'giving up; left unchanged'))
        # Failures are remembered too, so later occurrences of the same
        # URI do not trigger another round of attempts.
        self.cache[uri] = newUri
        return newUri

    @staticmethod
    def _guessContentType(basename):
        """Return the MIME type for *basename*'s extension, or None."""
        if '.' not in basename:
            return None
        # As in the original code, the guess is made from a synthetic
        # "mime.EXT" name, i.e. from the extension only.
        return mimetypes.guess_type('mime.' + basename.rsplit('.', 1)[1])[0]

    def _upload(self, uri, basename, contentType):
        """Fetch *uri*, upload it to the album and return the new URL."""
        response = urllib2.urlopen(uri)
        try:
            photo = self.pws.InsertPhotoSimple(
                self.album.GetPhotosUri(),
                basename,
                '',
                response,
                contentType,
            )
        finally:
            # Always release the HTTP connection, even if the upload fails.
            response.close()
        return photo.media.content[0].url


def main():
    """Convert the feed named by sys.argv[1] and write it to test.atom."""
    converter = BloggerImportHelper()
    # Picasa username and password
    converter.username = 'you@gmail.com'
    converter.password = 'password'
    # URI prefix list to upload
    converter.handleUriPrefixes = [
        'http://localhost/images/',
    ]
    # sys.argv[1] is XML file (Blogger atom feed)
    with open(sys.argv[1], 'rb') as source:
        xmlcontent = source.read()
    converted = converter(xmlcontent)
    # Open the output only after a successful conversion, so a failure
    # does not leave a truncated test.atom behind.
    with open('test.atom', 'wb') as target:
        target.write(converted)


if __name__ == '__main__':
    main()