Skip to content

Commit

Permalink
collect_samples.pyの更新
Browse files Browse the repository at this point in the history
* ダウンロードの進捗状況を表示
* imgをcloseしないように
  • Loading branch information
garaemon committed Nov 2, 2014
1 parent 7697e98 commit 1c89be9
Showing 1 changed file with 18 additions and 10 deletions.
28 changes: 18 additions & 10 deletions collect_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ def getUrls( word, key, skip=0, urls=[] ):
if skip:
params.update( { '$skip': str( skip ) } )

results = requests.get( prefix, auth=( key, key ), params=params )
results = results.json

results = requests.get( prefix, auth=( key, key ), params=params)
results = results.json()
for result in results['d']['results']:
typ = result[ 'ContentType' ]
if typ== 'image/jpg' or typ == 'image/jpeg':
Expand All @@ -35,15 +34,25 @@ def getUrls( word, key, skip=0, urls=[] ):
return urls

def saveImages( urls, dir ):
    """Download every URL in *urls* into the directory *dir*.

    Each image is stored under the basename of its URL. A URL whose target
    file already exists is skipped without issuing a network request.
    Progress is printed for every URL. A failed download is reported on
    stdout and the loop continues with the next URL (best effort).

    urls -- sequence of image URLs; its length is used as the progress total
    dir  -- path of the destination directory

    Returns None.
    """
    total = len( urls )
    for counter, url in enumerate( urls, 1 ):
        try:
            print( "writing [%d/%d]: %s" % ( counter, total, url ) )
            fname = os.path.join( dir, os.path.basename( url ) )
            if os.path.exists( fname ):
                continue
            # Fetch the whole body before opening the file so a failed
            # download does not leave an empty file behind.
            img = requests.get( url, timeout=5 ).content
            # `img` is a plain byte string and has no close(); only the
            # file needs releasing, which the with-block guarantees.
            with open( fname, 'wb' ) as f:
                f.write( img )
        except requests.exceptions.Timeout:
            # Must precede the generic handler, otherwise it is unreachable.
            print( "timeout" )
        except Exception as e:
            # Deliberate catch-all: one bad URL must not abort the batch.
            print( "failed to get " )
            print( url )
            print( e )

if __name__ == '__main__':
word = settings.word
Expand All @@ -52,4 +61,3 @@ def saveImages( urls, dir ):

urls = getUrls( word, key )
saveImages( urls, dir )

0 comments on commit 1c89be9

Please sign in to comment.