Marian Moldovan
Uploaded December 16, 2014

Automatically analyzing buildings and architectural styles

Inspired by the hack night at PAPIs.io in Barcelona. By Marian Moldovan & Enrique Otero, from Beeva.com

This notebook targets Python 2. You will need to install Pillow, scipy, requests and GraphLab Create to run it.

Get Image dataset

We query Google Images for "barcelona building" and "barcelona buildings", which yields roughly 100 images.

In [1]:
import json
import os
import time
import requests
from PIL import Image
from StringIO import StringIO
from requests.exceptions import ConnectionError
from scipy.misc import imread
 
# Bounding box (width, height) passed to Image.thumbnail() when shrinking downloads.
size = (256, 256)

def go(query, path):
  """Download Google image-search hits for `query` into `path` as JPEG thumbnails.

  Requests the search API at `start` offsets 0, 4, ..., 56. Every hit is
  downloaded, shrunk in place to fit inside the module-level `size`, and
  written to `<path>/<title>.jpg` (slashes and backslashes are stripped from
  the title). Hits that cannot be downloaded, decoded, written, or read back
  are skipped.

  Args:
    query: search terms; URL-encoded by `requests`.
    path: destination directory; created when it does not exist.
  """
  # Bind PIL's Image locally: a later cell rebinds the global name `Image` to
  # IPython.display.Image, which would break this function on a re-run.
  from PIL import Image as PILImage

  SEARCH_URL = 'https://ajax.googleapis.com/ajax/services/search/images'

  if not os.path.exists(path):
    os.makedirs(path)

  # A for-loop always advances the offset; the original only advanced it when
  # the API returned results and otherwise repeated the same request forever.
  for start in range(0, 60, 4):
    r = requests.get(SEARCH_URL, params={'v': '1.0', 'q': query, 'start': start})
    payload = json.loads(r.text)
    # 'responseData' (or its 'results') may be null; treat that as an empty page.
    results = (payload.get('responseData') or {}).get('results') or []

    for image_info in results:
      url = image_info['unescapedUrl']
      try:
        image_r = requests.get(url)
      except ConnectionError:
        print('could not download %s' % url)
        continue

      title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
      # Format the file name before joining so a '%' in `path` is left alone.
      target = os.path.join(path, '%s.jpg' % title)
      try:
        img = PILImage.open(StringIO(image_r.content))
        img.thumbnail(size, PILImage.ANTIALIAS)
        # Save the thumbnail itself (not a freshly opened full-size copy), in
        # binary mode, and close the file before validating it.
        with open(target, 'wb') as out:
          img.save(out, 'JPEG')
        # Reading the file back rejects images that were written but are unreadable.
        imread(target)
      except IOError:
        if os.path.exists(target):
          os.remove(target)
        continue

def clean(path):
  """Rename every entry in `path` to `0.jpg`, `1.jpg`, ... without losing any.

  Entries are numbered in sorted-name order. Renaming happens in two passes
  through temporary names, because a direct rename to `<i>.jpg` would silently
  overwrite an entry that already carries that name (for example when this
  function is run a second time on the same directory).

  Args:
    path: directory whose entries are renamed in place.
  """
  names = sorted(os.listdir(path))

  # Pick a staging prefix that no existing entry uses, so pass 1 cannot collide.
  prefix = '.clean-tmp-'
  while any(name.startswith(prefix) for name in names):
    prefix += '_'

  # Pass 1: move everything out of the way of the final names.
  staged = []
  for i, name in enumerate(names):
    tmp = os.path.join(path, prefix + str(i))
    os.rename(os.path.join(path, name), tmp)
    staged.append(tmp)

  # Pass 2: assign the final sequential names.
  for i, tmp in enumerate(staged):
    os.rename(tmp, os.path.join(path, str(i) + '.jpg'))

 
# Run both query spellings into the same folder, then renumber the files.
for query in ('barcelona building', 'barcelona buildings'):
  go(query, 'images')

clean('images')

Load the images into an SFrame

In [2]:
import graphlab as gl
In [3]:
# Load everything under 'images' without shuffling, keeping each file's path.
images = gl.image_analysis.load_images(
    'images',
    random_order=False,
    with_path=True,
)
In [4]:
# One 'image' column of resized images (256, 256, 3 -- presumably width,
# height, channels), plus the 'id' row-number column used as the label later.
images_resized = gl.SFrame({
    'image': gl.image_analysis.resize(images['image'], 256, 256, 3),
}).add_row_number()

Extract image features with deep learning

In [5]:
# Fetch a saved model from Turi's static model store. The URL name suggests a
# network trained on ImageNet -- NOTE(review): confirm the model's provenance.
pretrained_model = gl.load_model('https://static.turi.com/models/imagenet_model_iter45')
PROGRESS: Downloading https://static.turi.com/models/imagenet_model_iter45/dir_archive.ini to /var/tmp/graphlab-roman/79863/78461131-9792-4b89-9561-d764b4b2f384.ini
PROGRESS: Downloading https://static.turi.com/models/imagenet_model_iter45/objects.bin to /var/tmp/graphlab-roman/79863/11dee552-eaf7-4361-b85a-6e8770a0ff3e.bin
In [6]:
# Add an 'extracted_features' column holding whatever the pretrained model
# extracts for each row; the nearest-neighbours model below is built on it.
images_resized['extracted_features'] = pretrained_model.extract_features(images_resized)
In [7]:
# Open the SFrame in GraphLab Canvas (served in the browser) for inspection.
images_resized.show()
Canvas is accessible via web browser at the URL: http://localhost:56661/index.html
Opening Canvas in default web browser.


Example 1: clusters

  1. Get k=2 nearest neighbors
  2. Get similarity graph
  3. Plot
In [8]:
# Nearest-neighbours index over the extracted features, keyed by row 'id'.
model = gl.nearest_neighbors.create(
    images_resized,
    features=['extracted_features'],
    label='id',
    distance='euclidean',
)
PROGRESS: Starting brute force nearest neighbors model training.
In [9]:
# Query the model with the same images it was built from, 2 matches per image.
sf_nn = model.query(images_resized, k=2)
# Keep only strictly positive distances -- presumably to drop each image's
# match with itself. NOTE(review): this also drops pairs of distinct images
# whose features are identical (e.g. duplicate downloads); confirm intent.
sf_nn = sf_nn[sf_nn['distance'] > 0]
# Similarity graph: one edge per remaining (query image -> neighbour) row.
sg_similarities = gl.SGraph().add_edges(sf_nn, src_field='query_label', dst_field='reference_label')
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 157          | 6123    | 24.8408     | 64.123ms     |
PROGRESS: | Done         | 24649   | 100         | 93.323ms     |
PROGRESS: +--------------+---------+-------------+--------------+
In [10]:
# Shortest paths from vertex 16 over the similarity graph, using the
# neighbour distance on each edge as its weight.
sp = gl.shortest_path.create(sg_similarities, weight_field='distance', source_vid=16)
# The graph stored on the model; its vertices are queried for neighbourhoods below.
sp_graph = sp['graph']
# Draw it in Canvas, labelling vertices by id and highlighting 16 and 23.
# NOTE(review): 16 and 23 are hard-coded example ids tied to this image set.
sp_graph.show(vlabel='id', highlight=[16,23])
PROGRESS: +----------------------------+
PROGRESS: | Number of vertices updated |
PROGRESS: +----------------------------+
PROGRESS: | 1                          |
PROGRESS: | 0                          |
PROGRESS: +----------------------------+
Canvas is updated and available in a tab in the default browser.

In [11]:
# Everything within 3 hops of image 16 forms the example cluster.
subgraph = sp_graph.get_neighborhood(ids=[16], radius=3, full_subgraph=True)
cluster = subgraph.get_vertices()
print(cluster)

# Carry the file paths over so the display helpers can find each image on disk.
images_resized['path'] = images['path']
+------+---------------+
| __id |    distance   |
+------+---------------+
|  16  |      0.0      |
|  97  | 56.3197101266 |
|  94  |     1e+30     |
+------+---------------+
[3 rows x 2 columns]


In [12]:
from IPython.display import display
from IPython.display import Image
def show_graphlab_image(i):
    """Display the image file for row `i` of `images_resized` inline at 100x100.

    Args:
        i: row index into the global `images_resized` SFrame, whose 'path'
            column supplies the file to render.
    """
    # Import under a private name: the global `Image` is bound to PIL's Image
    # by the download cell and to IPython's by this cell, so whichever cell ran
    # last would otherwise decide which class is used here.
    from IPython.display import Image as IPythonImage
    img = IPythonImage(filename=images_resized['path'][i], width=100, height=100)
    display(img)
def show_images(my_images, field):
    """Print the id and path of every row in `my_images`, then display its image.

    Args:
        my_images: iterable of rows supporting `row[field]` lookup.
        field: key whose value is the row index into `images_resized`.
    """
    for row in my_images:
        image_id = row[field]
        print("id " + str(image_id) + " -> " + images_resized['path'][image_id] + ":")
        show_graphlab_image(image_id)
# Show the images in the cluster around image 16; '__id' is the vertex id column.
show_images(cluster, '__id') 
id 16 -> /Users/roman/repos/turi.com/src/learn/gallery/notebooks/images/113.jpg:

id 97 -> /Users/roman/repos/turi.com/src/learn/gallery/notebooks/images/187.jpg:

id 94 -> /Users/roman/repos/turi.com/src/learn/gallery/notebooks/images/184.jpg:

In [13]:
# Same view for the second highlighted vertex: everything within 3 hops of image 23.
neighborhood_23 = sp_graph.get_neighborhood(ids=[23], radius=3, full_subgraph=True)
show_images(neighborhood_23.get_vertices(), '__id')
id 155 -> /Users/roman/repos/turi.com/src/learn/gallery/notebooks/images/98.jpg:

id 113 -> /Users/roman/repos/turi.com/src/learn/gallery/notebooks/images/34.jpg: