This Jupyter notebook contains analysis used to develop our guidance on findng the local authority for an address.
The software is open source, and published under an MIT licence. The data in the spatialite database used for this analysis was built using OS AddressBase, combined with open data sources used under the licenses identified in the README and Makefile.
# load spatialte database
import os
import sqlite3
conn = sqlite3.connect("addresses.db")
conn.enable_load_extension(True)
conn.load_extension(os.environ.get("SPATIALITE_EXTENSION", "/usr/lib/x86_64-linux-gnu/mod_spatialite.so"))
c = conn.cursor()
# number of UPRNs
c.execute("SELECT count(*) FROM uprn")
uprn_total = c.fetchone()[0]
print(f"{uprn_total:,}")
# number of postcodes
c.execute("SELECT count(*) FROM postcode")
postcode_total = c.fetchone()[0]
print(f"{postcode_total:,}")
# postcodes with the most number of UPRNs ..
c.execute("""
SELECT postcode, count(*)
FROM uprn
GROUP BY postcode
ORDER BY count(*) DESC
LIMIT 50
""")
c.fetchall()
# number of UPRNs where ONSUD and ONSPD differ
c.execute("""
SELECT
COUNT(*)
FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE uprn.onsud != postcode.onspd
""")
onsud_onspd_differ = c.fetchone()[0]
print(f"{onsud_onspd_differ:,}")
# percentage of GB UPRNs where ONSUD and ONSPD differ
print("%2.1f%%" % (onsud_onspd_differ / uprn_total * 100))
# number of UPRNs in an English ONSPD postcode
c.execute("""
SELECT
COUNT(*)
FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE substr(onsud, 1, 1) == 'E'
""")
england_uprn_total = c.fetchone()[0]
print(f"{england_uprn_total:,}")
# number of UPRNs in an English ONSPD postcode where ONSUD and ONSPD differ
c.execute("""
SELECT
COUNT(*)
FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE substr(uprn.onsud, 1, 1) == 'E'
AND uprn.onsud != postcode.onspd
""")
england_onsud_onspd_differ = c.fetchone()[0]
print(f"{england_onsud_onspd_differ:,}")
# percentage of English UPRNs where ONSUD and ONSPD differ
print("%2.1f%%" % (england_onsud_onspd_differ / england_uprn_total * 100))
%config InlineBackend.figure_formats = ['svg']
import matplotlib.pyplot as plt
%matplotlib inline
from numpy import array
# postcodes with UPRNs which are attributed to different LAs by ONSUD
c.execute("""
SELECT COUNT(DISTINCT onsud) count, postcode
FROM uprn
GROUP BY postcode
ORDER BY count DESC
""")
def dict_from_fetch(l):
d = dict()
for a, b in l:
d.setdefault(a, [])
d[a].append(b)
return d
postcode_lad_count = dict_from_fetch(c.fetchall())
postcode_lad_counts = array([(n, len(postcode_lad_count[n])) for n in sorted(postcode_lad_count)])
print(postcode_lad_counts)
fig, ax = plt.subplots()
data = array(postcode_lad_counts)
ax.set_title('Local authority districts by postcode')
ax.set_xlabel("Number of different districts")
ax.set_ylabel("Number of postcodes")
x = data[:, :-1].flatten()
y = data[:, -1]
plt.xticks( x )
ax.ticklabel_format(style='plain')
p = plt.bar(x, y)
# postcodes with 3 or 4 multiple local authority districts
print([(n, postcode_lad_count[n]) for n in sorted(postcode_lad_count, reverse=True) if n > 2])
# postcodes with UPRNs attributed to 3 or more LAs by ONSUD
c.execute("""
SELECT postcode, lad_count, uprn_count
FROM (SELECT postcode, COUNT(DISTINCT onsud) lad_count, COUNT(uprn) uprn_count
FROM uprn
GROUP BY postcode
ORDER BY lad_count DESC)
WHERE lad_count > 2
""")
postcodes = c.fetchall()
print(postcodes)
from ipyleaflet import Map, Marker, MarkerCluster, GeoJSON
import json
def postcode_map(postcode):
c.execute("""
SELECT uprn, X(point), Y(point), onsud, geography.name
FROM uprn
INNER JOIN geography
ON geography.geography == uprn.onsud
WHERE uprn.postcode == "%s"
""" % (postcode))
clusters = {}
lads = {}
for p in c.fetchall():
lad = p[3]
lads[lad] = 1
clusters.setdefault(lad, [])
clusters[lad].append(Marker(location=(p[2], p[1]), title="%s %s (%s)" % (p[0], p[4], p[3])))
m = Map(center=(50.8532019, -0.3528908), zoom=4)
for cluster in clusters:
m.add_layer(MarkerCluster(markers=clusters[cluster]));
for lad in lads:
c.execute('SELECT AsGeoJSON(geometry) FROM geography WHERE geography == "%s"' % (lad))
geo_json = GeoJSON(data=json.loads(c.fetchone()[0]), style={'fillOpacity': 0})
m.add_layer(geo_json)
return m
postcode_map("BN15 0AY")
postcode_map("CV4 7ES")
postcode_map("LA22 9JU")
# postcodes where ONSUD UPRNs span multiple nations
c.execute("""
SELECT postcode
FROM (SELECT
postcode, COUNT(DISTINCT substr(onsud, 1, 1)) count
FROM uprn
GROUP BY postcode
ORDER BY count DESC)
WHERE count > 1
""")
postcodes = list(array(c.fetchall()).flatten())
print(len(postcodes))
print(postcodes)
# number of UPRNs which ONSUD says are in England, but the ONSPD LAD is in another nation
c.execute("""
SELECT COUNT(*) FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE substr(uprn.onsud, 1, 1) != 'E'
AND substr(postcode.onspd, 1, 1) == 'E'
""")
c.fetchone()[0]
# number of UPRNs which the ONSPD LAD is in England, but ONSUD says are in another nation
c.execute("""
SELECT COUNT(*) FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE substr(uprn.onsud, 1, 1) == 'E'
AND substr(postcode.onspd, 1, 1) != 'E'
""")
c.fetchone()[0]
c.execute("""
SELECT postcode FROM postcode
WHERE postcode.nspl != postcode.onspd
""")
postcodes = list(array(c.fetchall()).flatten())
print(len(postcodes))
print(postcodes)
# number of UPRNs where the ONSPD LAD differs to the NSPL LAD
c.execute("""
SELECT COUNT(*) FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE postcode.nspl != postcode.onspd
""")
c.fetchone()[0]
# postcodes where the OS codepo LAD differes to the ONSPD LAD
c.execute("""
SELECT COUNT(*) FROM postcode
WHERE postcode.codepo != postcode.onspd
""")
print('{:,}'.format(c.fetchone()[0]))
# number of UPRNs where the OS codepo LAD differs to the ONSPD LAD
c.execute("""
SELECT COUNT(*) FROM uprn
INNER JOIN postcode
ON postcode.postcode = uprn.postcode
WHERE postcode.codepo != postcode.onspd
""")
print('{:,}'.format(c.fetchone()[0]))