Transport Analytics Training Series - Last Revision: October 2022

Estimating Levels of Service¶

In [1]:
from sklearn.datasets import make_blobs

import math
import scipy

import pandas as pd

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

%matplotlib inline

plot_size   = 14
plot_width  = 5
plot_height = 5

params = {'legend.fontsize': 'large',
          'figure.figsize': (plot_width,plot_height),
          'axes.labelsize': plot_size,
          'axes.titlesize': plot_size,
          'xtick.labelsize': plot_size*0.75,
          'ytick.labelsize': plot_size*0.75,
          'axes.titlepad': 25}
plt.rcParams.update(params)
In [2]:
center_box = (100, 200) # defines the box that cluster centres are allowed to be in

n_customer = 200
n_facility = 5

facil_capacity  = 15000

custm_dem_low  = 30
custm_dem_high = 80

transp_unit_cost = 2
capacity_unit_cost = 100

np.random.seed(2)
In [3]:
custm_demands = np.random.randint(low=custm_dem_low, 
                                  high=custm_dem_high, 
                                  size=n_customer)
In [4]:
print(f'sum_facil ({n_facility* facil_capacity}) > sum_cust ({sum(custm_demands)}) = {n_facility* facil_capacity > sum(custm_demands)}')
sum_facil (75000) > sum_cust (11293) = True
In [5]:
cust_coord , _ = make_blobs(n_samples=n_customer, 
                            centers=2, 
                            cluster_std=20, 
                            center_box=center_box, 
                            random_state =  2)

facil_coord, _ = make_blobs(n_samples=n_facility, 
                            centers=1, 
                            cluster_std=30, 
                            center_box=center_box, 
                            random_state = 50)
In [6]:
custm_node_size = plot_size*10*((custm_demands/custm_dem_high)**2)
facil_node_size = plot_size*100

plt.scatter(cust_coord[:, 0], 
            cust_coord[:, 1],  
            s=custm_node_size, 
            cmap='viridis', 
            zorder=1500);

plt.scatter(facil_coord[:, 0], 
            facil_coord[:, 1], 
            s=facil_node_size, 
            cmap='viridis', 
            zorder=2000);

plt.axis('off')
Out[6]:
(83.64235900634193, 232.54148477049307, 48.169009398292125, 214.30310861724314)
In [7]:
from scipy.spatial import distance

dist_matrix = distance.cdist(cust_coord, facil_coord, 'euclidean')
transp_cost_matrix = dist_matrix * transp_unit_cost
In [8]:
n_customer = len(cust_coord)
n_facility = len(facil_coord)
In [9]:
from pulp import *

prob = LpProblem('prob', LpMinimize)
In [10]:
S = [('S'+str(i+1)) for i in range(n_facility)]
D = [('C'+str(i+1)) for i in range(n_customer)]
In [11]:
t = {customer:{facility:(transp_cost_matrix[i][j]) for j,facility in enumerate(S)} for i,customer in enumerate(D)}
P = {facility:(facil_capacity) for j,facility in enumerate(S)}
c = {facility:(facil_capacity*capacity_unit_cost) for j,facility in enumerate(S)}
Q = {customer:(custm_demands[i]) for i,customer in enumerate(D)}
In [12]:
n_min = 5
n_max = 5
In [13]:
x = LpVariable.dicts('x', (D,S), lowBound = 0, cat = LpContinuous)
y = LpVariable.dicts('y', S, lowBound = 0, upBound = 1, cat = LpInteger)
In [14]:
prob += lpSum([lpSum([x[i][j]*t[i][j] for j in S]) for i in D]) + lpSum([c[j]*y[j] for j in S])
In [15]:
for i in D:
    prob += lpSum([x[i][j] for j in S]) == Q[i]

for j in S:
    prob += lpSum([x[i][j] for i in D]) <= P[j]*y[j]

prob += lpSum([y[j] for j in S]) >= n_min
prob += lpSum([y[j] for j in S]) <= n_max
In [16]:
status = prob.solve()

print(f'STATUS\n{LpStatus[status]}\n')
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/pa01/opt/anaconda3/lib/python3.9/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/kk/14m2mg8s0w9c_tln749srnrw0000gp/T/2340fab067f84665b4f8716f83e07a12-pulp.mps timeMode elapsed branch printingOptions all solution /var/folders/kk/14m2mg8s0w9c_tln749srnrw0000gp/T/2340fab067f84665b4f8716f83e07a12-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 212 COLUMNS
At line 3243 RHS
At line 3451 BOUNDS
At line 3457 ENDATA
Problem MODEL has 207 rows, 1005 columns and 2015 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 8.01037e+06 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 8.01037e+06 to -1.79769e+308
Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Knapsack was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
FlowCover was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
ZeroHalf was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)

Result - Optimal solution found

Objective value:                8010365.00234909
Enumerated nodes:               0
Total iterations:               0
Time (CPU seconds):             0.00
Time (Wallclock seconds):       0.02

Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.01   (Wallclock seconds):       0.03

STATUS
Optimal

In [17]:
chosen_facil = []
for j,facility in enumerate(S):
    if y[facility].varValue == 1:
        chosen_facil.append(j)
        print(f'We will be establishing a warehouse in {facility}')

chosen_count = len(chosen_facil)

print(f'\nWe are establishing a total of {chosen_count} facilities')
We will be establishing a warehouse in S1
We will be establishing a warehouse in S2
We will be establishing a warehouse in S3
We will be establishing a warehouse in S4
We will be establishing a warehouse in S5

We are establishing a total of 5 facilities
In [18]:
vect_orig = []
vect_dest = []
u = v = np.zeros((len(vect_orig),len(vect_orig)))

cust_distance_dict = dict()

for j,facility in enumerate(S):
    for i,customer in enumerate(D):
        if x[customer][facility].varValue > 0:
            vect_orig.append(facil_coord[j])
            vect_dest.append(cust_coord[i])
            dist =  distance.euclidean(facil_coord[j],cust_coord[i])
            cust_distance_dict[customer] = round(dist,1)
            
vect_orig = np.array(vect_orig)
vect_dest = np.array(vect_dest)
In [19]:
plt.scatter(cust_coord[:, 0], 
            cust_coord[:, 1], 
            s=plot_size*2, 
            cmap='viridis',
            zorder = 10000);


plt.scatter(facil_coord[:, 0], 
            facil_coord[:, 1], 
            s=plot_size*5, 
            cmap='viridis');

for i in range(len(vect_orig)):
    plt.arrow(vect_orig[i][0],
              vect_orig[i][1],
              vect_dest[i][0]-vect_orig[i][0],
              vect_dest[i][1]-vect_orig[i][1],
              color='black')
    
plt.scatter(facil_coord[chosen_facil, 0], 
            facil_coord[chosen_facil, 1], 
            s=plot_size*10, 
            cmap='viridis',
            zorder=99999)

plt.axis('off')
Out[19]:
(83.63856852546967, 232.5461688022704, 48.16625778748879, 214.30759451598573)
In [20]:
df_distances = pd.DataFrame(cust_distance_dict.items(), columns=['Customer', 'Distance'])
In [21]:
df_distances.head(5)
Out[21]:
Customer Distance
0 C7 15.2
1 C42 11.3
2 C53 36.7
3 C58 27.2
4 C65 33.4
In [22]:
df_distances['Distance'].describe()
Out[22]:
count    200.000000
mean      22.608000
std       10.418081
min        0.000000
25%       14.500000
50%       22.350000
75%       30.900000
max       56.100000
Name: Distance, dtype: float64
In [23]:
dist_min =  df_distances['Distance'].min()
dist_max =  df_distances['Distance'].max()
dist_avg =  df_distances['Distance'].mean()
dist_sum =  df_distances['Distance'].sum()


print(f'Analysis for { chosen_count } facilities: \n')

print(f'Total distance  : { dist_sum:.2f}')
print(f'Minimum distance: { dist_min:.2f}')
print(f'Maximum distance: { dist_max:.2f}')
print(f'Average distance: { dist_avg:.2f}')
Analysis for 5 facilities: 

Total distance  : 4521.60
Minimum distance: 0.00
Maximum distance: 56.10
Average distance: 22.61
In [24]:
df_distances.hist(column='Distance')
Out[24]:
array([[<AxesSubplot:title={'center':'Distance'}>]], dtype=object)
In [25]:
hist_bins = [0, 25, 50, 75, 100]
df_distances.hist(column='Distance', bins = hist_bins)
Out[25]:
array([[<AxesSubplot:title={'center':'Distance'}>]], dtype=object)
In [26]:
freq_counts = df_distances['Distance'].value_counts(bins=hist_bins,sort=False)

df_counts = freq_counts.to_frame().reset_index()

df_counts
Out[26]:
index Distance
0 (-0.001, 25.0] 115
1 (25.0, 50.0] 84
2 (50.0, 75.0] 1
3 (75.0, 100.0] 0
In [27]:
def count_less(imax):
    return len(df_distances[df_distances['Distance']<imax])
In [28]:
df_cum = pd.DataFrame(range(10,110,10), columns=['Max'])
df_cum['Range'] = '0 - ' + df_cum['Max'].astype(str)
df_cum['Count'] =df_cum['Max'].apply(count_less)

df_cum['Percentage'] = round((df_cum['Count']/n_customer*100),1)
df_cum['Percentage'] = df_cum['Percentage'].astype(str) + '%'

df_cum
Out[28]:
Max Range Count Percentage
0 10 0 - 10 25 12.5%
1 20 0 - 20 80 40.0%
2 30 0 - 30 144 72.0%
3 40 0 - 40 193 96.5%
4 50 0 - 50 199 99.5%
5 60 0 - 60 200 100.0%
6 70 0 - 70 200 100.0%
7 80 0 - 80 200 100.0%
8 90 0 - 90 200 100.0%
9 100 0 - 100 200 100.0%