Notebook 5.5 - Estimating Level of Service

In [1]:
from sklearn.datasets import make_blobs

import math
import scipy

import pandas as pd

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

%matplotlib inline

plot_size   = 14
plot_width  = 5
plot_height = 5

params = {'legend.fontsize': 'large',
          'figure.figsize': (plot_width,plot_height),
          'axes.labelsize': plot_size,
          'axes.titlesize': plot_size,
          'xtick.labelsize': plot_size*0.75,
          'ytick.labelsize': plot_size*0.75,
          'axes.titlepad': 25}
plt.rcParams.update(params)
In [2]:
center_box = (100, 200) # defines the box that cluster centres are allowed to be in

n_customer = 200   # number of customer nodes
n_facility = 5     # number of candidate facility sites

facil_capacity  = 15000   # capacity of each facility

custm_dem_low  = 30   # lower bound on customer demand
custm_dem_high = 80   # upper bound on customer demand

transp_unit_cost = 2      # transport cost per unit shipped per unit distance
capacity_unit_cost = 100  # establishment cost per unit of installed capacity

np.random.seed(2)
In [3]:
# note: np.random.randint excludes the high value, so demands fall in [30, 79]
custm_demands = np.random.randint(low=custm_dem_low, 
                                  high=custm_dem_high, 
                                  size=n_customer)
In [4]:
print(f'sum_facil ({n_facility* facil_capacity}) > sum_cust ({sum(custm_demands)}) = {n_facility* facil_capacity > sum(custm_demands)}')
sum_facil (75000) > sum_cust (11293) = True
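
Since this is only a printout, the same feasibility condition can also be turned into a hard check; a minimal sketch:

# Sketch: stop early if total facility capacity cannot cover total customer demand.
assert n_facility * facil_capacity >= custm_demands.sum(), 'total capacity is below total demand'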
In [5]:
cust_coord , _ = make_blobs(n_samples=n_customer, 
                            centers=2, 
                            cluster_std=20, 
                            center_box=center_box, 
                            random_state =  2)

facil_coord, _ = make_blobs(n_samples=n_facility, 
                            centers=1, 
                            cluster_std=30, 
                            center_box=center_box, 
                            random_state = 50)
In [6]:
custm_node_size = plot_size*10*((custm_demands/custm_dem_high)**2)
facil_node_size = plot_size*100

plt.scatter(cust_coord[:, 0], 
            cust_coord[:, 1],  
            s=custm_node_size, 
            cmap='viridis', 
            zorder=1500);

plt.scatter(facil_coord[:, 0], 
            facil_coord[:, 1], 
            s=facil_node_size, 
            cmap='viridis', 
            zorder=2000);

plt.axis('off')
Out[6]:
(83.6308555178748, 232.55563883233972, 48.15714676590879, 214.31878035176794)
In [7]:
from scipy.spatial import distance

dist_matrix = distance.cdist(cust_coord, facil_coord, 'euclidean')
transp_cost_matrix = dist_matrix * transp_unit_cost
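
cdist returns one row per customer and one column per facility; a quick sanity check of the resulting cost matrix (a sketch, output not shown):

# Sketch: the transport cost matrix should have shape (n_customer, n_facility).
print(transp_cost_matrix.shape)
print(f'transport costs range from {transp_cost_matrix.min():.1f} to {transp_cost_matrix.max():.1f}')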
In [8]:
n_customer = len(cust_coord)
n_facility = len(facil_coord)
In [9]:
from pulp import *

prob = LpProblem('prob', LpMinimize)
In [10]:
S = [('S'+str(i+1)) for i in range(n_facility)]
D = [('C'+str(i+1)) for i in range(n_customer)]
In [11]:
# transport cost from each customer i to each candidate facility j
t = {customer:{facility:(transp_cost_matrix[i][j]) for j,facility in enumerate(S)} for i,customer in enumerate(D)}
# capacity of each candidate facility
P = {facility:(facil_capacity) for j,facility in enumerate(S)}
# fixed cost of establishing each facility at full capacity
c = {facility:(facil_capacity*capacity_unit_cost) for j,facility in enumerate(S)}
# demand of each customer
Q = {customer:(custm_demands[i]) for i,customer in enumerate(D)}
In [12]:
n_min = 5   # minimum number of facilities to open
n_max = 5   # maximum number of facilities to open (equal to n_min, so exactly five are opened)
In [13]:
x = LpVariable.dicts('x', (D,S), lowBound = 0, cat = LpContinuous)         # quantity shipped to customer i from facility j
y = LpVariable.dicts('y', S, lowBound = 0, upBound = 1, cat = LpInteger)   # 1 if facility j is opened, else 0
In [14]:
# objective: total transport cost plus total facility establishment cost
prob += lpSum([lpSum([x[i][j]*t[i][j] for j in S]) for i in D]) + lpSum([c[j]*y[j] for j in S])
In [15]:
# demand: each customer receives exactly its demand across all facilities
for i in D:
    prob += lpSum([x[i][j] for j in S]) == Q[i]

# capacity: a facility can only ship if it is open, and never more than its capacity
for j in S:
    prob += lpSum([x[i][j] for i in D]) <= P[j]*y[j]

# limit the number of facilities that may be opened
prob += lpSum([y[j] for j in S]) >= n_min
prob += lpSum([y[j] for j in S]) <= n_max
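
Taken together, the objective and constraints above form a capacitated facility location model. In the notation of the dictionaries defined earlier (t: transport cost, c: establishment cost, P: capacity, Q: demand), one way to write it is:

\begin{aligned}
\min\quad & \sum_{i \in D}\sum_{j \in S} t_{ij}\,x_{ij} \;+\; \sum_{j \in S} c_j\,y_j \\
\text{s.t.}\quad & \sum_{j \in S} x_{ij} = Q_i \quad \forall i \in D \\
& \sum_{i \in D} x_{ij} \le P_j\,y_j \quad \forall j \in S \\
& n_{\min} \le \sum_{j \in S} y_j \le n_{\max} \\
& x_{ij} \ge 0, \quad y_j \in \{0,1\}
\end{aligned}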
In [16]:
status = prob.solve()

print(f'STATUS\n{LpStatus[status]}\n')
STATUS
Optimal
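
If the optimal total cost is also of interest, PuLP can evaluate the objective at the solution; a small sketch (output omitted):

# Sketch: optimised total cost = transport cost + facility establishment cost.
print(f'Total cost: {value(prob.objective):,.2f}')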

In [17]:
chosen_facil = []
for j,facility in enumerate(S):
    if y[facility].varValue == 1:
        chosen_facil.append(j)
        print(f'We will be establishing a warehouse in {facility}')

chosen_count = len(chosen_facil)

print(f'\nWe are establishing a total of {chosen_count} facilities')
We will be establishing a warehouse in S1
We will be establishing a warehouse in S2
We will be establishing a warehouse in S3
We will be establishing a warehouse in S4
We will be establishing a warehouse in S5

We are establishing a total of 5 facilities
In [18]:
vect_orig = []   # facility end of each supply arrow
vect_dest = []   # customer end of each supply arrow

cust_distance_dict = dict()

for j,facility in enumerate(S):
    for i,customer in enumerate(D):
        if x[customer][facility].varValue > 0:
            vect_orig.append(facil_coord[j])
            vect_dest.append(cust_coord[i])
            dist =  distance.euclidean(facil_coord[j],cust_coord[i])
            cust_distance_dict[customer] = round(dist,1)
            
vect_orig = np.array(vect_orig)
vect_dest = np.array(vect_dest)
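
Before plotting the assignments, a quick sanity check that the solver's allocations add up to each customer's demand; a minimal sketch:

# Sketch: every customer's shipments should sum (within tolerance) to its demand.
for customer in D:
    shipped = sum(x[customer][facility].varValue for facility in S)
    assert abs(shipped - Q[customer]) < 1e-6, customer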
In [19]:
plt.scatter(cust_coord[:, 0], 
            cust_coord[:, 1], 
            s=plot_size*2, 
            cmap='viridis',
            zorder = 10000);


plt.scatter(facil_coord[:, 0], 
            facil_coord[:, 1], 
            s=plot_size*5, 
            cmap='viridis');

for i in range(len(vect_orig)):
    plt.arrow(vect_orig[i][0],
              vect_orig[i][1],
              vect_dest[i][0]-vect_orig[i][0],
              vect_dest[i][1]-vect_orig[i][1],
              color='black')
    
plt.scatter(facil_coord[chosen_facil, 0], 
            facil_coord[chosen_facil, 1], 
            s=plot_size*10, 
            cmap='viridis',
            zorder=99999)

plt.axis('off')
Out[19]:
(83.63192772877143, 232.5519160480636, 48.15830179548796, 214.3138162200473)
In [20]:
df_distances = pd.DataFrame(cust_distance_dict.items(), columns=['Customer', 'Distance'])
In [21]:
df_distances.head(5)
Out[21]:
  Customer  Distance
0       C7      15.2
1      C42      11.3
2      C53      36.7
3      C58      27.2
4      C65      33.4
In [22]:
df_distances['Distance'].describe()
Out[22]:
count    200.000000
mean      22.608000
std       10.418081
min        0.000000
25%       14.500000
50%       22.350000
75%       30.900000
max       56.100000
Name: Distance, dtype: float64
In [23]:
dist_min =  df_distances['Distance'].min()
dist_max =  df_distances['Distance'].max()
dist_avg =  df_distances['Distance'].mean()
dist_sum =  df_distances['Distance'].sum()


print(f'Analysis for { chosen_count } facilities: \n')

print(f'Total distance  : { dist_sum:.2f}')
print(f'Minimum distance: { dist_min:.2f}')
print(f'Maximum distance: { dist_max:.2f}')
print(f'Average distance: { dist_avg:.2f}')
Analysis for 5 facilities: 

Total distance  : 4521.60
Minimum distance: 0.00
Maximum distance: 56.10
Average distance: 22.61
In [24]:
df_distances.hist(column='Distance')
Out[24]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fdce05058d0>]],
      dtype=object)
In [25]:
hist_bins = [0, 25, 50, 75, 100]
df_distances.hist(column='Distance', bins = hist_bins)
Out[25]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fdd13732cd0>]],
      dtype=object)
In [26]:
freq_counts = df_distances['Distance'].value_counts(bins=hist_bins,sort=False)

df_counts = freq_counts.to_frame().reset_index()

df_counts
Out[26]:
            index  Distance
0  (-0.001, 25.0]       115
1    (25.0, 50.0]        84
2    (50.0, 75.0]         1
3   (75.0, 100.0]         0
In [27]:
def count_less(imax):
    return len(df_distances[df_distances['Distance']<imax])
In [28]:
df_cum = pd.DataFrame(range(10,110,10), columns=['Max'])
df_cum['Range'] = '0 - ' + df_cum['Max'].astype(str)
df_cum['Count'] =df_cum['Max'].apply(count_less)

df_cum['Percentage'] = round((df_cum['Count']/n_customer*100),1)
df_cum['Percentage'] = df_cum['Percentage'].astype(str) + '%'

df_cum
Out[28]:
   Max    Range  Count Percentage
0   10   0 - 10     25      12.5%
1   20   0 - 20     80      40.0%
2   30   0 - 30    144      72.0%
3   40   0 - 40    193      96.5%
4   50   0 - 50    199      99.5%
5   60   0 - 60    200     100.0%
6   70   0 - 70    200     100.0%
7   80   0 - 80    200     100.0%
8   90   0 - 90    200     100.0%
9  100  0 - 100    200     100.0%
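
The cumulative table reads level of service as the share of customers reached within a given distance. The inverse question, how far we must travel to reach a target share, can be answered with a quantile; a short sketch, with the 90% and 95% targets chosen purely for illustration:

# Sketch: distance needed to cover a target share of customers (illustrative targets).
for target in (0.90, 0.95):
    threshold = df_distances['Distance'].quantile(target)
    print(f'{target:.0%} of customers are within {threshold:.1f} distance units')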