Generating Initial Populations#

import math
import numpy as np
import matplotlib.pyplot as plt

Sequences#

Below are some examples of how sequences can be generated using popular random number generation tools.

Pseudo-random sampling#

# Generate 200 "random" numbers
np.random.seed(6345245)
N=200 # number of samples
P_random_pseudo=np.random.rand(N,N)

This method is “pseudo”-random as it is actually completely deterministic, if you know some key information. Both random.random and numpy.random use the Mersenne Twister. Because new values are generated using a list of old values, sufficient observations of “random” data allows prediction of future values. Also, the same “random” values can be generated simply by using the same seed.

Generalized Halton Number Generator#

# Sampling using Generalized Halton Number Quasi-random Generator
import ghalton
sequencer = ghalton.GeneralizedHalton(7,23)
P_random_quasi = np.array(sequencer.get(N))
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Input In [3], in <cell line: 2>()
      1 # Sampling using Generalized Halton Number Quasi-random Generator
----> 2 import ghalton
      3 sequencer = ghalton.GeneralizedHalton(7,23)
      4 P_random_quasi = np.array(sequencer.get(N))

ModuleNotFoundError: No module named 'ghalton'

As is evident in the summary graph at the end of this page, the Halton sequence has many disadvantages. Depending on the input parameters, results could show overt correlation, or at least uneven sampling. In this example, we can see some obvious diagonal bands forming, as well as patterns of blank space.

Box-Muller Transform#

# Sampling using Box-Muller 
# 1. generate uniformly distributed values between 0 and 1
u1 = np.random.uniform(size=(N))
u2 = np.random.uniform(size=(N))

# 2. Tranform u1 to s
ss = -np.log(u1)

# 3. Transform u2 to theta
thetas = 2*math.pi*u2

# 4. Convert s to r
rs = np.sqrt(2*ss)

# 5. Calculate x and y from r and theta
P_BM_x, P_BM_y = rs*np.cos(thetas), rs*np.sin(thetas)

One of the drawbacks of the Box-Muller transform is the tendency for values to cluster around the mean. This has to do with 32-bit integers and how numbers are represented by processors. Additionally, calculating the square root (step 4) can be costly.

Central Limit Theorem#

# Sampling using the Central Limit Theorem
from scipy import stats
import random

P_CLT_x=[2.0 * math.sqrt(N) * (sum(random.randint(0,1) for x in range(N)) / N - 0.5) for x in range(N)]
P_CLT_y=[2.0 * math.sqrt(N) * (sum(random.randint(0,1) for x in range(N)) / N - 0.5) for x in range(N)]

Sobol Sequence#

# Sobol low-discrepancy-sequence (LDS)
import sobol_seq
P_sobel=sobol_seq.i4_sobol_generate(2,N)

Latin Hypercube Sampling#

# Latin Hypercube sampling
from pyDOE import *
from scipy.stats.distributions import norm
P_LHS=lhs(2, samples=N, criterion='center')
f, (ax1, ax2) = plt.subplots(ncols=2, figsize=(18,8))
f, (ax3,ax4) = plt.subplots(ncols=2, figsize=(18,8))
f, (ax5, ax6) = plt.subplots(ncols=2, figsize=(18,8))
ax1.scatter(P_random_pseudo[:,0], P_random_pseudo[:,1], color="gray")
ax2.scatter(P_random_quasi[:100], P_random_quasi[100:], color="red")
ax3.scatter(P_BM_x, P_BM_y, color="green")
ax4.scatter(P_CLT_x, P_CLT_y, color="cyan")
ax5.scatter(P_sobel[:,0], P_sobel[:,1], color="magenta")
ax6.plot(P_LHS[:,0], P_LHS[:,1], "o")
                          
ax1.set_title("Pseudo-random")
ax2.set_title("Quasi-random")
ax3.set_title("Box-Muller")
ax4.set_title("Central Limit Theorem")
ax5.set_title("Sobol")
ax6.set_title("Latin Hypercube")
Text(0.5, 1.0, 'Latin Hypercube')
../../_images/9959a44d42e0f3b1c71a9e8e6b3482206a30e4f664825b9b64c06da136bded45.png ../../_images/eb0e75b0d12d8424a28e9ece9a81ec5093295683556792adafcb9611de55b682.png ../../_images/520aea383902dabdb0b0cd516a44d8393c24ebe28fd5570eb9040464602297b8.png

Permutations#

Random permutations can also be generated for existing datasets. Below are a few examples.

np.random.permutation#

# randomly permute a sequence, or return a permuted range.
per1=np.random.permutation(10)
print(per1)
[9 2 7 1 8 6 5 3 4 0]

np.shuffle#

# another method
per2 = np.array([5, 4, 9, 0, 1, 2, 6, 8, 7, 3])
np.random.shuffle(per2)
print(per2)
[6 3 5 8 4 0 7 9 1 2]

Initial Population as real-value permutations#

# population of initial solution as real-value permuations 
pop_init = np.arange(50).reshape((10,5))
np.random.permutation(pop_init)
array([[10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [ 0,  1,  2,  3,  4],
       [35, 36, 37, 38, 39],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34],
       [45, 46, 47, 48, 49],
       [40, 41, 42, 43, 44],
       [25, 26, 27, 28, 29],
       [ 5,  6,  7,  8,  9]])

Initial Population as binary permutations#

# population of initial solution as binary permutations 
from itertools import combinations
size=5 # number of bits in the binary string
ones=2 # number of ones in each binary string

for pos in map(set, combinations(range(size), ones)):
     print([int(i in pos) for i in range(size)], sep='\n')
[1, 1, 0, 0, 0]
[1, 0, 1, 0, 0]
[1, 0, 0, 1, 0]
[1, 0, 0, 0, 1]
[0, 1, 1, 0, 0]
[0, 1, 0, 1, 0]
[0, 1, 0, 0, 1]
[0, 0, 1, 1, 0]
[0, 0, 1, 0, 1]
[0, 0, 0, 1, 1]

Example: Generating a random route between two points#

import osmnx as ox
import random
from collections import deque

G = ox.graph_from_place("University of Toronto")
fig, ax = ox.plot_graph(G)
../../_images/095f7d360f8683c4adc199cc1e906c43224a5e1180959d7be1c26462d2c12d80.png
from smart_mobility_utilities.common import Node

# this is just a typical graph search with shuffled frontier
def randomized_search(G, source, destination):
    origin = Node(graph = G, osmid = source)
    destination = Node(graph = G, osmid = destination)
    
    route = [] 
    frontier = deque([origin])
    explored = set()
    while frontier:
        node = random.choice(frontier)   # here is the randomization part
        frontier.remove(node)
        explored.add(node.osmid)

        for child in node.expand():
            if child not in explored and child not in frontier:
                if child == destination:
                    route = child.path()
                    return route
                frontier.append(child)

    raise Exception("destination and source are not on same component")

The function above modifies a typical graph search algorithm by scrambling the frontier nodes. This means that candidates for expansion are “random”, which means different routes are yielded when called repeatedly.

# generate random route between 2 nodes 
random_route = randomized_search(G, 24959528, 1480794706)
fig, ax = ox.plot_graph_route(G, random_route)
../../_images/b26e9ca8b39948d5cbf07ba24bb65e422f4ede70343557f891a18ab54cc18326.png
random_hexa = lambda: random.randint(0,255) # generate random hexadecimal color

# generate 5 random routes with 5 different colors -- overlapping routes cancel each other color's
routes = [randomized_search(G, 24959528, 1480794706) for _ in range(5)]
rc = ['#%02X%02X%02X' % (random_hexa(),random_hexa(),random_hexa()) for _ in range(5)]
fig, ax = ox.plot_graph_routes(G, routes, route_colors=rc, route_linewidth=6, node_size=0)
../../_images/5d46ab9ec72be048982c9353f84248ba7baeb16bf1db5c7c267cec22311382ec.png