# -*- coding: utf-8 -*-
"""
Created on Tue Nov 22 17:20:31 2016

@author: cgalli
"""

import numpy as np
import time 

start_time = time.time()


# Generate random arrays
data1 = np.random.rand(1000 * 1000)
data2 = np.random.rand(1000 * 1000)

#consider 1d array first
diff = np.abs(data1 - data2)

# At this point we are done needing the arrays of data1 and data2
# all "work" will be done on the diff array

# Here we get the index positions of the diff array sorted.
# we don't care about sorting the array for data values, we only care
# that we can find the 10 smallest and 10 largest values
# by using positions 0:10 and -10:: (the last 10)
# In IDL the SORT(diff) provides the index positions just as numpy argsort does.
# http://www.harrisgeospatial.com/docs/sort.html

r = np.argsort(diff)

# in the returned index positions, 
# we now know that the first 10 elements represent the smallest values,
# and the last 10 elements are the largest ones

#min_vals = diff[r[0:10]] # we don't need this, but it might be useful to verify things later
min_pos = r[0:10]

#max_vals = diff[r[-10::]] # we don't need this, but it might be useful to verify things later
max_pos = r[-10::]

#the above min_pos and max_pos index positions represent positions based on a flat 1D data array of 1000 X 1000
# but the 1000 x 1000 index positions of a 2D array is meaningful for this exercise.
# we can easily create index positions for the 10 minimum value locations.
# In fact, we can use the modulus of 1000 to know the X coordinate, and 
# dividing by 1000 gives us the Y coordinate.

#'''
#For example, if I created an array of 1 through 9 as 1 dimensional (1D), it would look like this:

#[0, 1, 2, 3, 4, 5, 6, 7, 8]

#We can also say this could be a 2D array of 3 x 3 rows and columns like this:
    
#[0, 1, 2]
#[3, 4, 5]
#[6, 7, 8]

#The data is the same, and the positions are the same IF it was still 1D. Assuming that
#the first position of 0 is coordinate [0,0] in the new 2D array, we can apply the above 
#modulus and division to get index positions. If we wanted to know where the location of
#5 is located in the 2D array, we take this:

#x position = 5 % 3, which equals 2
#y position = 5 / 3, which equals 1 (remember division of integers will truncate, which is what we wanted)

#If the index positoin of 5 was in our min_pos from above, we can now say it has the coordinate system
#of (x, y) = (2, 1). So array1D[2, 1] = 5.

#This relationship allows us to find the distance between any two points in the 2D cartesian space by using
#a very basic distance formula of d = sqrt((y2 - y1)**2 + (x2 - x1)**2).
#'''

#of the 10 smallest, find distance, but first get the positions so we have an x and y "point"
xpos = (min_pos % 1000)
ypos = (min_pos / 1000)

#Up to now, we have only written 6 lines of code to represent all of our relationships
# to find the distances of minimum values.

#the next part is up to you on how to approach. Because we only have points to compare,
# we might not care that looping over 10 points recursively ends up with a O(n**2) complexity.
# in fact, trying to get only 10 data points into a form that is more elegant in complexity, 
# will probably result in adding more time because the "setup" or "approach" is going to 
# require more overhead than looping 10**2 times. So here's one approach in python.
        
# First, I want a set of x,y pairs to compare. 
#'''
#For example, if I had these two arrays from xpos and ypos:
#xpos = [10,23,54,18,40]
#ypos = [45,11,76,13,16]

#I have x,y pairs like (10,45) and (23,11) and (54,76) and so on...
#So comparing one point to another with a basic distance equation yields distance in meters
#if we assumed index positions are meters. Remember we said the 1000 x 1000 was 1 square kilometer.
#'''

#reshape xpos and ypos to pairs of points
d = np.array([xpos,ypos]).reshape(10,2)

#now loop 10 iterations and loop the same amount in a nested loop
# it only takes 9.9 micro seconds. That's 0.00099 seconds. It passes the "good enough test." :)

#set a large minimum distance to start with
min_dist = 99999
mpos = (-1,-1)

t2 = time.time()
for i in xrange(len(xpos)):
    point1 = d[i,:]
    #compare this point to all others
    for j in xrange(len(xpos)):
        
        if i == j:
            # no need to compare itself to itself!
            # so continue onto next inner loop iteration
            continue
        
        point2 = d[j,:]
        this_dist = np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
        if this_dist < min_dist:
            min_dist = this_dist
            mpos = (point1, point2)
            
print time.time() - start_time
print 'The terrible O(n**2) approach took this much time:', time.time() - t2
print 'positions and distance of closest minimum values from 10 points', mpos, min_dist

#then do the same to find the maximum distance for the maximum values.

#Note: we could probably find a clever way to make this O(n**2) problem a little easier, but again
# this requires overhead to setup the problem, and that setup is likely more expensive than 
# just looping 100 times. So I might conclude that this is an optimzed "good enough" version of code.
# It passes the test of "Is this practical and good enough?" I think yes.