# Language: Python
# Read emails.csv (resolved relative to the working directory) into a DataFrame.
emails = pandas.read_csv("emails.csv")

# Draw NUM_WINNERS rows at random; random_state=SEED makes the draw repeatable.
# NOTE(review): the sampled frame is not stored anywhere -- presumably this was
# the last expression of a notebook cell and only displayed; confirm.
emails.sample(n=NUM_WINNERS, random_state=SEED)
def pi_digits():
    """Yield the decimal digits of pi one at a time: 3, 1, 4, 1, 5, 9, ...

    This is an unbounded spigot generator (the recurrence matches the
    well-known Gibbons-style streaming algorithm): it never terminates on
    its own, so the caller decides how many digits to consume.

    All state is kept as exact Python integers.  The two divisions use
    floor division (``//``) explicitly so the digits stay correct on
    Python 3 and under ``from __future__ import division``; with ``/`` the
    state would silently turn into floats and the output would be wrong.

    Yields:
        int: the next decimal digit of pi, starting with the leading 3.
    """
    q, r, t, k, n, l = 1, 0, 1, 1, 3, 3
    while True:
        if 4 * q + r - t < n * t:
            # The next digit is settled: emit it, then rescale the state by 10.
            yield n
            q, r, t, k, n, l = (
                10 * q,
                10 * (r - n * t),
                t,
                k,
                (10 * (3 * q + r)) // t - 10 * n,
                l,
            )
        else:
            # Not enough precision yet: fold one more term into the state.
            q, r, t, k, n, l = (
                q * k,
                (2 * q + r) * l,
                t * l,
                k + 1,
                (q * (7 * k + 2) + r * l) // (t * l),
                l + 2,
            )
# Seed NumPy's global RNG so the numbers assigned below are reproducible.
np.random.seed(SEED)

# Attach one random integer per row of `emails` in a new "num" column.
# NOTE(review): randint's `high` bound is exclusive, so the largest
# NUM_DIGITS-digit value (10 ** NUM_DIGITS - 1) can never be drawn -- confirm
# whether that is intended before changing it, since it affects seeded output.
emails["num"] = np.random.randint(
    low=10 ** (NUM_DIGITS - 1),
    high=10 ** NUM_DIGITS - 1,
    size=len(emails),
)
class _Num(object):
    """Incremental matcher for one number inside a stream of digit characters.

    Feed the stream one character at a time through :meth:`move_p`; the
    number has just been matched in full when ``p == l``.

    Attributes:
        n: the value being searched for, exactly as passed in.
        s: ``str(n)``, the character pattern to match.
        p: how many leading characters of ``s`` are currently matched.
        l: ``len(s)``.
    """

    def __init__(self, n):
        self.n = n
        self.s = str(n)
        self.p = 0  # pointer in number string representation
        self.l = len(self.s)
        # Failure table: _fail[i] is the length of the longest proper prefix
        # of s[:i + 1] that is also a suffix of it.
        self._fail = self._build_failure(self.s)

    @staticmethod
    def _build_failure(s):
        """Return the Knuth-Morris-Pratt failure table for pattern ``s``."""
        fail = [0] * len(s)
        k = 0
        for i in range(1, len(s)):
            while k and s[i] != s[k]:
                k = fail[k - 1]
            if s[i] == s[k]:
                k += 1
            fail[i] = k
        return fail

    def move_p(self, d):
        """Consume the next stream character ``d`` and update ``p``.

        On a mismatch the pointer falls back to the longest prefix that is
        still consistent with the characters seen so far instead of
        resetting to zero; a plain reset would miss overlapping occurrences
        such as "112" inside "1112".
        """
        p = self.p
        if p == self.l:
            # A full match was reported on the previous call; continue from
            # its longest border so this call cannot index past the pattern.
            p = self._fail[p - 1] if p else 0
        while p and d != self.s[p]:
            p = self._fail[p - 1]
        if p < self.l and d == self.s[p]:
            p += 1
        self.p = p
def find_nums_in_pi(nums, first_n=None, max_pos=10 ** 6):
    """Find where the given numbers first appear in the digits of pi.

    The digits produced by ``pi_digits()`` are streamed once and every
    number still being searched for is advanced on each digit.

    Args:
        nums: iterable of hashable numbers to look for; duplicates are
            searched for only once.
        first_n: stop as soon as this many distinct numbers are found.
            Defaults to all distinct numbers; larger values are capped at
            that count, because they could never be reached.
        max_pos: circuit breaker -- give up once this many digits have been
            consumed without finding enough numbers.

    Returns:
        dict mapping each found number to the 0-based index of its first
        digit in the stream (index 0 is the leading 3).

    Raises:
        RuntimeError: if ``max_pos`` digits were consumed before ``first_n``
            numbers were found.
    """
    # De-duplicate while preserving order: results are keyed by value, so a
    # repeated value can only ever contribute one entry.
    seen = set()
    _nums = []
    for n in nums:
        if n not in seen:
            seen.add(n)
            _nums.append(_Num(n))

    first_n = len(_nums) if first_n is None else min(first_n, len(_nums))
    nums_pos = {}
    if first_n <= 0:
        # Nothing to find; do not burn through max_pos digits for nothing.
        return nums_pos

    pi_gen = pi_digits()
    for pos in itertools.count():
        if pos % 1000 == 0:
            print("Current Pi position: %s. Nums found: %s" % (pos, len(nums_pos)))
        if pos == max_pos:
            raise RuntimeError("Circuit breaker!")
        d = str(next(pi_gen))
        # Every pending number must see this digit, including those listed
        # after a number that completes here, so build the next search list
        # instead of breaking out of the loop on the first hit.
        still_searching = []
        for cur_num in _nums:
            cur_num.move_p(d)
            if cur_num.p == cur_num.l:
                # whole number found; record the index of its first digit
                nums_pos[cur_num.n] = pos - cur_num.l + 1
                if len(nums_pos) == first_n:
                    return nums_pos
            else:
                still_searching.append(cur_num)
        _nums = still_searching