From e1c0c7a1d8d51620005791c1c235fe4dba20732a Mon Sep 17 00:00:00 2001
From: Daniel Richman
Date: Thu, 13 Dec 2012 22:49:36 +0000
Subject: [PATCH 1/2] Add python dependencies (exact versions important)

---
 README.md        | 4 +++-
 requirements.txt | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index e42d866..638251f 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,9 @@ The following items need to be executable (`chmod +x ./predict.py`) by the user
 * `cron/clear-pydap-cache-cronjob.sh`
 * `cron/purge-predictions-cronjob.sh`
 
-The `predict/preds/` and `gfs/` directories need to have rwx access by the PHP interpreter and the `predict.py` python script. You will need to install the following python packages: pydap, numpy, json, simple-json. We use `at` to automatically background the predictor, so you will need that installed.
+The `predict/preds/` and `gfs/` directories need to have rwx access by the PHP interpreter and the `predict.py` python script. You will need to install the python dependencies listed in requirements.txt. In the case of PyDAP, the exact version is important; the easiest way is:
+
+    $ pip install -r requirements.txt
 
 Other than that, just clone this repo to a non web-accessible folder and create symlinks to the `predict/` directory in the repo.
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..bc7e1d0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+Pydap==3.0.1
+numpy==1.5.1
+statsd-client==1.0.2
+simplejson==2.1.1
+

From 869a4203bb2b7d5f0e5388e2e47122f5aef5900f Mon Sep 17 00:00:00 2001
From: Daniel Richman
Date: Sun, 23 Dec 2012 19:50:15 +0000
Subject: [PATCH 2/2] Fix: C predictor may choose stale data in gfs dir

---
 predict.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/predict.py b/predict.py
index 7cb0630..a675806 100755
--- a/predict.py
+++ b/predict.py
@@ -15,6 +15,9 @@ import calendar
 import optparse
 import subprocess
 import statsd
+import tempfile
+import shutil
+import bisect
 import simplejson as json
 
 statsd.init_statsd({'STATSD_BUCKET_PREFIX': 'habhub.predictor'})
@@ -91,10 +94,6 @@ def main():
         help='search for dataset covering the POSIX timestamp TIME \t[default: now]',
         metavar='TIME', type='int',
         default=calendar.timegm(datetime.datetime.utcnow().timetuple()))
-    parser.add_option('-o', '--output', dest='output',
-        help='file to write GFS data to with \'%(VAR)\' replaced with the the value of VAR [default: %default]',
-        metavar='FILE',
-        default='gfs/gfs_%(time)_%(lat)_%(lon)_%(latdelta)_%(londelta).dat')
     parser.add_option('-v', '--verbose', action='count', dest='verbose',
         help='be verbose. The more times this is specified the more verbose.', default=False)
     parser.add_option('-p', '--past', dest='past',
@@ -259,7 +258,14 @@ def main():
             options.lat, options.latdelta, \
             options.lon, options.londelta)
 
-    write_file(options.output, dataset, \
+    gfs_dir = "/var/www/cusf-standalone-predictor/gfs/"
+
+    gfs_dir = tempfile.mkdtemp(dir=gfs_dir)
+
+    gfs_filename = "gfs_%(time)_%(lat)_%(lon)_%(latdelta)_%(londelta).dat"
+    output_format = os.path.join(gfs_dir, gfs_filename)
+
+    write_file(output_format, dataset, \
         window, \
         time_to_find - datetime.timedelta(hours=options.past), \
         time_to_find + datetime.timedelta(hours=options.future))
@@ -273,7 +279,9 @@ def main():
     else:
         alarm_flags = []
 
-    subprocess.call([pred_binary, '-i/var/www/cusf-standalone-predictor/gfs/', '-v', '-o'+uuid_path+'flight_path.csv', uuid_path+'scenario.ini'] + alarm_flags)
+    subprocess.call([pred_binary, '-i' + gfs_dir, '-v', '-o'+uuid_path+'flight_path.csv', uuid_path+'scenario.ini'] + alarm_flags)
+
+    shutil.rmtree(gfs_dir)
 
     update_progress(pred_running=False, pred_complete=True)
     statsd.increment('success')
@@ -302,8 +310,10 @@ def write_file(output_format, data, window, mintime, maxtime):
     assert(hgtprs_global.dimensions == ('time', 'lev', 'lat', 'lon'))
 
     # Work out what times we want to download
-    times = filter(lambda x: (x >= mintime) & (x <= maxtime),
-                   map(timestamp_to_datetime, hgtprs_global.maps['time']))
+    times = sorted(map(timestamp_to_datetime, hgtprs_global.maps['time']))
+    times_first = max(0, bisect.bisect_right(times, mintime) - 1)
+    times_last = min(len(times), bisect.bisect_left(times, maxtime) + 1)
+    times = times[times_first:times_last]
 
     num_times = len(times)
     current_time = 0
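A note on the window-selection hunk in PATCH 2/2: the old `filter` kept only forecast times inside [mintime, maxtime], so a window boundary falling between two GFS samples lost the bracketing sample on that side; the `bisect` version appears intended to widen the slice by one sample at each end so the whole requested window is covered. A minimal, self-contained sketch of that selection, with hypothetical 3-hourly sample times (not taken from the patch):

    import bisect
    import datetime

    # Hypothetical 3-hourly GFS sample times, sorted ascending.
    times = [datetime.datetime(2012, 12, 23, h) for h in (0, 3, 6, 9, 12)]
    mintime = datetime.datetime(2012, 12, 23, 4)
    maxtime = datetime.datetime(2012, 12, 23, 8)

    # Step back one sample so the slice starts at or before mintime ...
    first = max(0, bisect.bisect_right(times, mintime) - 1)
    # ... and forward one so it ends at or after maxtime.
    last = min(len(times), bisect.bisect_left(times, maxtime) + 1)

    print(times[first:last])   # 03:00, 06:00 and 09:00 bracket the window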