From e1c0c7a1d8d51620005791c1c235fe4dba20732a Mon Sep 17 00:00:00 2001
From: Daniel Richman
Date: Thu, 13 Dec 2012 22:49:36 +0000
Subject: [PATCH 1/2] Add python dependencies (exact versions important)

---
 README.md        | 4 +++-
 requirements.txt | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index e42d866..638251f 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,9 @@ The following items need to be executable (`chmod +x ./predict.py`) by the user
 * `cron/clear-pydap-cache-cronjob.sh`
 * `cron/purge-predictions-cronjob.sh`
 
-The `predict/preds/` and `gfs/` directories need to have rwx access by the PHP interpreter and the `predict.py` python script. You will need to install the following python packages: pydap, numpy, json, simple-json. We use `at` to automatically background the predictor, so you will need that installed.
+The `predict/preds/` and `gfs/` directories need to have rwx access by the PHP interpreter and the `predict.py` python script. You will need to install the python dependencies listed in requirements.txt. In the case of PyDAP, the exact version is important; the easiest way is:
+
+    $ pip install -r requirements.txt
 
 Other than that, just clone this repo to a non web-accessible folder and create symlinks to the `predict/` directory in the repo.
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..bc7e1d0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+Pydap==3.0.1
+numpy==1.5.1
+statsd-client==1.0.2
+simplejson==2.1.1
+

From 869a4203bb2b7d5f0e5388e2e47122f5aef5900f Mon Sep 17 00:00:00 2001
From: Daniel Richman
Date: Sun, 23 Dec 2012 19:50:15 +0000
Subject: [PATCH 2/2] Fix: C predictor may choose stale data in gfs dir

---
 predict.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/predict.py b/predict.py
index 7cb0630..a675806 100755
--- a/predict.py
+++ b/predict.py
@@ -15,6 +15,9 @@ import calendar
 import optparse
 import subprocess
 import statsd
+import tempfile
+import shutil
+import bisect
 import simplejson as json
 
 statsd.init_statsd({'STATSD_BUCKET_PREFIX': 'habhub.predictor'})
@@ -91,10 +94,6 @@ def main():
         help='search for dataset covering the POSIX timestamp TIME \t[default: now]',
         metavar='TIME', type='int',
         default=calendar.timegm(datetime.datetime.utcnow().timetuple()))
-    parser.add_option('-o', '--output', dest='output',
-        help='file to write GFS data to with \'%(VAR)\' replaced with the the value of VAR [default: %default]',
-        metavar='FILE',
-        default='gfs/gfs_%(time)_%(lat)_%(lon)_%(latdelta)_%(londelta).dat')
     parser.add_option('-v', '--verbose', action='count', dest='verbose',
         help='be verbose. The more times this is specified the more verbose.', default=False)
     parser.add_option('-p', '--past', dest='past',
@@ -259,7 +258,14 @@ def main():
             options.lat, options.latdelta, \
             options.lon, options.londelta)
 
-    write_file(options.output, dataset, \
+    gfs_dir = "/var/www/cusf-standalone-predictor/gfs/"
+
+    gfs_dir = tempfile.mkdtemp(dir=gfs_dir)
+
+    gfs_filename = "gfs_%(time)_%(lat)_%(lon)_%(latdelta)_%(londelta).dat"
+    output_format = os.path.join(gfs_dir, gfs_filename)
+
+    write_file(output_format, dataset, \
         window, \
         time_to_find - datetime.timedelta(hours=options.past), \
         time_to_find + datetime.timedelta(hours=options.future))
@@ -273,7 +279,9 @@ def main():
     else:
         alarm_flags = []
 
-    subprocess.call([pred_binary, '-i/var/www/cusf-standalone-predictor/gfs/', '-v', '-o'+uuid_path+'flight_path.csv', uuid_path+'scenario.ini'] + alarm_flags)
+    subprocess.call([pred_binary, '-i' + gfs_dir, '-v', '-o'+uuid_path+'flight_path.csv', uuid_path+'scenario.ini'] + alarm_flags)
+
+    shutil.rmtree(gfs_dir)
 
     update_progress(pred_running=False, pred_complete=True)
     statsd.increment('success')
@@ -302,8 +310,10 @@ def write_file(output_format, data, window, mintime, maxtime):
     assert(hgtprs_global.dimensions == ('time', 'lev', 'lat', 'lon'))
 
     # Work out what times we want to download
-    times = filter(lambda x: (x >= mintime) & (x <= maxtime),
-                   map(timestamp_to_datetime, hgtprs_global.maps['time']))
+    times = sorted(map(timestamp_to_datetime, hgtprs_global.maps['time']))
+    times_first = max(0, bisect.bisect_right(times, mintime) - 1)
+    times_last = min(len(times), bisect.bisect_left(times, maxtime) + 1)
+    times = times[times_first:times_last]
 
     num_times = len(times)
     current_time = 0
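A note on the window-selection hunk in PATCH 2/2: the old `filter` kept only forecast times inside [mintime, maxtime], so a window boundary falling between two GFS samples lost the bracketing sample on that side; the `bisect` version appears intended to widen the slice by one sample at each end so the whole requested window is covered. A minimal, self-contained sketch of that selection, with hypothetical 3-hourly sample times (not taken from the patch):

    import bisect
    import datetime

    # Hypothetical 3-hourly GFS sample times, sorted ascending.
    times = [datetime.datetime(2012, 12, 23, h) for h in (0, 3, 6, 9, 12)]
    mintime = datetime.datetime(2012, 12, 23, 4)
    maxtime = datetime.datetime(2012, 12, 23, 8)

    # Step back one sample so the slice starts at or before mintime ...
    first = max(0, bisect.bisect_right(times, mintime) - 1)
    # ... and forward one so it ends at or after maxtime.
    last = min(len(times), bisect.bisect_left(times, maxtime) + 1)

    print(times[first:last])   # 03:00, 06:00 and 09:00 bracket the window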