# Generate charts from WHO updates on Influenza A(H1N1)
#
# Copyright (C) 2009 http://www.vyvy.org/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#
# Notes:
# - This simple program depends on a well-formatted RSS feed from WHO updates
# - This program relies on uncaught exceptions for very simple error handling!
#
# 10-Jun-09: First working version (First used for Update 47)
# 13-Jun-09: Added code for uploading files to the website
# 09-Jul-09: Improve code to tackle format since Update 50
#
# NOTE(review): several of the patterns below (feed item, table, table row,
# grand total) look as if literal markup tags are missing from this copy of
# the file.  They are reproduced unchanged here -- TODO confirm against the
# original script before running.

import datetime
import ftplib
import getpass
import os
import re
import shutil
import sys
import urllib.request

DATA_FILE = 'h1n1.gpdat'


# Read existing data file
print('Reading existing data file...')
# Every data row starts with "<update>[.<minor>]<TAB>".
reo_update = re.compile(r'^([0-9]+)(\.[0-9]+)?\t')
entries = {}    # update number -> complete tab-separated data row
with open(DATA_FILE) as fp_data:
    for i, line in enumerate(fp_data):
        # Ignore the header
        if line[:7] == '#Update':
            continue
        # Parse the line
        line = line.rstrip()            # Get rid of newline characters
        mo = reo_update.match(line)     # No match -> AttributeError (by design)
        update = int(mo.group(1))       # Update number
        # The header occupies line 0, so update N must sit on line N.
        assert i == update
        # Add the entry
        entries[update] = line
for i in sorted(entries):
    print(' ' + entries[i])


# Fetch the page of RSS feed
print('Fetching RSS feed...')
rssfeed_url = "http://www.who.int/feeds/entity/csr/disease/swineflu/en/rss.xml"
with urllib.request.urlopen(rssfeed_url) as response:
    page = response.read().decode()


# Parse the page
# Four captured fields per feed item; the code below uses field 0 as the
# title, field 1 as the description and field 3 as the URL of the update page.
reo_item = re.compile(
    r'.*?'
    r'\s*?(.+?)\s*?.*?'
    r'\s*?(.+?)\s*?.*?'
    r'\s*?(.+?)\s*?.*?'
    r'\s*?(.+?)\s*?.*?'
    r'',
    re.DOTALL)
feed_items = reo_item.findall(page)
assert len(feed_items) == 10    # Expect 10 items
print('Parsing ' + str(len(feed_items)) + ' entries...')

# Only the tail of the title is matched (searched, not anchored at the start),
# so the "Influenza A(H1N1) - " prefix is not required.
reo_title = re.compile(r'update ([0-9]+)(\.[0-9]+)?$')

# Description format used up to update 49: everything is in the feed itself.
reo_description_v1 = re.compile(
    r'^As of ([0-2][0-9]):([0-5][0-9]) GMT,'
    r' ([1-3]?[0-9]) ([A-Z][a-z]+) ([0-9]{4}),'
    r' ([1-9]?[0-9]+) countries have officially reported'
    r' ([1-9]?[0-9, ]+) cases of influenza A\(H1N1\) infection, including'
    r' ([1-9]?[0-9,]+) deaths.')

# Patterns for the per-update web page (update 50 onwards).  Compiled once
# here instead of on every loop iteration.
reo_date = re.compile(
    r'([1-3]?[0-9]) ([A-Z][a-z]+) ([0-9]{4})'
    r' ([0-2][0-9]):([0-5][0-9]) GMT',
    re.DOTALL)
reo_table = re.compile(
    r'Cases.*?Deaths.*?Cases.*?Deaths.*?'
    r'(.+?)'
    r'Grand\s+Total(.+?)',
    re.DOTALL)
reo_tr = re.compile(r'.*?(.+?).*?', re.DOTALL)
reo_total = re.compile(r'.*?.*?([0-9]+).*?.*?.*?([0-9]+).*?', re.DOTALL)

months_num2month = [
    None, 'January', 'February', 'March', 'April', 'May', 'June', 'July',
    'August', 'September', 'October', 'November', 'December']
months_month2num = {name: num for num, name in enumerate(months_num2month)}

entries_new = {}    # update number -> freshly built data row
# Items are walked in reverse of their order in the feed.
for item in reversed(feed_items):
    # Update Number
    mo = reo_title.search(item[0])
    if not mo:  # E.g., after update 58
        continue
    update = int(mo.group(1))
    update_minor = mo.group(2) or ''    # '' when there is no minor version

    print('Processing update ' + str(update) + '...')

    if update <= 49:    # Update ?? - 49
        mo = reo_description_v1.match(item[1])
        assert mo
        d = datetime.datetime(int(mo.group(5)),                 # Year
                              months_month2num[mo.group(4)],    # Month
                              int(mo.group(3)),                 # Day
                              int(mo.group(1)),                 # Hour
                              int(mo.group(2)),                 # Minute
                              )
        areas = int(mo.group(6))
        confirmeds = int(mo.group(7).replace(',', '').replace(' ', ''))
        deaths = int(mo.group(8).replace(',', '').replace(' ', ''))
    else:   # Update 50 - ?? (latest)
        # The figures live on the linked page.  Distinct names are used so the
        # feed page, the item list and the loop variable are not overwritten.
        with urllib.request.urlopen(item[3]) as response:
            update_page = response.read().decode()

        date_matches = reo_date.findall(update_page)
        assert len(date_matches) == 1
        day, month, year, hour, minute = date_matches[0]
        d = datetime.datetime(int(year),
                              months_month2num[month],
                              int(day),
                              int(hour),
                              int(minute),
                              )

        table_matches = reo_table.findall(update_page)
        assert len(table_matches) == 1
        table_body, total_text = table_matches[0]

        # One row match per reporting area.
        areas = len(reo_tr.findall(table_body))

        mo_total = reo_total.match(total_text)
        confirmeds = int(mo_total.group(1))
        deaths = int(mo_total.group(2))

    entries_new[update] = '\t'.join([
        str(update) + update_minor,
        d.strftime('%Y-%m-%d %H:%M'),
        str(areas),
        str(confirmeds),
        str(deaths),
    ])

for i in sorted(entries_new):
    print(' ' + entries_new[i])


# Update data
print('Updating existing data...')
content = set(entries.values())
entries.update(entries_new)
content_updated = set(entries.values())
if content == content_updated:
    print('No new entry and thus no update is required.')
    sys.exit()

# Build the rows before touching the file: a gap in the update numbers raises
# KeyError here, while the existing data file is still intact.
rows = [entries[update] + '\n' for update in range(1, len(entries) + 1)]


# Make a backup file
print('Creating a backup data file...')
filename_new = 'h1n1.gpdat.' + str(len(entries))
# shutil.copy raises on failure, so the data file is never overwritten
# without a backup having been made.
shutil.copy(DATA_FILE, filename_new)


# Update data file
print('Updating data file...')
with open(DATA_FILE, 'w') as fp_data:
    fp_data.write('#Update\tDate & Time (GMT)\tAreas\tConfirmed\tDeaths\n')
    fp_data.writelines(rows)


# Generate and preview charts
print('Generating charts...')
os.system('gnuplot h1n1.gp')
print('Previewing charts...')
os.system('display h1n1_confirmed.png')
os.system('display h1n1_death.png')
os.system('display h1n1_mortality.png')
os.system('display h1n1_areas.png')


# Upload files via FTP
print('Uploading files to webpage...')
print(' Logging in...')
filenames = [
    'h1n1_confirmed.png',
    'h1n1_areas.png',
    'h1n1_death.png',
    'h1n1_mortality.png',
    'h1n1.gp',
    'h1n1.gpdat',
]
# The context manager closes the connection even if a transfer fails.
with ftplib.FTP('vyvy.org') as ftp:
    username = input('Username? ')              # You think I will write this down?
    password = getpass.getpass('Password? ')    # Not echoed to the terminal
    ftp.login(username, password)
    ftp.cwd('public_html/main/sites/vyvy.org/files/')
    for filename in filenames:
        print(" Transferring '" + filename + "'...")
        with open(filename, mode='rb') as fp:
            ftp.storbinary('STOR ' + filename, fp)

print('Done!')