# Generate charts from WHO updates on Influenza A(H1N1)
#
# Copyright (C) 2009 http://www.vyvy.org/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see https://www.gnu.org/licenses/.
#
#
# Notes:
# - This simple program depends on a well-formatted RSS feed from WHO updates
# - This program relies on uncaught exceptions for very simple error handling!
#
# 10-Jun-09: First working version (First used for Update 47)
# 13-Jun-09: Added code for uploading files to the website
# 09-Jul-09: Improved code to handle the format used since Update 50
import datetime
import ftplib
import getpass
import os
import re
import shutil
import urllib.request
# Read existing data file
#
# Every non-header line of 'h1n1.gpdat' must start with
# '<update>[.<minor>]<TAB>'. `entries` maps the integer update number to the
# whole line (trailing whitespace removed).
print('Reading existing data file...')
re_update = r'^([0-9]+)(\.[0-9]+)?\t'
reo_update = re.compile(re_update)
entries = {}
with open('h1n1.gpdat') as fp_data:
    for i, line in enumerate(fp_data):
        # Ignore the header
        if line.startswith('#Update'):
            continue
        # Parse the line
        line = line.rstrip()  # Get rid of newline characters
        mo = reo_update.match(line)
        if mo is None:
            raise ValueError(
                'Malformed data line {0}: {1!r}'.format(i, line))
        update = int(mo.group(1))  # Update number
        # Update N is expected on (0-based) line N of the file; anything else
        # means the file is out of order or has missing/extra lines.
        if i != update:
            raise ValueError(
                'Data line {0} holds update {1}'.format(i, update))
        # Add the entry
        entries[update] = line
for i in sorted(entries):
    print(' ' + entries[i])
# Fetch the page of RSS feed
print('Fetching RSS feed...')
rssfeed_url = "http://www.who.int/feeds/entity/csr/disease/swineflu/en/rss.xml"
# The with-block closes the HTTP response as soon as the body has been read,
# even if reading or decoding raises.
with urllib.request.urlopen(rssfeed_url) as response:
    page = response.read().decode()  # bytes.decode() defaults to UTF-8
# Parse the page
#
# Each match in `ms` is a 4-tuple of captured strings; later code reads
# element 0 as the title, 1 as the description and 3 as a URL to fetch.
#
# NOTE(review): the pattern has no literal text around its four capture
# groups, which looks like markup was stripped from this copy of the script.
# The pattern text is kept character-for-character here -- confirm it against
# the original before relying on it.
re_item = (r'.*?'
           r'\s*?(.+?)\s*?.*?'
           r'\s*?(.+?)\s*?.*?'
           r'\s*?(.+?)\s*?.*?'
           r'\s*?(.+?)\s*?.*?'
           r'')
reo_item = re.compile(re_item, re.DOTALL)
ms = reo_item.findall(page)
if len(ms) != 10:  # Expect 10 items
    raise ValueError('Expected 10 feed items, found ' + str(len(ms)))
print('Parsing ' + str(len(ms)) + ' entries...')
# Pattern for the item title: captures the update number and an optional
# '.<minor>' suffix at the end of the title.
# Stricter variant, currently disabled:
#   r'^Influenza A\(H1N1\) - update ([0-9]+)(\.[0-9]+)?$'
re_title = r'update ([0-9]+)(\.[0-9]+)?$'
reo_title = re.compile(re_title)
# Pattern for the item description. Groups, in order: hour, minute, day,
# month name, year, number of countries, number of cases, number of deaths.
# Raw strings keep '\(' and '\.' as regex escapes without relying on Python
# passing unknown string escapes through.
re_description_v1 = (
    r'^As of ([0-2][0-9]):([0-5][0-9]) GMT,'
    r' ([1-3]?[0-9]) ([A-Z][a-z]+) ([0-9]{4}),'
    r' ([1-9]?[0-9]+) countries have officially reported'
    r' ([1-9]?[0-9, ]+) cases of influenza A\(H1N1\) infection, including'
    r' ([1-9]?[0-9,]+) deaths.'
)
reo_description_v1 = re.compile(re_description_v1)
# Month lookup tables; index 0 is a None placeholder so that January is 1.
months_num2month = [
    None,
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]
months_month2num = {
    name: number for number, name in enumerate(months_num2month)
}
# Build one tab-separated data line per feed item, walking `ms` in reverse.
# `entries_new` maps the integer update number to its line:
#   '<update>[.<minor>]\t<YYYY-MM-DD HH:MM>\t<areas>\t<confirmed>\t<deaths>'
#
# The patterns used for updates 50 and later are compiled once, up front.
#
# NOTE(review): re_table, re_tr and re_total contain almost no literal text
# around their capture groups, and re_tr was split across two physical lines
# in this copy of the script -- it looks like markup was stripped from the
# pattern strings. Their text is kept character-for-character (the line break
# inside re_tr is written as '\n'); confirm them against the original before
# relying on the scraped numbers.
re_date = (
    r'([1-3]?[0-9]) ([A-Z][a-z]+) ([0-9]{4})'
    r' ([0-2][0-9]):([0-5][0-9]) GMT'
)
reo_date = re.compile(re_date, re.DOTALL)
re_table = (
    r'Cases.*?Deaths.*?Cases.*?Deaths.*?'
    r'(.+?)'
    r'Grand\s+Total(.+?)'
)
reo_table = re.compile(re_table, re.DOTALL)
re_tr = '\n.*?(.+?).*?'  # Not a raw string: '\n' is a real newline here
reo_tr = re.compile(re_tr, re.DOTALL)
re_total = '.*?.*?([0-9]+).*?.*?.*?([0-9]+).*?'
reo_total = re.compile(re_total, re.DOTALL)

entries_new = {}
for m in reversed(ms):
    # Update Number
    mo = reo_title.search(m[0])
    if not mo:  # E.g., after update 58
        continue
    update = int(mo.group(1))
    update_minor = mo.group(2) or ''
    print('Processing update ' + str(update) + '...')
    if update <= 49:  # Update ?? - 49
        # Everything needed is in the item description itself.
        mo = reo_description_v1.match(m[1])
        if mo is None:
            raise ValueError(
                'Unrecognised description for update ' + str(update))
        d = datetime.datetime(int(mo.group(5)),  # Year
                              months_month2num[mo.group(4)],  # Month
                              int(mo.group(3)),  # Day
                              int(mo.group(1)),  # Hour
                              int(mo.group(2)),  # Minute
                              )
        areas = int(mo.group(6))
        confirmeds = int(mo.group(7).replace(',', '').replace(' ', ''))
        deaths = int(mo.group(8).replace(',', '').replace(' ', ''))
    else:  # Update 50 - ?? (latest)
        # The figures are scraped from the page the item links to. Separate
        # names are used so the loop's own `m`, `ms` and the feed `page` are
        # not overwritten.
        with urllib.request.urlopen(m[3]) as response:
            update_page = response.read().decode()
        ms_date = reo_date.findall(update_page)
        if len(ms_date) != 1:
            raise ValueError(
                'Expected exactly one date on the page for update '
                + str(update) + ', found ' + str(len(ms_date)))
        m_date = ms_date[0]
        d = datetime.datetime(int(m_date[2]),  # Year
                              months_month2num[m_date[1]],  # Month
                              int(m_date[0]),  # Day
                              int(m_date[3]),  # Hour
                              int(m_date[4]),  # Minute
                              )
        ms_table = reo_table.findall(update_page)
        if len(ms_table) != 1:
            raise ValueError(
                'Expected exactly one table on the page for update '
                + str(update) + ', found ' + str(len(ms_table)))
        m_table = ms_table[0]
        # One re_tr match per reporting area in the part before the totals.
        ms_tr = reo_tr.findall(m_table[0])
        areas = len(ms_tr)
        mo_total = reo_total.match(m_table[1])
        if mo_total is None:
            raise ValueError(
                'No totals found on the page for update ' + str(update))
        confirmeds = int(mo_total.group(1))
        deaths = int(mo_total.group(2))
    line = '\t'.join([
        str(update) + update_minor,
        d.strftime('%Y-%m-%d %H:%M'),
        str(areas),
        str(confirmeds),
        str(deaths),
    ])
    entries_new[update] = line
for i in sorted(entries_new):
    print(' ' + entries_new[i])
# Update data
#
# Merge the freshly parsed lines into `entries` (new lines win for the same
# update number) and stop if the set of lines did not change.
print('Updating existing data...')
content = set(entries.values())
entries.update(entries_new)
content_updated = set(entries.values())
if content == content_updated:
    print('No new entry and thus no update is required.')
    # quit() is a helper the site module adds for interactive sessions and is
    # not guaranteed to exist; raising SystemExit ends the script with the
    # same (success) exit status.
    raise SystemExit
# Make a backup file
#
# The backup is named after the number of entries about to be written.
# shutil.copy raises if the copy fails, so the data file is never overwritten
# without a backup in place (os.system('cp ...') ignored failures and needed
# a Unix shell).
print('Creating a backup data file...')
filename_new = 'h1n1.gpdat.' + str(len(entries))
shutil.copy('h1n1.gpdat', filename_new)
# Update data file
print('Updating data file...')
# Build every line before opening the file for writing: update numbers are
# expected to run 1..len(entries) without gaps, and a missing number raises
# KeyError here, while the existing file is still intact.
lines = [entries[number] + '\n' for number in range(1, len(entries) + 1)]
with open('h1n1.gpdat', 'w') as fp_data:
    fp_data.write('#Update\tDate & Time (GMT)\tAreas\tConfirmed\tDeaths\n')
    fp_data.writelines(lines)
# Generate and preview charts
#
# Runs gnuplot on 'h1n1.gp', then runs 'display' on each chart image in turn.
# The exit status of the commands is not checked.
print('Generating charts...')
os.system('gnuplot h1n1.gp')
print('Previewing charts...')
for chart in ('confirmed', 'death', 'mortality', 'areas'):
    os.system('display h1n1_' + chart + '.png')
# Upload files via FTP
#
# Credentials are asked for interactively after connecting; the charts, the
# gnuplot script and the data file are then stored in binary mode.
print('Uploading files to webpage...')
print(' Logging in...')
filenames = [
    'h1n1_confirmed.png',
    'h1n1_areas.png',
    'h1n1_death.png',
    'h1n1_mortality.png',
    'h1n1.gp',
    'h1n1.gpdat',
]
# The with-block sends QUIT and closes the connection on the way out, also
# when the login or a transfer fails part-way through.
with ftplib.FTP() as ftp:
    ftp.connect('vyvy.org')
    username = input('Username? ')  # You think I will write this down?
    # getpass keeps the password from being echoed to the terminal.
    password = getpass.getpass('Password? ')
    ftp.login(username, password)
    ftp.cwd('public_html/main/sites/vyvy.org/files/')
    for filename in filenames:
        print(" Transferring '" + filename + "'...")
        with open(filename, mode='rb') as fp:
            ftp.storbinary('STOR ' + filename, fp)
print('Done!')