ServerPortfolio  2.0
Python parsers and server
 All Classes Namespaces Files Functions Variables Properties Pages
UtilsParsers.py
Go to the documentation of this file.
1 ## @package serverportfolio.Parsers.UtilsParsers
2 # @brief Utilities functions specific to the parsers
3 #
4 # Last Changed $Id: UtilsParsers.py 13 2015-04-12 19:45:14Z michael $
5 
6 import sys, types, re
7 import urllib2
8 
9 from serverportfolio.PortfolioException import QueryError
10 
## @brief Execute the url (GET call only, could extend)
# The response object is always closed, even when reading it fails.
# @param url address to fetch, handed unchanged to urllib2.urlopen
# @return web page in a string format
# @exception QueryError raised in place of urllib2.URLError; its message
#            carries the error's 'reason' and/or 'code' when present
def web_query( url ):
    # Single-argument print(...) parses on both Python 2 and 3; the doubled
    # space reproduces the separator the old "print a, b" statement inserted.
    print("Entry Utils.web_query url  %s" % (url,))
    try:
        f = urllib2.urlopen(url)
        try:
            # contains the whole downloaded page
            s = f.read()
        finally:
            # close the response whether or not read() succeeded
            f.close()
    except urllib2.URLError as e:
        # Translate the urllib2 error into the package-specific exception.
        # Both attributes are probed because either may be present on e.
        message_str = "QueryError with"
        if hasattr(e, 'reason'):
            # 1-tuple keeps the formatting safe if reason is itself a tuple
            message_str += ' reason: %s,' % (e.reason,)
        if hasattr(e, 'code'):
            message_str += ' error code: %s' % (e.code,)
        raise QueryError(message_str, url)
    except Exception as ex:
        # should not happen: report it and let the original error propagate
        print("Got general exception from urllib2  %s" % (ex,))
        raise
    return s
43 
44 # Not sure a general helper is a good idea: one local to each parser would be more specific (too many different cases).
45 # Copied from Parser_Bourso; either keep it general here or move it into the Yahoo / Yahoo YQL specific parsers.
46 
## @brief Extract a float from a string.
# General function which checks for 'N/A' and '-', then keeps only digits,
# '.', '+' and '-' from the input. Every other character (spaces, tabs,
# newlines, commas, unit or percent signs, ...) is dropped before conversion.
# Specific parsers may implement more specific functions.
# @param line string to parse
# @return float, or nan when line contains 'N/A' or is just '-' once stripped
# @exception ValueError if the characters kept do not form a valid float
#            (for instance when line holds no digit at all)
def extract_number ( line ):
    if 'N/A' in line or line.strip() == '-':
        return float('nan')
    # Whitespace is deliberately NOT captured: capturing it and then removing
    # only ' ' left tabs/newlines inside the number and made float() fail.
    kept = re.findall(r'[\d.+\-]', line)
    return float(''.join(kept))
65 
## @brief Tell whether a Yahoo output field contains the 'N/A' marker.
# Finding 'N/A' is not an error; the result is simply reported to the caller.
# @param line string to parse
# @return True if 'N/A' occurs anywhere in line, False otherwise
def check_nan( line ):
    return 'N/A' in line
def check_nan
Check for N/A in Yahoo output, 'N/A' no error.
Definition: UtilsParsers.py:68
Specific exception related to the web query (Utils.WebQuery)
Define custom and specific exceptions for the complete package.
def web_query
Execute the url (GET call only, could extend)
Definition: UtilsParsers.py:13
def extract_number
Extract a float from a string.
Definition: UtilsParsers.py:53