ServerPortfolio  2.0
Python parsers and server
 All Classes Namespaces Files Functions Variables Properties Pages
YahooYQL.py
Go to the documentation of this file.
1 ## @package serverportfolio.Parsers.YahooYQL
2 #
3 # Specific parser using Yahoo YQL interface
4 #
5 # Last changed $Id: YahooYQL.py 21 2015-04-19 19:27:46Z michael $
6 
7 # doc YQL
8 # https://developer.yahoo.com/yql/docs/
9 # YQL console
10 # https://developer.yahoo.com/yql/console/
11 
12 import sys, re, copy, logging
13 import urllib2
14 from urllib import quote_plus # urlencode
15 import datetime, time
16 
17 from lxml import html, etree
18 
19 # base class
20 from serverportfolio.Parsers.Abstract import AbstractParser
21 
22 # working, best way found to get direct access to GlobalDicts
23 from serverportfolio import GlobalDicts
24 from serverportfolio.GlobalDicts import EAction
25 from serverportfolio.StockTemplates import StTmpl
26 from serverportfolio.PortfolioException import PortfolioError, ParserError, QueryError
27 from serverportfolio import Utils
28 from serverportfolio.Parsers import UtilsParsers
29 
MULTI_PLUS = re.compile(r"\+{2,}")
MULTI_SPACE = re.compile(r" {2,}")


# taken from yql/utils
def clean_url(url):
    """Strip newlines from *url* and collapse every run of '+' into one '+'."""
    return MULTI_PLUS.sub("+", url.replace("\n", ""))


def clean_query(query):
    """Flatten a (possibly triple-quoted) query onto one compact line.

    Newlines and tabs are deleted outright (they are not replaced by a
    space), then every run of two or more spaces is reduced to one space.
    """
    # tabs appear when the query is written as an indented """ """ string
    for unwanted in ("\n", "\t"):
        query = query.replace(unwanted, "")
    return MULTI_SPACE.sub(" ", query)
46 
47 ## @class YahooYQL
48 # @brief Use the Yahoo YQL API
50 
51  # Single public address used for all YQL queries; the alternative is to register.
52  __YQL_PUBLIC_URL='https://query.yahooapis.com/v1/public/yql?q='
53 
54  ## @brief Constructor.
55  # @param e_action EAction to perform
56  def __init__(self, e_action):
57  # init parent
58  super(YahooYQL, self).__init__(e_action)
59 
60  self.logger = logging.getLogger('SP.YahooYQL')
61  self.logger.debug("Constructor Parser_YahooYQL")
62 
63  self.source = "YQL"
64 
65  ## @brief Create the URL from a sql query.
66  # table (quote/quotes/stocks/.. defined in the query)
67  # all YQL queries accept multi-stocks ? seems yes
68  def create_url(self):
69  self.logger.debug("create_url")
70  self.logger.debug("e_action: %s" % self.e_action)
71  self.logger.debug("local stock: %s", self.local_stock)
72 
73  # only action supported by this function
74  if (self.e_action == EAction.Fundamental):
75  print "define query fundamental"
76  query="""
77  use "http://www.datatables.org/yahoo/finance/yahoo.finance.quotes.xml";
78  select * from yahoo.finance.quotes where symbol in (%s)
79  """
80  else:
81  raise ParserError("wrong action for YahooYQL %s" % self.e_action.name, \
82  self.local_stock.keys(), self.e_action.name, 'None')
83 
84  str_stocks_yahoo = self._format_symbols_url()
85  query = query % ( str_stocks_yahoo )
86  # case of tabulated string and mutlispace, clean the string
87  query=clean_query(query)
88  self.logger.debug("query: %s" % query)
89  self.logger.debug("encoded query: %s" % quote_plus(query) )
90 
91  self.url = self.__YQL_PUBLIC_URL + quote_plus(query)
92  self.logger.info("url: %s" % self.url)
93 
94  ## @brief Only for testing, take a full sql query as input(hard-coded in main section)
95  # @param sql string query
96  def create_url_test(self, sql_query):
97 
98  self.logger.debug("YahooYQL::create_url_test")
99 
100  # case of tabulated string, mutlispace
101  query=clean_query(sql_query)
102  print "query after clean ", query
103  # query_sql='select * from yahoo.finance.quote where symbol in ("YHOO","GSZ.PA")'
104  # urlib.quote_plus
105  print "encoded: ", quote_plus(query)
106  self.url = self.__YQL_PUBLIC_URL + quote_plus(query)
107  print "self.url: ", self.url
108 
109 
110  ## @brief Parse YQL request, always XML format.
111  # Name of the fields are available in the XML output.
112  # @param s page retrieved by web_query
113  def parse(self, s):
114  self.logger.debug("parse()")
115  # load the XML file
116  root = etree.fromstring( s )
117  # nice output of the XML
118  self.logger.debug("%s" % etree.tostring(root, pretty_print=True, encoding='unicode'))
119 
120  try:
121  # Info done by run_parser_info
122  #if (self.e_action == EAction.Info):
123  #print "EAction == Info"
124  # self.parse_info( root )
125  if self.e_action == EAction.Fundamental:
126  self.parse_fund( root )
127  else:
128  self.logger.error("wrong action in parse: %s" % self.e_action.name )
129  except ParserError as ex:
130  self.logger.error("parse() catch ParserError: %s", ex)
131  raise
132  except Exception as ex:
133  self.logger.error("parse() catch Exception: %s", ex)
134  raise ParserError(ex, self.local_stock.keys(), self.e_action.name, self.url)
135 
136  ## @brief Parse XML for info action
137  # To get Industry,Sector and Name, this function is called 2 times by run_parser_info()\n
138  # \warning not fully working, table for yahoo.finance.stocks deprecated !
139  # @param s page retrieved by web_query
140  def parse_info(self, s):
141  self.logger.debug("parse_info()")
142 
143  new_created = False
144  # first call is None, creates list of template_parser
145  if self.list_return_data is None:
146  self.list_return_data = list()
147 
148  # format for Info, certainly common to other quote/quotes
149  # <query>
150  # <results>
151  # <stock symbol="faz"> (lower) or GSZ.PA FSLR
152  # do not crash if empty e.g. CAC40 Info
153 
154  # load the XML file into lxml structure
155  root = etree.fromstring( s )
156  # nice output of the XML
157  self.logger.debug("%s" % etree.tostring(root, pretty_print=True, encoding='unicode'))
158 
159  # loop over the stocks in xml
160  for stock_xml in root[0]:
161  # to assign to the list of dictionary
162  new_created = True
163  dict_data_stock = None
164 
165  symbol_yahoo = stock_xml.get("symbol")
166  symbol = self.get_symbol_from_code( symbol_yahoo, 'code_yahoo' )
167  self.logger.debug("symbol: %s" % symbol)
168 
169  # to be sure at least one element is added, crash(later ?) if toto alone, CAC40 is ok
170  # should check about symbol,action to be more specific
171  for elem in self.list_return_data:
172 
173  if elem['symbol'] == symbol:
174  dict_data_stock = elem
175  self.logger.debug("found symbol in list_return_data break")
176  new_created = False
177  break
178 
179  if new_created:
180  # first call create the template parser and the action
181  self.logger.debug("symbol not found in dict_return_data, create a new template parser")
182  #dict_data_stock = None
183  dict_data_stock = StTmpl.get_template_parser( self.e_action.name )
184  dict_data_stock['symbol'] = symbol
185  self.list_return_data.append( dict_data_stock )
186 
187  # exist for sure
188  self._fill_dict_from_xml( stock_xml, dict_data_stock['action_templ'][self.e_action.name] )
189 
190  ## @brief Extract fundamental data from the XML root element
191  # @param root XML root element
192  def parse_fund(self, root):
193  self.logger.debug("parse_fund")
194 
195  self.list_return_data = list()
196 
197  for stock_xml in root[0]:
198  # can check if an error is present, looking for the tag.
199  # should easily save it from here
200  error_code = stock_xml.find('ErrorIndicationreturnedforsymbolchangedinvalid').text
201  if error_code != None:
202  self.logger.warning("error code in YahooYQL:parse_fund %s" % error_code)
203  continue
204 
205  # get symbol of the stock, yahoo code is always present in output
206  symbol_yahoo = stock_xml.get("symbol")
207  symbol = self.get_symbol_from_code( symbol_yahoo, 'code_yahoo' )
208  self.logger.debug("symbol: %s" % symbol)
209  # assign template and symbol
210  dict_data_stock = StTmpl.get_template_parser( self.e_action.name )
211  dict_data_stock['symbol'] = symbol
212 
213  # make shortcut
214  tmpl_action = dict_data_stock['action_templ'][ self.e_action.name ]
215  # fill tmp_dict with values from stock_xml
216  self._fill_dict_from_xml( stock_xml, tmpl_action )
217 
218  # add this stock to the list
219  self.list_return_data.append( dict_data_stock )
220 
221  ## @brief Specific call to get Info.
222  # This version should be called instead of run_parser, only in the case of EAction.Info with YahooYQL.\n
223  # Two parsing passes are needed to fill the template.
224  # @param stock list of stock symbol(s)
225  def run_parser_info(self, stock, option_post=None):
226  self.logger.debug("Entry run_parser_info")
227 
228  if option_post != None:
229  self.option_post = option_post
230  self.logger.debug('option_post is present: %s' % option_post)
231 
232  try :
233  self.store_stock_copy( Utils.to_list(stock) )
234  # could return an exception... to see with post-processing
235  if len(self.local_stock) == 0:
236  self.logger.error("No valid Stock after store_stock_copy")
237  return
238  except Exception as ex:
239  self.logger.debug("Caught Exception in store_stock_copy: ex", ex)
240  raise
241 
242  # first_url, get Sector/Industry
243  query = """
244  use "http://www.datatables.org/yahoo/finance/yahoo.finance.stocks.xml";
245  select * from yahoo.finance.stocks where symbol in (%s)
246  """
247  str_stocks_yahoo = self._format_symbols_url()
248  query = query % ( str_stocks_yahoo )
249 
250  # case of tabulated string and mutlispace
251  query=clean_query(query)
252  self.url = self.__YQL_PUBLIC_URL + quote_plus(query)
253  self.logger.info("url: %s" % self.url)
254 
255  # first query
256  s = None
257  try :
258  s = UtilsParsers.web_query( self.url )
259  # now correct QueryError
260  except QueryError as ex:
261  self.logger.error("Got a QueryError from web_query %s" % ex )
262  raise
263  # here crash
264  except Exception as ex:
265  self.logger.error("Got a general Exception from web_query %s" % ex )
266  # should raise a QueryError normally
267  raise ParserError( ex, self.local_stock.keys(), self.e_action.name, self.url )
268 
269  # first parsing
270  # try
271  self.parse_info(s)
272 
273  # second parsing, quote
274  query="""
275  use "http://www.datatables.org/yahoo/finance/quote/yahoo.finance.quote.xml";
276  select * from yahoo.finance.quote where symbol in (%s)
277  """
278  #str_stocks_yahoo = None
279  str_stocks_yahoo = self._format_symbols_url()
280  query = query % ( str_stocks_yahoo )
281  #print "intermediate query", query
282  # case of tabulated string and mutlispace
283  query=clean_query(query)
284  self.logger.debug("query: %s" % query)
285  self.logger.debug("encoded query: %s" % quote_plus(query) )
286 
287  self.url = self.__YQL_PUBLIC_URL + quote_plus(query)
288  self.logger.info("url: %s" % self.url)
289 
290  # second query
291  s = None
292  self.logger.debug("web_query")
293  try :
294  s = UtilsParsers.web_query( self.url )
295  except Exception as ex:
296  self.logger.error("Got a general Exception from web_query %s" % ex )
297  # should raise a QueryError normally
298  raise ParserError( ex, self.local_stock.keys(), self.e_action.name, self.url )
299 
300  # second parsing, same function
301  # try
302  # original, call the same, but sector/industry not present !
303  self.parse_info(s)
304 
305  # update the stock
306  self.update_stock()
307 
308 # could be static(except logger), or in utils
309 
310  ## @brief Fill a dictionary template from the tags in the XML.
311  # All unknown entries are stored in an 'other' sub-dictionary.\n
312  # @param stock_xml lxml element as input
313  # @param dict_stock dictionary to be filled as output
314  def _fill_dict_from_xml(self, stock_xml, dict_stock):
315  self.logger.debug("fill_dict_from_xml")
316  self.logger.debug("size stock_xml %d" % len(stock_xml) )
317  #self.logger.debug("%s" % etree.tostring(stock_xml, pretty_print=True, encoding='unicode'))
318 
319  for elem in stock_xml:
320  #print "elem.tag ", elem.tag
321  #print "elem.text ", elem.text
322  # entry is fixed in template
323  if elem.tag in dict_stock:
324  # print "Fixed entry"
325  # need to clean, check if empty or other
326  dict_stock[ elem.tag ] = elem.text
327  # assign to other, to test if 'other' exists in a specific templ_dict
328  else:
329  # print "Assign to other"
330  dict_stock['other'][ elem.tag ] = elem.text
331 
332 # from YahooCSV, to delete function or replace or utils.parsers
333 # code_yahoo = ','.join( stock.get_action('Static','code_yahoo') for stock in self.local_stock.values() )
335  compt = 0
336  str_stocks_yahoo = str()
337  for stock_symbol in self.local_stock:
338  str_stocks_yahoo += '"' + self.local_stock[stock_symbol].get_action('Static','code_yahoo') + '"'
339 
340  if compt < len(self.local_stock)-1:
341  str_stocks_yahoo += ','
342  compt += 1
343 
344  self.logger.debug("string stocks for url:%s",str_stocks_yahoo)
345  return str_stocks_yahoo
346 
347 # ####### Main for test only
348 # Implemented Info from yahoo.finance.stocks.xml:
349 # - Sector
350 # - Industry
351 # But NameCompany is not present !
352 # Can get from yahoo.finance.quote/quotes, quotes more complete or from any HTML page(maybe tricky)
353 #
354 # Can get from YahooCSV Fundamental, parsing more tricky move to Fundamental not very logic, or deal in update...
355 #
356 # Can merge Info/Fundamental or make 2 parsing for Info only.
357 # Update Name/Sector/Industry to do only once ! update info to write in YQL !
358 #
359 # todo add arguments (normal use Stock Action) and option to test simple sql query, partly done
if __name__ == "__main__":
    # Debugging entry point only: ./Run_Parsers is the front end for all options.
    # Usage: YahooYQL Stock1,Stock2 Info|Fundamental [-opt_sql]
    # Every print below has a single argument so that the output is the same
    # whether print is a statement or a function.

    print("YahooYQL main")

    from serverportfolio.DictionaryStocks import DictionaryStocks
    import UtilsParsers

    logging.basicConfig(level=logging.DEBUG)
    # logging.basicConfig(filename='myapp.log', level=logging.INFO) # filemode='w' to not append to file
    logging.getLogger('SP')
    m_logger = logging.getLogger("SP.main")

    # no explicit DictionaryStocks() call needed here, loaded on demand by parsers or stocks

    try:
        m_stock = sys.argv[1]
        m_action = sys.argv[2]
    except IndexError:
        print(" Only for debugging, should call ./Run_Parsers for all options")
        print(" Usage:")
        # the flag tested below is -opt_sql; the usage text now says so
        print(" YahooYQL FSLR,GSZ Info [-opt_sql]")
        print(" Action: Info / Fundamental")
        print(" optional -opt_sql to run a fixed(hard-coded) sql query and print the XML only, for testing")
        sys.exit(1)

    # transform to a list, from format "Stock1,Stock2,.."
    m_list_stock = m_stock.split(',')

    # command-line arguments are always strings: look the action up by name
    try:
        m_e_action = EAction[m_action]
    except Exception:
        # not a bare except: SystemExit / KeyboardInterrupt must not be swallowed
        m_logger.error("Exception from EAction enumeration, cannot assign an e_action from action: %s" % m_action)
        sys.exit(1)

    # read the optional third argument
    m_option_sql = str()
    if len(sys.argv) == 4:
        m_option_sql = sys.argv[3]
    m_opt_sql = (m_option_sql == '-opt_sql')

    # test with opt_sql: use an explicit sql request
    if m_opt_sql:
        print("opt_sql  %s" % (m_opt_sql,))
        print("\n== Test only SQL query")

        # action not used. At least can get the XML file to compare
        m_parser = YahooYQL(EAction.Info)

        # example use finance.stocks.xml table
        #   use "http://www.datatables.org/yahoo/finance/yahoo.finance.stocks.xml";
        #   select * from yahoo.finance.stocks where symbol in ("GSZ.PA","FSLR")
        #
        # example use finance.quotes.xml
        #   use "http://www.datatables.org/yahoo/finance/yahoo.finance.quotes.xml";
        #   select * from yahoo.finance.quotes where symbol in ("GSZ.PA","FSLR")

        # example use finance.quote.xml, less output, a filter is applied
        # can be used for Info
        m_query_sql = """
            use "http://www.datatables.org/yahoo/finance/quote/yahoo.finance.quote.xml";
            select * from yahoo.finance.quote where symbol in ("GSZ.PA","FSLR")
            """

        m_parser.create_url_test(m_query_sql)
        print("m_parser.url  %s" % (m_parser.url,))

        try:
            m_s = UtilsParsers.web_query(m_parser.url)

        except PortfolioError as ex:
            print("Catch PortfolioError from web_query")
            print("ex:  %s" % (ex,))
            print("get_format_string\n%s" % (ex.get_format_string(),))
            sys.exit(1)

        except Exception as ex:
            print("Catch exception from web_query")
            print("ex:  %s" % (ex,))
            sys.exit(1)

        print(m_s)

        # at least can get formatted XML to see available data
        m_root = etree.fromstring(m_s)
        print(etree.tostring(m_root, pretty_print=True, encoding='unicode'))

        # exit after execution
        sys.exit(0)

    # normal run with Info or Fundamental
    m_parser = YahooYQL(m_e_action)

    try:
        if m_e_action == EAction.Info:
            m_parser.run_parser_info(m_list_stock)
        else:
            m_parser.run_parser(m_list_stock)

    except PortfolioError as ex:
        print("Caught PortfolioError from run_parser_info()")
        print(ex.get_format_string())
    except Exception as ex:
        # message previously named store_stock_copy (copy/paste)
        m_logger.debug("Caught Exception while running the parser: %s", ex)
        raise

    # simple output
    for stock in m_list_stock:
        print(Utils.pretty_dict(DictionaryStocks().get_stocks(stock).get_action()))

    # can skip the config explicitly or include in pretty_dict
    #for stock in m_list_stock:
    #    print Utils.pretty_dict( DictionaryStocks().get_stocks( stock ).get_action(m_e_action.name) )

    sys.exit(0)
491 
492  # Example from console
493  # desc yahoo.finance.quotes
494  #url='https://query.yahooapis.com/v1/public/yql?q=desc%20yahoo.finance.quotes&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys'
495  # same with json, default XML
496  #url='https://query.yahooapis.com/v1/public/yql?q=desc%20yahoo.finance.quotes&format=json&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback='
497 
498  # quote table
499  #url='https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quote%20where%20symbol%20in%20(%22YHOO%22%2C%22GSZ%22)&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys'
500  # without additional info, Table not found, GSZ code is wrong, should be GSZ.PA, good for test
501  # last test working url + url+env
502  #url='https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quote%20where%20symbol%20in%20(%22YHOO%22%2C%22GSZ%22)'
503 
504  # test1, ok works with env use all tables
505  # query = 'https://query.yahooapis.com/v1/public/yql?q='
506  # test test2 specify the table to use in SQL request, ok working also
507  # query_sql='use "http://www.datatables.org/yahoo/finance/quote/yahoo.finance.quote.xml" as foo;select * from foo where symbol in ("YHOO","GSZ.PA")'
508  # CreateQuerySQL
509  # working in test version
510  # query_sql="""
511  # use "http://www.datatables.org/yahoo/finance/quote/yahoo.finance.quote.xml" as foo;
512  # select * from foo where symbol in ("YHOO","GSZ.PA")
513  #"""
514  # ok in test version
515  #query_sql="""
516  # use "http://www.datatables.org/yahoo/finance/yahoo.finance.stocks.xml";
517  # select * from yahoo.finance.stocks where symbol="%s"
518  #"""
519 
520  # for test1
521  # example, http://stackoverflow.com/questions/16783175/yql-no-definition-found-for-table
522  # to load all Open table: env="store://datatables.org/alltableswithkeys"
523  # normal use with env= in url, here use all tables
524  #url+='&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys'
525 
526  # test2, table to use in the sql command, could use other table, split use and table and query
527  # allow to test more OpenTable quote/quotes/stocks,
528  # query = 'use "http://www.datatables.org/yahoo/finance/yahoo.finance.option_contracts.xml" as foo;
529  # SELECT * FROM foo WHERE symbol="SPY"'
e_action
enumeration (EAction) of the type of query to perform
Definition: Abstract.py:40
def _fill_dict_from_xml
Fill a dictionary template from the tags in the XML.
Definition: YahooYQL.py:314
def store_stock_copy
Make a local copy of the Stock objects (linked of the original in DictionaryStocks in fact) into a lo...
Definition: Abstract.py:163
def parse
Parse YQL request, always XML format.
Definition: YahooYQL.py:113
Define the global variable StockTemplates.StTmpl and dictionary templates.
def parse_info
Parse XML for info action To get Industry,Sector and Name, this function is called 2 times by run_par...
Definition: YahooYQL.py:140
Define 2 abstract methods which need to be overridden by the Parsers and a generic algorithm (run_pa...
Definition: Abstract.py:29
Define custom and specific exceptions for the complete package.
Derived class specific to the parsers.
Define an abstract base class for specific Parsers.
Definition: Abstract.py:1
def get_symbol_from_code
Retrieve the symbol (or Stock) from the code_yahoo or code_bourso.
Definition: Abstract.py:211
def create_url
Create the URL from a sql query.
Definition: YahooYQL.py:68
Container of all Stocks objects, it also reads the static stocks configuration file "dictstocks...
Global variables for configuration: paths, TCP ports and generic definitions.
Definition: GlobalDicts.py:1
url
save url, useful for reporting errors and exceptions
Definition: Abstract.py:37
def update_stock
Update the local stocks with the new retrieved data.
Definition: Abstract.py:183
def run_parser_info
Specific call to get Info.
Definition: YahooYQL.py:225
def create_url_test
Only for testing, take a full sql query as input(hard-coded in main section)
Definition: YahooYQL.py:96
def parse_fund
Extract fundamental data from the XML root element.
Definition: YahooYQL.py:192
Define singleton class DictionaryStocks, act as the main container of Stocks objects.