13 import types, re, copy, logging
15 from time
import strftime, localtime
17 from lxml
import html, etree
19 from serverportfolio
import Utils
35 'variation': [
'Variation'],
36 'dernier echange': [
'Dernier'],
37 'volume': [
'Volume '],
38 'ouverture': [
'Ouverture'],
39 'plushaut': [
'+ Haut'],
44 __INSTVALUE_STOCK_URL=
"http://www.boursorama.com/cours.phtml?symbole="
49 AbstractParser.__init__(self, action )
50 self.
logger = logging.getLogger(
'SP.Parser_Bourso')
57 self.logger.debug(
"create_url()")
58 self.logger.debug(
"local stock: %s", self.
local_stock)
61 "Parser_Bourso deals only with one Stock at a time, nb: %d" % len(self.
local_stock)
63 stock_symbol = self.local_stock.keys()[0]
68 self.logger.info(
"url: %s" % self.
url)
70 except Exception
as ex:
71 self.logger.error(
'Caught Exception in create_url: %s', ex)
72 raise ParserError(
"Cannot create url", stock_symbol, self.e_action.name,
None )
78 self.logger.debug(
"entry parse")
88 self.logger.debug(
"list_return_data action template: %s" % self.
list_return_data)
94 htmltree = html.document_fromstring( html_page )
97 table_tmp_extracted = htmltree.xpath(
"//table[@class='info-valeur list']")
103 if len(table_tmp_extracted) > 0:
104 table_extracted = table_tmp_extracted[0]
109 self.logger.error(
"Could not extract the table from the html page, url, self.url %s", self.
url)
112 self.logger.error(
"full html page:\n%s" % html_page)
113 self.logger.error(
"list_return_data with error: %s" % self.
list_return_data)
114 raise ParserError(
"Could not extract the table from the html page", \
118 for name
in self.dict_record.keys():
123 except Exception
as ex:
124 self.logger.debug(
"Catch Exception in _extract_data: %s" % ex)
125 raise ParserError(ex, self.local_stock.keys()[0], self.e_action.name, self.
url)
137 stock_symbol = self.local_stock.keys()[0]
141 if name ==
"dernier echange":
144 if stock_obj.get_market() ==
'devise':
145 dict_inst_value[
'date'] = strftime(
"%Y-%m-%d", localtime() )
146 dict_inst_value[
'time'] = strftime(
"%H:%M:%S", localtime() )
152 dict_inst_value[
'date'] = date
153 dict_inst_value[
'time'] = time
158 elif name ==
"value":
162 dict_inst_value[
'value'] = value
163 dict_inst_value[
'state'] = state
170 dict_inst_value[name] = tmp_data
181 query =
".//*[text()='%s']" % ( self.
dict_record[name][0] )
190 source = table.xpath( query )
196 tmp_data = source.xpath(
"..//td[last()]//text()")
201 except Exception
as ex:
202 print "got exception in _extract_data_generic ex:", ex
203 self.logger.error(
"Exception in reading Volume of EURUS (at least)")
217 return float(
''.join(re.findall(
r'([\d+\s\.\+\-])',line)).replace(
' ',
''))
224 cours = table.xpath(
".//*[text()='Cours']")
226 str_value = cours[0].xpath(
"../..//td[last()]//text()")
236 no = str_value[2].find(
'(c)')
242 return ( value, state )
252 dernier = table.xpath(
".//*[contains(text(),'Dernier')]")
257 tmp_datetime = dernier[0].xpath(
"../td[last()]//text()")
264 if len(tmp_datetime) == 1:
267 text_ascii = tmp_datetime[0].encode(
'ascii',
'replace')
269 tab_datetime = text_ascii.rsplit(
'?')
270 date_tmp=datetime.datetime.strptime( tab_datetime[0],
"%d/%m/%y" )
271 time_tmp=tab_datetime[1]
275 elif len(tmp_datetime) == 2:
277 text_ascii = tmp_datetime[0].encode(
'ascii',
'replace')[:-1]
279 date_tmp=datetime.datetime.strptime( text_ascii,
"%d/%m/%y" )
280 time_tmp=tmp_datetime[1]
285 self.logger.error(
"_extract_data_dernier tmp_datetime %s" % tmp_datetime )
287 raise ParserError(
'Cannot extract date of the dernier echange',
288 self.local_stock.keys()[0], self.e_action.name, self.
url)
291 return ( date_tmp.strftime(
"%Y-%m-%d"), time_tmp )
328 if __name__ ==
"__main__":
334 logging.basicConfig(level=logging.DEBUG)
335 logger = logging.getLogger(
'SP')
336 main_logger = logging.getLogger(
"SP.main")
342 print " Only for debugging, should call ./Run_Parsers for all options "
344 print " Parser_Bourso CAC40 InstValue"
345 print " Action: InstValue only implemented"
350 list_stock.append(stock)
354 print "\n== store copy of object in parser"
356 parser.store_stock_copy( Utils.to_list(stock) )
358 print "Caught error in store_stock_copy"
361 print "\n== create_url"
364 except Exception
as ex:
365 print "Catch Exception ", ex
368 print "\n== web_query "
370 s = UtilsParsers.web_query( parser.url )
372 except Exception
as ex:
373 print "Got exception from web_query: ", ex
376 print "\n== parse the web page"
379 except ParserError
as ex:
380 print "Main catch PaserError ", ex
381 print ex.get_format_string()
383 except Exception
as ex:
384 print "Catch standard exception ", ex
386 print "\n== update the stock(s)"
387 parser.update_stock()
389 print Utils.pretty_dict(
DictionaryStocks().get_stocks( list_stock ).get_action() )
391 print "\n== sleep 5, modify state and second update "
394 stock.set_action(
'InstValue',
'state',
'OPEN')
397 parser.run_parser( list_stock )
398 except PortfolioError
as ex:
399 print "Caught PortfolioEorror"
400 print "ex:", ex.get_format_string()
402 except Exception
as ex:
403 print "Catch standard exception ", ex
405 print Utils.pretty_dict(
DictionaryStocks().get_stocks( list_stock ).get_action() )
e_action
enumeration (EAction) of the type of query to perform
dictionary dict_record
Map the labels found in the html page to the entries of the InstValue template.
Define the global variable StockTemplates.StTmpl and dictionary templates.
def parse
Parse the html page, specific to instantaneous values.
def _extract_data_value
Specific method to extract the value and the state of the stock (OPEN/CLOSED).
Define 2 abstract methods which need to be overridden by the Parsers and a generic algorithm (run_pa...
def create_url
Create the url with the code of the stock.
def _extract_data_generic
Generic version to parse values: extract the last text() from a td entry.
Define custom and specific exceptions for the complete package.
Derived class specific to the parsers.
Define an abstract base class for specific Parsers.
string __INSTVALUE_STOCK_URL
url format for boursorama, to complete with code_bourso
def _extract_number
General function to extract all numbers (price, volume, ...).
Container of all Stocks objects; it also reads the static stocks configuration file "dictstocks...
Global variables for configuration: paths, TCP ports and generic definitions.
url
Saved url, useful for reporting errors and exceptions.
def _extract_data_dernier
Specific method to get the date and time of the "dernier echange" (last trade).
def _extract_data
Extract the instantaneous values from the main lxml etree table.
Define the singleton class DictionaryStocks, which acts as the main container of Stocks objects.
Specific parser for querying the Boursorama website, only for instantaneous data. ...