###############################################################################
# Date: Fri Aug 10 21:19:32 CDT 2007
# Author: John Quigley
# Revision: $Id$
###############################################################################

import os
import re
import time
import logging

from paste import request
from paste import wsgiwrappers

from search.chump import ChumpQuery
from search.engine import DbxmlEngine
from search.engine import InotifyWatcher

import toil

#------------------------------------------------------------------------------
# Search application

class Application(toil.ApplicationInterface):
    """Search application serving template pages and XML query results.

    Configuration is read from the ``TOIL`` and ``APP`` sections of the
    ``conf`` object passed to the constructor.  Requests are dispatched on
    the request URL relative to the ``web.root`` option.
    """

    logger = logging.getLogger('Application')

    def __init__(self, conf):
        """Store the configuration and build the engine and corpus watcher.

        conf -- configuration object; must provide ``get``, ``getboolean``
                and ``items`` (presumably a ConfigParser instance -- confirm
                against the caller).

        Nothing is started here; call ``initialize()`` before serving.
        """
        self.conf = conf
        self.debug = conf.getboolean('TOIL', 'debug')
        self.corpus = list()
        self.templates = dict()
        self.isInitialized = False

        # Predicate shared by the engine and the watcher: accepts a path only
        # if it is an existing regular file whose name matches corpus.regex.
        # The compiled pattern is kept as a function attribute so the callable
        # handed to the engine and the watcher stays exactly as before.
        def filter_fun(path):
            return os.path.isfile(path) and filter_fun.regexp.search(path)
        filter_fun.regexp = re.compile(self.conf.get('APP', 'corpus.regex'))

        self.engine = DbxmlEngine(conf.get('APP', 'path.container'),
                                  ChumpQuery, filter_fun)
        self.watcher = InotifyWatcher(self.conf.get('APP', 'path.corpus'),
                                      self.engine, filter_fun)

    def initialize(self):
        """Initialize the engine, load templates, index the corpus and start
        the watcher; sets ``isInitialized`` once all of that has succeeded.
        """
        self.engine.initialize()
        self._loadTemplates()
        #TODO: self.logger.trace('successfully loaded templates: ' + repr(self.templates))

        # TODO: allow path.corpus to specify multiple paths, separated by ":"
        self.engine.indexCorpus(self.conf.get('APP', 'path.corpus'))
        self.watcher.start()
        self.isInitialized = True

    def handler(self, environ, start_response):
        """WSGI entry point: dispatch on the request URL and send a response.

        environ        -- WSGI environment; must contain REMOTE_ADDR,
                          REMOTE_PORT and SCRIPT_URL.
        start_response -- WSGI start_response callable.

        Returns the result of calling the WSGIResponse as a WSGI application.
        Raises NotImplementedError for the '/stats' URL, and KeyError if a
        required environ key or template is missing.
        """
        start_time = time.time()

        host = environ['REMOTE_ADDR']
        port = environ['REMOTE_PORT']
        # NOTE(review): SCRIPT_URL is not a standard WSGI key (presumably set
        # by the front-end web server) -- confirm it is always present.
        req = environ['SCRIPT_URL']
        root = self.conf.get('APP', 'web.root')

        self.logger.info('handling request %s from %s:%s', req, host, port)
        #self.logger.trace('request environ: %(environ)s' % locals())

        # reload templates with every request when we're in debug mode
        if self.debug:
            self._loadTemplates()

        # dispatch on the URL, default response is the index
        # TODO: improve this with chump-specific implementations of toil request handlers
        isxml = False
        if req == root + '/advanced':
            content = self.templates['advanced']
        elif req == root + '/find':
            isxml = True
            content = self.engine.find(request.parse_querystring(environ))
        elif req == root + '/stats':
            # Call form of raise is valid on both Python 2 and Python 3.
            raise NotImplementedError("statistics not implemented")
        else:
            content = self.templates['index']

        response = wsgiwrappers.WSGIResponse()

        # wrap the content with headers and footers and
        # set content-type depending on type of output
        if isxml:
            content = self.templates['xml-header'] + content + self.templates['xml-footer']
            response.headers['Content-Type'] = 'text/xml'
        else:
            # Content-Type is left at the WSGIResponse default here.
            content = self.templates['header'] + content + self.templates['footer']

        response.write(content)

        self.logger.debug('request processing duration: %fs',
                          time.time() - start_time)

        return response(environ, start_response)

    def shutdown(self):
        """Sync the engine, then stop the watcher and wait for it to finish."""
        self.logger.info('caught shutdown signal')
        # TODO: is there anything else that needs to be done to ensure clean dbxml term?
        # NOTE(review): the engine is synced while the watcher is still
        # running; confirm the watcher cannot touch the engine after this.
        self.engine.sync()
        self.watcher.stop()
        self.watcher.join()

    def _loadTemplates(self):
        """(Re)load page templates into ``self.templates``.

        For every option in [APP] whose name starts with 'path.tmpl' and whose
        value is an existing file, the file's contents are stored under the
        last dot-separated component of the option name (for example an
        option named 'path.tmpl.index' is stored as 'index').  Existing
        entries are overwritten; entries are never removed.
        """
        for name, path in self.conf.items('APP'):
            # Literal prefix test: the previous pattern '^path.tmpl' left the
            # dot unescaped and so also matched names such as 'path_tmpl'.
            if not name.startswith('path.tmpl') or not os.path.isfile(path):
                continue
            # 'with' guarantees the handle is closed even if read() raises.
            with open(path) as fin:
                self.templates[name.rsplit('.', 1)[-1]] = fin.read()
            #self.logger.trace('loaded template: ' + os.path.basename(path))
        self.logger.debug('successfully loaded page templates')

# EOF