Source code for flatisfy.data

# coding: utf-8
"""
This module contains all the code related to building necessary data files from
the source opendata files.
"""
from __future__ import absolute_import, print_function, unicode_literals

import logging

import flatisfy.exceptions

from flatisfy import database
from flatisfy import data_files
from flatisfy.models.postal_code import PostalCode
from flatisfy.models.public_transport import PublicTransport
from flatisfy.tools import hash_dict

LOGGER = logging.getLogger(__name__)

# Try to load lru_cache
try:
    from functools import lru_cache
except ImportError:
    try:
        from functools32 import lru_cache
    except ImportError:
        def lru_cache(maxsize=None):  # pylint: disable=unused-argument
            """
            Identity implementation of ``lru_cache`` for fallback.
            """
            return lambda func: func
        LOGGER.warning(
            "`functools.lru_cache` is not available on your system. Consider "
            "installing the `functools32` Python module if using Python 2 for "
            "better performance."
        )
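

# Illustrative sketch (not part of the original module): the identity fallback
# above keeps the ``@lru_cache(maxsize=...)`` decorator syntax usable even when
# no caching backend is available; calls are simply recomputed each time. The
# helper below is hypothetical and exists only to demonstrate that behaviour.
def _lru_cache_fallback_example(value):
    """
    Hypothetical example: decorate a trivial function with ``lru_cache``.

    With ``functools``/``functools32`` installed, repeated calls with the same
    argument are served from the cache; with the identity fallback they are
    recomputed, but the calling code stays identical.
    """
    @lru_cache(maxsize=5)
    def square(number):
        return number * number

    return square(value)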


def preprocess_data(config, force=False):
    """
    Ensure that all the necessary data have been inserted in the database from
    the raw opendata files.

    :param config: A config dictionary.
    :param force: Whether to force rebuild or not.
    :return bool: Whether data have been built or not.
    """
    # Check if a build is required
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        is_built = (
            session.query(PublicTransport).count() > 0
            and session.query(PostalCode).count() > 0
        )
        if is_built and not force:
            # No need to rebuild the database, skip
            return False
        # Otherwise, purge all existing data
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Build all opendata files
    LOGGER.info("Rebuilding data...")
    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
        data_objects = preprocess()
        if not data_objects:
            raise flatisfy.exceptions.DataBuildError(
                "Error with %s." % preprocess.__name__
            )
        with get_session() as session:
            session.add_all(data_objects)

    LOGGER.info("Done building data!")
    return True
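

# Illustrative usage sketch (not part of the original module). The exact shape
# of ``config`` is an assumption here: flatisfy normally builds it elsewhere,
# and only the ``database`` and ``search_index`` keys used above are shown.
def _example_preprocess_data():
    """
    Hypothetical example: build the opendata tables once, forcing a rebuild.
    """
    config = {
        "database": "sqlite:///flatisfy.db",  # assumed SQLAlchemy database URI
        "search_index": "/tmp/flatisfy_index",  # assumed search index path
    }
    rebuilt = preprocess_data(config, force=True)
    LOGGER.info("Data rebuilt: %s", rebuilt)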


@hash_dict
@lru_cache(maxsize=5)
def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for the
    specific areas of the postal codes in config.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
        extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db
    """
    get_session = database.init_db(config["database"], config["search_index"])

    results = []
    with get_session() as session:
        areas = []
        # Get areas to fetch from, using postal codes
        for postal_code in constraint["postal_codes"]:
            areas.append(data_files.french_postal_codes_to_quarter(postal_code))
        # Load data for each area
        areas = list(set(areas))
        for area in areas:
            results.extend(
                session.query(model)
                .filter(model.area == area).all()
            )
        # Expunge loaded data from the session to be able to use them
        # afterwards
        session.expunge_all()
    return results
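

# Illustrative usage sketch (not part of the original module). The ``config``
# and ``constraint`` values below are assumptions for the example; only the
# keys actually read above (``database``, ``search_index``, ``postal_codes``)
# are filled in.
def _example_load_data():
    """
    Hypothetical example: load ``PostalCode`` objects for a couple of French
    postal codes from the database.
    """
    config = {
        "database": "sqlite:///flatisfy.db",  # assumed SQLAlchemy database URI
        "search_index": "/tmp/flatisfy_index",  # assumed search index path
    }
    constraint = {
        "postal_codes": ["75010", "75011"],  # assumed example postal codes
    }
    postal_codes = load_data(PostalCode, constraint, config)
    LOGGER.info("Loaded %d postal codes.", len(postal_codes))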