# Source code for flatisfy.config

# coding: utf-8
"""
This module handles the configuration management for Flatisfy.

It loads the default configuration, then overloads it with the provided config
file and then overloads it with command-line options.
"""
from __future__ import absolute_import, print_function, unicode_literals
from builtins import str

import json
import logging
import os
import sys
import traceback

import appdirs
from weboob.capabilities.housing import POSTS_TYPES, HOUSE_TYPES

from flatisfy import data
from flatisfy import tools
from flatisfy.constants import TimeToModes
from flatisfy.models.postal_code import PostalCode


# Default configuration. ``load_config`` starts from a copy of this dict and
# ``init_config`` dumps it as the initial configuration file.
DEFAULT_CONFIG = {
    # Constraints to match
    "constraints": {
        "default": {
            "type": None,  # RENT, SALE, SHARING
            "house_types": [],  # List of house types, must be in APART, HOUSE,
                                # PARKING, LAND, OTHER or UNKNOWN
            "postal_codes": [],  # List of postal codes
            # Numeric ranges below: ``validate_config`` only accepts two-item
            # lists here, each bound being ``None`` or a non-negative number.
            "area": (None, None),  # (min, max) in m^2
            "cost": (None, None),  # (min, max) in currency unit
            "rooms": (None, None),  # (min, max)
            "bedrooms": (None, None),  # (min, max)
            "minimum_nb_photos": None,  # min number of photos
            "description_should_contain": [],  # list of terms
            "description_should_not_contain": [],  # list of terms
            "time_to": {}  # Dict mapping names to {"gps": [lat, lng],
                           #                        "time": (min, max),
                           #                        "mode": Valid mode }
                           # Time is in seconds
        }
    },
    # Whether or not to store personal data from housing posts (phone number
    # etc)
    "store_personal_data": False,
    # Max distance between a housing and a found station, to avoid
    # false positives
    "max_distance_housing_station": 1500,
    # Score to consider two flats as being duplicates
    "duplicate_threshold": 15,
    # Score to consider two images as being duplicates through hash comparison
    "duplicate_image_hash_threshold": 10,
    # Whether images should be downloaded and served locally
    "serve_images_locally": True,
    # Navitia API key
    "navitia_api_key": None,
    # Mapbox API key
    "mapbox_api_key": None,
    # Number of filtering passes to run
    "passes": 3,
    # Maximum number of entries to fetch
    "max_entries": None,
    # Directory in which data will be put. ``None`` is XDG default location.
    "data_directory": None,
    # Path to the modules directory containing all Weboob modules. ``None`` if
    # ``weboob_modules`` package is pip-installed, and you want to use
    # ``pkgresource`` to automatically find it.
    "modules_path": None,
    # SQLAlchemy URI to the database to use
    "database": None,
    # Path to the Whoosh search index file. Use ``None`` to put it in
    # ``data_directory``.
    "search_index": None,
    # Web app port
    "port": 8080,
    # Web app host to listen on
    "host": "127.0.0.1",
    # Web server to use to serve the webapp (see Bottle deployment doc)
    "webserver": None,
    # List of Weboob backends to use (default to any backend available)
    "backends": None,
    # Should email notifications be sent?
    "send_email": False,
    "smtp_server": 'localhost',
    "smtp_port": 25,
    "smtp_from": "noreply@flatisfy.org",
    "smtp_to": [],
    # The web site url, to be used in email notifications. (doesn't matter
    # whether the trailing slash is present or not)
    "website_url": "http://127.0.0.1:8080"
}

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)


def validate_config(config, check_with_data):
    """
    Check that the config passed as argument is a valid configuration.

    Every check is written as a single-line ``assert``. When one of them
    fails (or a required key is missing), the source text of the offending
    line is extracted from the traceback and returned, so that the caller can
    report it to the user.

    :param config: A config dictionary to check.
    :param check_with_data: Whether we should use the available OpenData to
        check the config values.
    :return: ``True`` if the configuration is valid. Otherwise, the source
        line of the check that failed, as reported by the traceback. Callers
        must therefore compare the result with ``is True``.
    """
    def _check_constraints_bounds(bounds):
        """
        Check the bounds for numeric constraints.

        ``bounds`` must be a ``[min, max]`` list whose items are ``None`` or
        non-negative numbers, with ``max > min`` when both are set.
        """
        assert isinstance(bounds, list)
        assert len(bounds) == 2
        # Kept on a single line so that the reported message is complete.
        assert all(x is None or (isinstance(x, (float, int)) and x >= 0) for x in bounds)  # noqa: E501
        if bounds[0] is not None and bounds[1] is not None:
            assert bounds[1] > bounds[0]

    try:
        # Note: The traceback fetching code only handle single line asserts.
        # Then, we disable line-too-long pylint check and E501 flake8 checks
        # and use long lines whenever needed, in order to have the full assert
        # message in the log output.
        # pylint: disable=locally-disabled,line-too-long
        assert config["passes"] in [0, 1, 2, 3]
        assert config["max_entries"] is None or (isinstance(config["max_entries"], int) and config["max_entries"] > 0)  # noqa: E501

        assert config["data_directory"] is None or isinstance(config["data_directory"], str)  # noqa: E501
        # ``os.path.isdir(None)`` raises a ``TypeError``, which would escape
        # the ``except`` clause below: report an unset directory as a regular
        # validation failure instead.
        assert config["data_directory"] is not None and os.path.isdir(config["data_directory"])  # noqa: E501
        assert isinstance(config["search_index"], str)
        assert config["modules_path"] is None or isinstance(config["modules_path"], str)  # noqa: E501

        assert config["database"] is None or isinstance(config["database"], str)  # noqa: E501

        assert isinstance(config["port"], int)
        assert isinstance(config["host"], str)
        assert config["webserver"] is None or isinstance(config["webserver"], str)  # noqa: E501
        assert config["backends"] is None or isinstance(config["backends"], list)  # noqa: E501

        assert isinstance(config["send_email"], bool)
        assert config["smtp_server"] is None or isinstance(config["smtp_server"], str)  # noqa: E501
        assert config["smtp_port"] is None or isinstance(config["smtp_port"], int)  # noqa: E501
        assert config["smtp_to"] is None or isinstance(config["smtp_to"], list)

        assert isinstance(config["store_personal_data"], bool)
        assert isinstance(config["max_distance_housing_station"], (int, float))
        assert isinstance(config["duplicate_threshold"], int)
        assert isinstance(config["duplicate_image_hash_threshold"], int)

        # API keys
        assert config["navitia_api_key"] is None or isinstance(config["navitia_api_key"], str)  # noqa: E501
        assert config["mapbox_api_key"] is None or isinstance(config["mapbox_api_key"], str)  # noqa: E501

        # Ensure constraints are ok
        assert config["constraints"]
        for constraint in config["constraints"].values():
            assert "type" in constraint
            assert isinstance(constraint["type"], str)
            assert constraint["type"].upper() in POSTS_TYPES.__members__

            assert "minimum_nb_photos" in constraint
            if constraint["minimum_nb_photos"]:
                assert isinstance(constraint["minimum_nb_photos"], int)
                assert constraint["minimum_nb_photos"] >= 0

            assert "description_should_contain" in constraint
            assert isinstance(constraint["description_should_contain"], list)
            if constraint["description_should_contain"]:
                for term in constraint["description_should_contain"]:
                    assert isinstance(term, str)

            assert "description_should_not_contain" in constraint
            assert isinstance(constraint["description_should_not_contain"], list)  # noqa: E501
            if constraint["description_should_not_contain"]:
                for term in constraint["description_should_not_contain"]:
                    assert isinstance(term, str)

            assert "house_types" in constraint
            assert constraint["house_types"]
            for house_type in constraint["house_types"]:
                # Type check first: calling ``.upper()`` on a non-string would
                # raise an ``AttributeError`` that is not handled below.
                assert isinstance(house_type, str) and house_type.upper() in HOUSE_TYPES.__members__  # noqa: E501

            assert "postal_codes" in constraint
            assert constraint["postal_codes"]
            assert all(isinstance(x, str) for x in constraint["postal_codes"])
            if check_with_data:
                # Ensure data is built into db
                data.preprocess_data(config, force=False)
                # Check postal codes; a set keeps each lookup cheap.
                opendata_postal_codes = {
                    x.postal_code
                    for x in data.load_data(PostalCode, constraint, config)
                }
                for postal_code in constraint["postal_codes"]:
                    assert postal_code in opendata_postal_codes  # noqa: E501

            assert "area" in constraint
            _check_constraints_bounds(constraint["area"])

            assert "cost" in constraint
            _check_constraints_bounds(constraint["cost"])

            assert "rooms" in constraint
            _check_constraints_bounds(constraint["rooms"])

            assert "bedrooms" in constraint
            _check_constraints_bounds(constraint["bedrooms"])

            assert "time_to" in constraint
            assert isinstance(constraint["time_to"], dict)
            for name, item in constraint["time_to"].items():
                assert isinstance(name, str)
                assert "gps" in item
                assert isinstance(item["gps"], list)
                assert len(item["gps"]) == 2
                assert "time" in item
                _check_constraints_bounds(item["time"])
                if "mode" in item:
                    # Lookup raises a ``KeyError`` for an unknown mode, which
                    # is reported like any other failed check.
                    TimeToModes[item["mode"]]

        return True
    except (AssertionError, KeyError):
        # Report the source text of the innermost line that failed.
        _, _, exc_traceback = sys.exc_info()
        return traceback.extract_tb(exc_traceback)[-1][-1]
def load_config(args=None, check_with_data=True):
    """
    Load the configuration from file.

    The default configuration is loaded first, then overloaded with the
    content of the JSON config file (if any) and finally with the
    command-line options. Missing data directories are created along the way.

    :param args: An argparse args structure.
    :param check_with_data: Whether we should use the available OpenData to
        check the config values. Defaults to ``True``.
    :return: The loaded config dict, or ``None`` if the resulting
        configuration is not valid.
    """
    LOGGER.info("Initializing configuration...")
    # Default configuration
    config_data = DEFAULT_CONFIG.copy()

    # Load config from specified JSON
    if args and getattr(args, "config", None):
        LOGGER.debug("Loading configuration from %s.", args.config)
        try:
            with open(args.config, "r") as fh:
                config_data.update(json.load(fh))
        except (IOError, ValueError) as exc:
            # Best effort: keep going with the default configuration.
            LOGGER.error(
                "Unable to load configuration from file, "
                "using default configuration: %s.",
                exc
            )

    # Overload config with arguments
    if args and getattr(args, "passes", None) is not None:
        LOGGER.debug(
            "Overloading number of passes from CLI arguments: %d.",
            args.passes
        )
        config_data["passes"] = args.passes
    if args and getattr(args, "max_entries", None) is not None:
        LOGGER.debug(
            "Overloading maximum number of entries from CLI arguments: %d.",
            args.max_entries
        )
        config_data["max_entries"] = args.max_entries
    if args and getattr(args, "port", None) is not None:
        LOGGER.debug("Overloading web app port: %d.", args.port)
        config_data["port"] = args.port
    if args and getattr(args, "host", None) is not None:
        LOGGER.debug("Overloading web app host: %s.", args.host)
        config_data["host"] = str(args.host)

    # Handle data_directory option
    if args and getattr(args, "data_dir", None) is not None:
        LOGGER.debug("Overloading data directory from CLI arguments.")
        config_data["data_directory"] = args.data_dir
    elif config_data["data_directory"] is None:
        config_data["data_directory"] = appdirs.user_data_dir(
            "flatisfy",
            "flatisfy"
        )
        LOGGER.debug("Using default XDG data directory: %s.",
                     config_data["data_directory"])

    if not os.path.isdir(config_data["data_directory"]):
        LOGGER.info("Creating data directory according to config: %s",
                    config_data["data_directory"])
        os.makedirs(config_data["data_directory"])
    # Checked separately from the data directory, so that a pre-existing data
    # directory lacking its "images" subdirectory still gets one.
    images_directory = os.path.join(config_data["data_directory"], "images")
    if not os.path.isdir(images_directory):
        os.makedirs(images_directory)

    if config_data["database"] is None:
        config_data["database"] = "sqlite:///" + os.path.join(
            config_data["data_directory"],
            "flatisfy.db"
        )

    if config_data["search_index"] is None:
        config_data["search_index"] = os.path.join(
            config_data["data_directory"],
            "search_index"
        )

    # Handle constraints filtering
    if args and getattr(args, "constraints", None) is not None:
        LOGGER.info(
            ("Filtering constraints from config according to CLI argument. "
             "Using only the following constraints: %s."),
            args.constraints.replace(",", ", ")
        )
        constraints_filter = args.constraints.split(",")
        config_data["constraints"] = {
            k: v
            for k, v in config_data["constraints"].items()
            if k in constraints_filter
        }

    # Sanitize website url: ensure a trailing slash. The truthiness test also
    # skips an empty string, which indexing with ``[-1]`` would choke on.
    website_url = config_data["website_url"]
    if website_url and not website_url.endswith('/'):
        config_data["website_url"] = website_url + '/'

    config_validation = validate_config(config_data, check_with_data)
    # ``validate_config`` returns either ``True`` or a description of the
    # failed check, hence the identity comparison.
    if config_validation is True:
        LOGGER.info("Config has been fully initialized.")
        return config_data
    LOGGER.error("Error in configuration: %s.", config_validation)
    return None
def init_config(output=None):
    """
    Initialize an empty configuration file.

    The default configuration is dumped as pretty-printed JSON.

    :param output: File to output content to. When left unset, or set to
        ``"-"``, the content is printed on the standard output instead.
    """
    template = DEFAULT_CONFIG.copy()

    # No real file requested: print and stop here.
    if not output or output == "-":
        print(tools.pretty_json(template))
        return

    with open(output, "w") as handle:
        handle.write(tools.pretty_json(template))