Source code for flatisfy.cmds

# coding: utf-8
"""
Main commands available for flatisfy.
"""
from __future__ import absolute_import, print_function, unicode_literals

import collections
import logging
import os

import flatisfy.filters
from flatisfy import database
from flatisfy import email
from flatisfy.models import flat as flat_model
from flatisfy.models import postal_code as postal_code_model
from flatisfy.models import public_transport as public_transport_model
from flatisfy import fetch
from flatisfy import tools
from flatisfy.filters import metadata
from flatisfy.web import app as web_app


LOGGER = logging.getLogger(__name__)


[docs]def filter_flats_list(config, constraint_name, flats_list, fetch_details=True):
    """
    Filter the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :param constraint_name: The constraint name that the ``flats_list`` should
        satisfy.
    :param fetch_details: Whether additional details should be fetched between
        the two passes.
    :param flats_list: The initial list of flat objects to filter.
    :return: A dict mapping flat status and list of flat objects.
    """
    # Add the flatisfy metadata entry and prepare the flat objects
    flats_list = metadata.init(flats_list, constraint_name)

    # Get the associated constraint from config
    try:
        constraint = config["constraints"][constraint_name]
    except KeyError:
        LOGGER.error(
            "Missing constraint %s. Skipping filtering for these posts.",
            constraint_name
        )
        return {
            "new": [],
            "duplicate": [],
            "ignored": []
        }

    first_pass_result = collections.defaultdict(list)
    second_pass_result = collections.defaultdict(list)
    third_pass_result = collections.defaultdict(list)
    # Do a first pass with the available infos to try to remove as much
    # unwanted postings as possible
    if config["passes"] > 0:
        first_pass_result = flatisfy.filters.first_pass(flats_list,
                                                        constraint,
                                                        config)
    else:
        first_pass_result["new"] = flats_list

    # Load additional infos
    if fetch_details:
        for i, flat in enumerate(first_pass_result["new"]):
            details = fetch.fetch_details(config, flat["id"])
            first_pass_result["new"][i] = tools.merge_dicts(flat, details)

    # Do a second pass to consolidate all the infos we found and make use of
    # additional infos
    if config["passes"] > 1:
        second_pass_result = flatisfy.filters.second_pass(
            first_pass_result["new"], constraint, config
        )
    else:
        second_pass_result["new"] = first_pass_result["new"]

    # Do a third pass to deduplicate better
    if config["passes"] > 2:
        third_pass_result = flatisfy.filters.third_pass(
            second_pass_result["new"],
            config
        )
    else:
        third_pass_result["new"] = second_pass_result["new"]

    return {
        "new": third_pass_result["new"],
        "duplicate": (
            first_pass_result["duplicate"] +
            second_pass_result["duplicate"] +
            third_pass_result["duplicate"]
        ),
        "ignored": (
            first_pass_result["ignored"] +
            second_pass_result["ignored"] +
            third_pass_result["ignored"]
        )
    }


[docs]def filter_fetched_flats(config, fetched_flats, fetch_details=True):
    """
    Filter the available flats list. Then, filter it according to criteria.

    :param config: A config dict.
    :param fetch_details: Whether additional details should be fetched between
        the two passes.
    :param fetched_flats: The initial dict mapping constraints to the list of
        fetched flat objects to filter.
    :return: A dict mapping constraints to a dict mapping flat status and list
        of flat objects.
    """
    for constraint_name, flats_list in fetched_flats.items():
        fetched_flats[constraint_name] = filter_flats_list(
            config,
            constraint_name,
            flats_list,
            fetch_details
        )
    return fetched_flats


[docs]def import_and_filter(config, load_from_db=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using WebOOB.
    :return: ``None``.
    """
    # Fetch and filter flats list
    if load_from_db:
        fetched_flats = fetch.load_flats_from_db(config)
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(config, fetched_flats=fetched_flats,
                                           fetch_details=(not load_from_db))
    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    new_flats = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        # Set is_expired to true for all existing flats.
        # This will be set back to false if we find them during importing.
        for flat in session.query(flat_model.Flat).all():
            flat.is_expired = True;

        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        flat_object.status = getattr(
                            flat_model.FlatStatus, status
                        )
                    else:
                        flat_object.status = each.status

                    # Every flat we fetched isn't expired
                    flat_object.is_expired = False

                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)

            session.add_all(flats_objects.values())

        if config["send_email"]:
            email.send_notification(config, new_flats)

    # Touch a file to indicate last update timestamp
    ts_file = os.path.join(
        config["data_directory"],
        "timestamp"
    )
    with open(ts_file, 'w'):
        os.utime(ts_file, None)

    LOGGER.info("Done!")


[docs]def purge_db(config):
    """
    Purge the database.

    :param config: A config dict.
    :return: ``None``
    """
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        # Delete every flat in the db
        LOGGER.info("Purge all flats from the database.")
        for flat in session.query(flat_model.Flat).all():
            # Use (slower) deletion by object, to ensure whoosh index is
            # updated
            session.delete(flat)
        LOGGER.info("Purge all postal codes from the database.")
        session.query(postal_code_model.PostalCode).delete()
        LOGGER.info("Purge all public transportations from the database.")
        session.query(public_transport_model.PublicTransport).delete()


[docs]def serve(config):
    """
    Serve the web app.

    :param config: A config dict.
    :return: ``None``, long-running process.
    """
    app = web_app.get_app(config)

    server = config.get("webserver", None)
    if not server:
        # Default webserver is quiet, as Bottle is used with Canister for
        # standard logging
        server = web_app.QuietWSGIRefServer

    app.run(host=config["host"], port=config["port"], server=server)