#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Pipeline : scrape les N derniers posts -> SQLite + photos + snapshot JSON."""
import json, sqlite3, time, datetime, re, sys
from fb_common import session_facebook, DATA
from scrape import scrape_post, download_photos, bare_permalink

# SQLite database file that main() rebuilds with posts, comments and authors.
DB = DATA + "/suivi.db"
# JSON snapshot file written by main() at the end of a successful run.
SNAP = DATA + "/snapshot.json"
# Number of posts to process: first command-line argument, defaulting to 5.
N = int(sys.argv[1]) if len(sys.argv) > 1 else 5


def derniers_posts(n):
    """Return at most ``n`` distinct bare permalinks from ``post_links.json``.

    The links are read from ``DATA + "/post_links.json"``, normalised with
    ``bare_permalink`` and de-duplicated while keeping the order of the file.
    Links for which ``bare_permalink`` returns a falsy value are ignored.

    Args:
        n: maximum number of permalinks to return. Zero or a negative value
            yields an empty list.

    Returns:
        A list of bare permalink strings, in file order.
        NOTE(review): whether these are really the *latest* posts depends on
        how post_links.json is ordered by its producer -- confirm.
    """
    if n <= 0:
        # A negative slice bound would silently mean "all but the last |n|".
        return []
    # Context manager so the handle is closed; JSON text is UTF-8 by spec.
    with open(DATA + "/post_links.json", encoding="utf-8") as fh:
        links = json.load(fh)
    seen = set()
    out = []
    for link in links:
        bare = bare_permalink(link)
        if bare and bare not in seen:
            seen.add(bare)
            out.append(bare)
            if len(out) == n:
                # Enough posts collected; no need to normalise the rest.
                break
    return out


def init_db(con):
    """Create the ``posts``, ``comments`` and ``authors`` tables if missing.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on
    an already initialised database leaves existing tables untouched.

    Args:
        con: an open ``sqlite3`` connection on which the schema script runs.
    """
    schema = """
    CREATE TABLE IF NOT EXISTS posts(
      post_id TEXT PRIMARY KEY, url TEXT, excerpt TEXT, rang INTEGER, scraped_at TEXT);
    CREATE TABLE IF NOT EXISTS comments(
      cid TEXT PRIMARY KEY, post_id TEXT, parent_cid TEXT, is_reply INTEGER,
      author TEXT, profile TEXT, author_id TEXT, is_page INTEGER,
      message TEXT, time_text TEXT, photos TEXT, replied_by_page INTEGER,
      scraped_at TEXT);
    CREATE TABLE IF NOT EXISTS authors(
      profile TEXT PRIMARY KEY, name TEXT, author_id TEXT, nb INTEGER, last_seen TEXT);
    """
    con.executescript(schema)


def _process_post(con, d, rang, total, url, now):
    """Scrape one post, store its rows and build its snapshot entry.

    Args:
        con: open sqlite3 connection with the schema already created.
        d: browser session object passed through to ``scrape_post``.
        rang: 1-based position of the post in the list being processed.
        total: number of posts in that list (used for progress output only).
        url: bare permalink of the post.
        now: timestamp string stored in ``scraped_at`` / ``last_seen``.

    Returns:
        The snapshot dict for the post, or ``None`` when ``scrape_post``
        raised (the error is printed and the post is skipped).
    """
    print("[%d/%d] %s" % (rang, total, url[-25:]), flush=True)
    try:
        res = scrape_post(d, url)
    except Exception as e:
        # Deliberate best effort: one failing post must not abort the run.
        print("   ERREUR scrape:", str(e)[:120], flush=True)
        return None
    pid = bare_permalink(url).split("/posts/")[1]
    data = res["comments"]
    replies = [c for c in data.values() if c["is_reply"]]
    # Ids of the comments that received at least one reply written by the page.
    page_parents = {r["parent"] for r in replies if r["is_page"] and r["parent"]}
    tops = [c for c in data.values() if not c["is_reply"]]
    snap_comments = []
    # Download photos, write comment/author rows and collect snapshot entries.
    for c in data.values():
        locs = download_photos(c["id"], c["photos"]) if c["photos"] else []
        c["photos_local"] = locs
        # Only top-level comments can be flagged as answered by the page.
        replied = not c["is_reply"] and c["id"] in page_parents
        con.execute(
            """INSERT OR REPLACE INTO comments
            (cid,post_id,parent_cid,is_reply,author,profile,author_id,is_page,
             message,time_text,photos,replied_by_page,scraped_at)
            VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (c["id"], pid, c["parent"], int(c["is_reply"]), c["author"],
             c["profile"], c["author_id"], int(c["is_page"]), c["message"],
             c["time_text"], json.dumps(locs), int(replied), now))
        if not c["is_page"] and c["profile"]:
            # One row per profile; ``nb`` counts the comments seen for it.
            con.execute(
                """INSERT INTO authors(profile,name,author_id,nb,last_seen)
                VALUES(?,?,?,1,?)
                ON CONFLICT(profile) DO UPDATE SET nb=nb+1,last_seen=excluded.last_seen,
                name=excluded.name""",
                (c["profile"], c["author"], c["author_id"], now))
        snap_comments.append({
            "id": c["id"], "parent": c["parent"], "is_reply": c["is_reply"],
            "author": c["author"], "profile": c["profile"], "is_page": c["is_page"],
            "message": c["message"], "time_text": c["time_text"],
            "photos": locs,
            "replied_by_page": replied,
            "reply_link": res["url"] + "?comment_id=" + c["id"],
        })
    con.execute(
        "INSERT OR REPLACE INTO posts(post_id,url,excerpt,rang,scraped_at) VALUES(?,?,?,?,?)",
        (pid, res["url"], res["excerpt"], rang, now))
    # Commit once per post so earlier posts survive a later failure.
    con.commit()
    nb_rep = sum(1 for c in tops if c["id"] in page_parents)
    print("   commentaires=%d  repondus=%d  sans_reponse=%d  reponses_page=%d"
          % (len(tops), nb_rep, len(tops) - nb_rep, len(replies)), flush=True)
    return {
        "rang": rang, "post_id": pid, "url": res["url"], "excerpt": res["excerpt"],
        "comments": snap_comments,
    }


def main():
    """Scrape the selected posts and rebuild the database and JSON snapshot.

    Steps: pick ``N`` posts with ``derniers_posts``, open a browser session,
    empty the ``comments``/``posts``/``authors`` tables of ``DB``, scrape and
    store every post, then write the collected data to ``SNAP`` as JSON.
    The snapshot is written only if the loop finishes without an unhandled
    exception; posts whose scrape fails are skipped.
    """
    posts = derniers_posts(N)
    print("Posts a traiter:", len(posts), flush=True)
    now = datetime.datetime.now().isoformat(timespec="seconds")
    snapshot = {"generated_at": now, "page": "Collectif de Citoyens Ucclois", "posts": []}
    # Open the browser first: the tables are wiped right after connecting, so
    # a session that cannot start must not leave an emptied database behind.
    d = session_facebook()
    try:
        con = sqlite3.connect(DB)
        try:
            init_db(con)
            con.executescript("DELETE FROM comments; DELETE FROM posts; DELETE FROM authors;")
            for rang, url in enumerate(posts, 1):
                entry = _process_post(con, d, rang, len(posts), url, now)
                if entry is not None:
                    snapshot["posts"].append(entry)
        finally:
            # Nested so the connection is closed even if d.quit() raises.
            con.close()
    finally:
        d.quit()
    # ensure_ascii=False emits raw non-ASCII text, so force UTF-8 instead of
    # relying on the locale encoding; the context manager closes the file.
    with open(SNAP, "w", encoding="utf-8") as fh:
        json.dump(snapshot, fh, ensure_ascii=False, indent=1)
    print("=> TERMINE. Base:", DB, "| snapshot:", SNAP, flush=True)


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()

