import time, re, json, sys
from fb_common import session_facebook
from selenium.webdriver.common.by import By
# Display name of the page; parse() compares it with the author name taken from
# each comment's aria-label and also removes it from the extracted message text.
PAGE_NAME = "Collectif de Citoyens Ucclois"
# Substring looked for in a commenter's profile link to recognise the page's own account.
PAGE_URL_FRAG = "collectif.de.citoyens.ucclois"

def clean_profile(href):
    """Return *href* without Facebook tracking and comment-anchor parameters.

    Everything from the first ``__cft__`` / ``__tn__`` marker onward is
    discarded, then ``comment_id`` and ``reply_comment_id`` query parameters
    are removed and any separators left dangling by that removal are tidied.

    Args:
        href: Link URL as read from an anchor element; may be ``None`` or "".

    Returns:
        The cleaned URL, or ``None`` when *href* is empty or ``None``.
    """
    if not href:
        return None
    # Each split keeps the prefix, so the result is cut at the earliest marker
    # whichever order they are applied in.
    for marker in ("&__cft__", "&__tn__", "?__cft__", "?__tn__"):
        href = href.split(marker)[0]
    # Keep the separator (group 1) so neighbouring parameters stay delimited.
    href = re.sub(r'([?&])(?:reply_)?comment_id=[^&]*', r'\1', href)
    # Removing a parameter from the middle of the query leaves "&&" behind.
    href = re.sub(r'&{2,}', '&', href)
    return href.replace("?&", "?").rstrip("?&")

def expand(d):
    """Click "show more"-style spans so that further comments and replies load.

    Runs at most 12 passes. In each pass, for every caption below, the first
    six ``<span>`` elements whose text contains the caption are scrolled into
    view and clicked through JavaScript. The page is then scrolled to the
    bottom. The loop stops after the first pass in which no click succeeded.

    Args:
        d: Browser driver exposing ``find_elements`` and ``execute_script``.
    """
    captions = (
        "Voir plus de commentaires",
        "Afficher plus de commentaires",
        "Plus de commentaires",
        "réponse",
        "Voir les",
        "Afficher",
    )

    def _press(span):
        # Best effort: a failure on one span is ignored and simply not counted.
        pressed = False
        try:
            d.execute_script("arguments[0].scrollIntoView({block:'center'});", span)
            d.execute_script("arguments[0].click();", span)
            pressed = True
            time.sleep(0.8)
        except Exception:
            pass
        return pressed

    passes_left = 12
    while passes_left > 0:
        passes_left -= 1
        hits = 0
        for caption in captions:
            xpath = "//span[contains(text(),'%s')]" % caption
            for span in d.find_elements(By.XPATH, xpath)[:6]:
                hits += _press(span)
        d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1.2)
        if hits == 0:
            break

# Strings that are excluded from the message text when a div consists of exactly
# one of them (they look like button captions and badges of the comment UI).
_UI_LABELS = frozenset({
    "J'aime", "Répondre", "Partager", "Modifié", "Masquer", "Suivre",
    "Aimer", "Auteur", "Voir la traduction", "Top fan", "Plus pertinents",
    "Tout afficher", "Like", "Reply",
})
# Whole-string age/date markers such as "il y a 3 h", "2 sem" or "12 mars".
_TIMESTAMP_RE = re.compile(r'^(il y a .+|\d+\s?(min|h|j|sem|ans?)|\d{1,2}\s\w+\.?)$')
# Whole-string bare number.
_COUNT_RE = re.compile(r'^\d+$')
# Author name inside the aria-label of a reply / of a top-level comment.
_REPLY_LABEL_RE = re.compile(r'Réponse de (.+?) au commentaire de')
_COMMENT_LABEL_RE = re.compile(
    r'Commentaire de (.+?)(?: il y a .*| \d+\s?(?:min|h|j|sem|an).*)?$')


def _is_noise(text):
    """Return True when *text* is a UI label, an age/date marker, a bare number
    or at most one character long, i.e. not part of the comment message."""
    if text in _UI_LABELS:
        return True
    if _TIMESTAMP_RE.match(text) or _COUNT_RE.match(text):
        return True
    return len(text) <= 1


def _parse_node(d, node):
    """Build the record for one comment/reply element, or None if it has no id."""
    label = node.get_attribute("aria-label") or ""
    anchors = node.find_elements(By.CSS_SELECTOR, "a[href*='facebook.com']")
    links = [x.get_attribute("href") or "" for x in anchors]

    # First comment_id / reply_comment_id found among the node's links.
    cid = rid = None
    for link in links:
        if not cid:
            found = re.search(r'[?&]comment_id=(\d+)', link)
            if found:
                cid = found.group(1)
        if not rid:
            found = re.search(r'[?&]reply_comment_id=(\d+)', link)
            if found:
                rid = found.group(1)

    is_reply = label.startswith("Réponse de")
    # NOTE(review): a reply whose links carry no reply_comment_id is keyed by
    # comment_id with parent None; if that id is the parent's, it would
    # overwrite the parent's entry. Confirm against real pages.
    has_reply_id = bool(is_reply and rid)
    own_id = rid if has_reply_id else cid
    parent_id = cid if has_reply_id else None
    if not own_id:
        return None

    # Author name comes from the aria-label; "?" when the label does not match.
    label_re = _REPLY_LABEL_RE if is_reply else _COMMENT_LABEL_RE
    name_match = label_re.match(label)
    author = name_match.group(1).strip() if name_match else "?"

    # Profile = first link that is not a post link.
    prof = None
    for link in links:
        if "/posts/" in link or "story_fbid" in link:
            continue
        prof = clean_profile(link)
        break
    is_page = author == PAGE_NAME or (
        prof and PAGE_URL_FRAG in prof and "/posts/" not in prof)

    # Message: textContent of every div[dir=auto]; per the original author's
    # note, textContent also returns the text of off-screen elements.
    texts = d.execute_script(
        "return Array.from(arguments[0].querySelectorAll('div[dir=\"auto\"]'))"
        ".map(e=>e.textContent.trim()).filter(Boolean);", node)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    parts = list(dict.fromkeys(
        t for t in texts
        if t not in (author, PAGE_NAME) and not _is_noise(t)))
    message = " ".join(parts).strip()

    # Photos: images served from "scontent", minus the /p148x148/ and "_s."
    # variants (treated as avatars by the original author).
    sources = [im.get_attribute("src") or ""
               for im in node.find_elements(By.CSS_SELECTOR, "img")]
    photos = [src for src in sources
              if "scontent" in src and "/p148x148/" not in src and "_s." not in src]

    return {"id": own_id, "parent": parent_id, "is_reply": is_reply,
            "author": author, "profile": prof, "is_page": bool(is_page),
            "message": message[:500], "photos": photos[:4], "label": label}


def parse(d):
    """Extract the comments and replies currently present in the page.

    Every element whose aria-label starts with "Commentaire de" or
    "Réponse de" is turned into a record with the keys ``id``, ``parent``,
    ``is_reply``, ``author``, ``profile``, ``is_page``, ``message`` (at most
    500 characters), ``photos`` (at most 4 URLs) and ``label``.

    Elements without a comment id are skipped. An element that raises while
    being read is skipped as well, and the failure is reported on stderr.

    Args:
        d: Browser driver exposing ``find_elements`` and ``execute_script``.

    Returns:
        dict mapping comment id (str) to its record; a later element with the
        same id replaces an earlier one.
    """
    nodes = d.find_elements(
        By.CSS_SELECTOR,
        '[aria-label^="Commentaire de"], [aria-label^="Réponse de"]')
    by_id = {}
    for node in nodes:
        try:
            record = _parse_node(d, node)
        except Exception as exc:
            # Best effort per node, but no longer silent.
            print("parse: skipped one comment node (%s: %s)"
                  % (type(exc).__name__, exc), file=sys.stderr)
            continue
        if record is not None:
            by_id[record["id"]] = record
    return by_id

# --- Script body: scrape one post, print a summary, dump the parsed comments ---

# The post URL is argv[1] when given, otherwise the first entry of post_links.json.
if len(sys.argv) > 1:
    URL = sys.argv[1]
else:
    with open("/home/collectifweil/suivi_data/post_links.json", encoding="utf-8") as links_file:
        URL = json.load(links_file)[0]

# Keep only the canonical ".../posts/pfbid..." part of the URL.
m = re.search(r'(https://www\.facebook\.com/collectif\.de\.citoyens\.ucclois/posts/pfbid[0-9A-Za-z]+)', URL)
if m is None:
    # Fail with a readable message instead of AttributeError on None.group().
    sys.exit("Unrecognised post URL: %r" % (URL,))
URL = m.group(1)

d = session_facebook()
try:
    d.get(URL)
    time.sleep(6)  # fixed pause after navigation, presumably to let the page render
    expand(d)
    data = parse(d)
finally:
    # Always release the browser session, even when scraping fails.
    d.quit()

tops = [c for c in data.values() if not c["is_reply"]]
replies = [c for c in data.values() if c["is_reply"]]
# Ids of the comments that received at least one reply flagged as coming from the page.
page_reply_parents = {r["parent"] for r in replies if r["is_page"]}
print("POST:", URL[-30:])
print("commentaires:",len(tops)," | reponses:",len(replies)," | dont reponses page:",sum(r['is_page'] for r in replies))
for c in tops:
    ans = "REPONDU" if c["id"] in page_reply_parents else "—"
    print("[%s] %s | %s | photos:%d" % (ans, c["author"], (c["message"][:70] or "(vide)"), len(c["photos"])))
    print("      profil:", c["profile"])

# ensure_ascii=False writes accented characters as-is, so the encoding is set explicitly.
with open("/home/collectifweil/suivi_data/post0_parsed.json", "w", encoding="utf-8") as out_file:
    json.dump(data, out_file, ensure_ascii=False, indent=1)
