import time, re, json
from fb_common import session_facebook
# Public page whose posts are being harvested.
PAGE = "https://www.facebook.com/collectif.de.citoyens.ucclois"

# Browser session from the project helper; it is used below through
# get / page_source / find_elements / execute_script / quit
# (presumably a Selenium WebDriver -- confirm in fb_common).
d = session_facebook()
d.get(PAGE)
time.sleep(6)  # fixed pause after navigation, presumably to let the page render

# Page identity: look for a numeric id embedded in the initial page source.
h = d.page_source
# Candidate keys are tried in this order; the first one that matches wins.
_ID_PATTERNS = (
    r'"pageID":"(\d+)"',
    r'"page_id":"(\d+)"',
    r'"entity_id":"(\d+)"',
    r'"userID":"(\d+)"',
)
pid = next(
    (
        found.group(1)
        for found in (re.search(pattern, h) for pattern in _ID_PATTERNS)
        if found
    ),
    None,  # no pattern matched
)
print("page_id candidat:", pid)

# scroll down to load posts
links = []    # permalink URLs in discovery order (first URL seen for each post)
seen = set()  # permalink fragments already recorded, e.g. "/posts/123"

# Anchors whose href may point at a post, a video or a photo.
_ANCHOR_SELECTOR = (
    "a[href*='/posts/'], a[href*='story_fbid='], "
    "a[href*='/videos/'], a[href*='/photos/']"
)
# Fragment of the URL that identifies one post or video.
_PERMALINK_RE = re.compile(
    r'(/posts/pfbid[0-9A-Za-z]+|/posts/\d+|story_fbid=\w+|/videos/\d+)'
)


def collect():
    """Append newly discovered post/video permalinks to ``links``.

    Scans the anchors currently present in the page driven by the
    module-level driver ``d``, cuts the ``__cft__`` tracking suffix from
    each href and keeps those containing a permalink fragment.

    De-duplication is keyed on the permalink fragment rather than on the
    whole URL: the same post can be linked with extra query parameters
    (for example ``?comment_id=...``) and must only be recorded once.
    The URL stored in ``links`` is the first one seen for that post.
    """
    for anchor in d.find_elements("css selector", _ANCHOR_SELECTOR):
        try:
            href = anchor.get_attribute("href") or ""
        except Exception:
            # Best effort: skip anchors that cannot be read (presumably
            # elements detached while the feed re-renders).
            continue
        # Normalise: drop the tracking parameters starting at __cft__.
        href = href.split("?__cft__")[0].split("&__cft__")[0]
        match = _PERMALINK_RE.search(href)
        if match is None:
            continue
        key = match.group(1)
        if key in seen:
            continue
        seen.add(key)
        links.append(href)

# Scroll, harvest and save. The driver is always shut down, even if a step
# fails, so that no browser session is left running.
try:
    # Each pass jumps to the bottom of the document, waits for new content
    # to load, then harvests the anchors that are now present.
    for i in range(15):
        d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2.2)
        collect()
        if i % 4 == 0:
            print("scroll", i, "-> permaliens:", len(links))

    print("=== PERMALIENS TROUVES:", len(links), "===")
    for link in links[:15]:  # preview only; the full list goes to the JSON file
        print(link)

    # h is the page source captured before scrolling started.
    with open("/home/collectifweil/suivi_data/page.html", "w", encoding="utf-8") as page_file:
        page_file.write(h)
    # Context managers guarantee both files are flushed and closed.
    with open("/home/collectifweil/suivi_data/post_links.json", "w", encoding="utf-8") as links_file:
        json.dump(links, links_file)
finally:
    d.quit()
