-
Notifications
You must be signed in to change notification settings - Fork 9
/
extract_rooms.py
43 lines (33 loc) · 1.25 KB
/
extract_rooms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Extract listing ids from location files
"""
from pathlib import Path
from typing import Set
import json
import argtyped
from tqdm.auto import tqdm
from helpers import list_to_txt
class Arguments(argtyped.Arguments):
    # Typed command-line options for this script.

    # Directory that is searched (non-recursively) for "*.json" location files.
    loc_dir: Path = Path("dora")

    # Output root: one sub-folder per location file (named after the file
    # stem), holding one folder per listing id plus a "rooms.txt" index.
    room_dir: Path = Path("merlin")
def extract_listing_ids(data: dict) -> list:
    """Collect the listing ids found in one parsed location file.

    Walks ``data["data"]["dora"]["exploreV3"]["sections"]``, keeps only the
    sections whose ``__typename`` is ``"DoraExploreV3ListingsSection"`` and
    converts ``item["listing"]["id"]`` to ``int`` for every item in them.

    Args:
        data: The decoded JSON document of a single location file.

    Returns:
        The listing ids as ints, in document order. Duplicates are kept;
        de-duplication is left to the caller.

    Raises:
        KeyError: If one of the expected keys is missing from the document.
    """
    sections = data["data"]["dora"]["exploreV3"]["sections"]
    return [
        int(item["listing"]["id"])
        for section in sections
        # Other section types are skipped before their "items" key is touched.
        if section["__typename"] == "DoraExploreV3ListingsSection"
        for item in section["items"]
    ]


if __name__ == "__main__":
    args = Arguments()

    # Path.glob yields entries in arbitrary order; sort so that the location a
    # shared listing is assigned to does not depend on the filesystem.
    locations = sorted(args.loc_dir.glob("*.json"))
    # Every listing id seen so far, across all location files.
    room_set: Set[int] = set()

    for filename in tqdm(locations):
        # Decode explicitly as UTF-8 instead of relying on the locale default,
        # which breaks on non-ASCII content on some platforms.
        with open(filename, encoding="utf-8") as fid:
            data = json.load(fid)

        location_dir = args.room_dir / filename.stem
        # Listings that appear for the first time in this location file.
        rooms = []
        for listing_id in extract_listing_ids(data):
            if listing_id in room_set:
                continue
            room_set.add(listing_id)
            rooms.append(listing_id)
            # One folder per newly discovered listing; parents=True also
            # creates the per-location folder that receives rooms.txt below.
            (location_dir / str(listing_id)).mkdir(exist_ok=True, parents=True)

        # Locations that contributed no new listing get no rooms.txt.
        if rooms:
            list_to_txt(rooms, location_dir / "rooms.txt")

    print(f"Extracted {len(room_set)} rooms from {len(locations)} locations")