mm_calendar.py
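"""Scrape APS March Meeting abstract pages and mirror the talks into a Google Calendar.

The script reads the target calendar ID from a .env file (MM_calendar_ID),
authenticates against the Google Calendar API via OAuth, and can either print
a per-session schedule summary or create one calendar event per presentation.
"""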
import os
import pickle
import re
from datetime import datetime, timezone
from itertools import groupby
from operator import itemgetter
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Configuration: load the calendar ID from .env; the timezone and OAuth scope
# are fixed constants. Defined at module level so every function can use them.
load_dotenv()
MM_calendar_ID = os.getenv("MM_calendar_ID")
MM_TIMEZONE = 'America/Chicago'  # Minneapolis time
SCOPES = ['https://www.googleapis.com/auth/calendar']

def authenticate_google_calendar():
"""Authenticate and return a Google Calendar API service."""
creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'credentials.json', SCOPES)
creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(creds, token)
service = build('calendar', 'v3', credentials=creds)
return service
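# Usage sketch: credentials.json is the OAuth client file downloaded from the
# Google Cloud Console; the first run opens a browser window for consent.
#
#     service = authenticate_google_calendar()
#     calendars = service.calendarList().list().execute()
#     print([cal['summary'] for cal in calendars.get('items', [])])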
def create_event(service, event):
    """Create an event on the user's calendar."""
    created = service.events().insert(calendarId=MM_calendar_ID, body=event).execute()
    print(f"Event created: {created.get('htmlLink')}")
def create_calendar_events(pres_list):
service = authenticate_google_calendar()
for pres in pres_list:
        # strptime's %I:%M%p format needs minutes, so expand e.g. "8pm" to "8:00pm" first.
time_begin = pres['time_begin'] if ':' in pres['time_begin'] else pres['time_begin'][:-2] + ':00' + pres['time_begin'][-2:]
time_end = pres['time_end'] if ':' in pres['time_end'] else pres['time_end'][:-2] + ':00' + pres['time_end'][-2:]
start_time = datetime.strptime(f"{pres['pres_date']} {time_begin}", "%m/%d/%Y %I:%M%p")
end_time = datetime.strptime(f"{pres['pres_date']} {time_end}", "%m/%d/%Y %I:%M%p")
event_body = {
'summary': pres['title'],
'location': pres['location'],
'description': f"Presented by: {pres['name']}\n\nAbstract: {pres['abstract']}\n\nURL: {pres['url']}",
'start': {
'dateTime': start_time.isoformat(),
'timeZone': MM_TIMEZONE,
},
'end': {
'dateTime': end_time.isoformat(),
'timeZone': MM_TIMEZONE,
},
}
create_event(service, event_body)
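# Each pres dict consumed above has the shape produced by
# create_pres_list_from_aps below (values illustrative):
#
#     {"name": "A. Presenter", "title": "Some talk title",
#      "pres_date": "03/04/2024", "time_begin": "8:00am", "time_end": "11:00am",
#      "abstract": "...", "url": "https://meetings.aps.org/...", "location": "205AB"}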
def create_pres_list_from_aps(url):
pres_list = []
    # Fetch and parse the abstract page; fail fast on HTTP errors.
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
title = soup.find('meta', attrs={'name': 'citation_title'})['content']
authors_meta = soup.find('meta', attrs={'name': 'citation_authors'})['content']
authors = authors_meta.split(';')
presenter = authors[0] if authors else ""
abstract_div = soup.find('div', class_='largernormal')
abstract_text = abstract_div.text if abstract_div else ""
location_text = soup.find(string=lambda x: x and "Room:" in x)
location = location_text.split("Room:")[1].strip() if location_text else "Not specified"
    pres_date = soup.find('meta', attrs={'name': 'citation_date'})['content']
session_title_tag = soup.find('h3')
if session_title_tag and "Poster Session" in session_title_tag.text:
# Extract times from parenthesis in session title
time_text = re.search(r'\((\d+pm)-(\d+pm) CST\)', session_title_tag.text)
time_begin, time_end = time_text.groups() if time_text else ("Not specified", "Not specified")
else:
# Extract the times from the content following the citation_date
time_info = soup.find('p', style="margin-top: 0px;").find_next_sibling('p')
if time_info:
time_text = time_info.text.strip()
times = time_text.split('–')
            # Strip non-breaking spaces and lowercase so the result matches %I:%M%p (e.g. '8:00am').
time_begin = times[0].strip().replace('\xa0', '').lower() if len(times) > 0 else "Not specified"
time_end = times[1].strip().replace('\xa0', '').lower() if len(times) > 1 else "Not specified"
else:
time_begin, time_end = "Not specified", "Not specified"
pres_list.append({
"name": presenter.strip(),
"title": title.strip(),
"pres_date": pres_date,
"time_begin": time_begin,
"time_end": time_end,
"abstract": abstract_text.strip(),
"url": url,
"location": location
})
return pres_list
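# The selectors above assume the MAR24 abstract-page markup: citation_* meta
# tags, an abstract in div.largernormal, and a "Room:" label in the page text.
# If APS changes that layout, the parsing will need updating.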
def process_aps_urls(url_list):
"""
Process a list of APS URLs to extract presentation details.
Parameters:
- url_list: A list of strings, where each string is a URL to an APS abstract page.
Returns:
- A list of dictionaries, where each dictionary contains details of a presentation.
"""
all_pres_details = []
for url in url_list:
pres_details = create_pres_list_from_aps(url)
# Assuming each URL corresponds to a single presentation,
# and create_pres_list_from_aps returns a list with a single dict,
# we extend the master list.
all_pres_details.extend(pres_details)
return all_pres_details
def print_all_session_details_v0(url):
pres_list = create_pres_list_from_aps(url)
pres_list.sort(key=lambda x: datetime.strptime(x['pres_date'], '%m/%d/%Y')) # assuming date is in 'mm/dd/yyyy' format
for pres in pres_list:
session_id = url.split('/')[-1]
print(f"{session_id:<10} | Room: {pres['location']:<10} | Time: {pres['pres_date']} ({pres['time_begin']} - {pres['time_end']})")
def print_all_session_details(url_list):
all_pres = []
for url in url_list:
all_pres.extend(create_pres_list_from_aps(url))
all_pres.sort(key=lambda x: (datetime.strptime(x['pres_date'], '%m/%d/%Y'), x['location'], x['time_begin'], x['time_end']))
for key, group in groupby(all_pres, key=itemgetter('pres_date', 'location', 'time_begin', 'time_end')):
session_ids = [session['url'].split('/')[-1] for session in group]
print(f"{', '.join(session_ids):<10} | Room: {key[1]:<10} | Time: {key[0]} ({key[2]} - {key[3]})")
def delete_all_created_calendar_entries():
    service = authenticate_google_calendar()
    # The Calendar API requires an RFC3339 timestamp with a timezone offset,
    # so a naive datetime.now().isoformat() would be rejected.
    now = datetime.now(timezone.utc).isoformat()
    events_result = service.events().list(calendarId=MM_calendar_ID, timeMin=now).execute()
    events = events_result.get('items', [])
    if not events:
        print('No upcoming events found.')
        return
    for event in events:
        print(f"Deleting event: {event['summary']}")
        service.events().delete(calendarId=MM_calendar_ID, eventId=event['id']).execute()
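# Note: events().list() returns a single page of results (up to 250 events by
# default). A sketch of the standard pageToken loop, should this calendar ever
# hold more events than one page:
#
#     page_token = None
#     while True:
#         resp = service.events().list(calendarId=MM_calendar_ID, timeMin=now,
#                                      pageToken=page_token).execute()
#         for event in resp.get('items', []):
#             service.events().delete(calendarId=MM_calendar_ID,
#                                     eventId=event['id']).execute()
#         page_token = resp.get('nextPageToken')
#         if not page_token:
#             break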
if __name__ == "__main__":
url_list = [
"https://meetings.aps.org/Meeting/MAR24/Session/J00.249",
"https://meetings.aps.org/Meeting/MAR24/Session/B54.6",
"https://meetings.aps.org/Meeting/MAR24/Session/J00.246",
"https://meetings.aps.org/Meeting/MAR24/Session/J00.265",
"https://meetings.aps.org/Meeting/MAR24/Session/K50.11",
"https://meetings.aps.org/Meeting/MAR24/Session/S50.2",
"https://meetings.aps.org/Meeting/MAR24/Session/A47.8",
"https://meetings.aps.org/Meeting/MAR24/Session/K50.5",
"https://meetings.aps.org/Meeting/MAR24/Session/M48.12",
"https://meetings.aps.org/Meeting/MAR24/Session/S50.1",
]
all_pres_details = process_aps_urls(url_list)
#create_calendar_events(all_pres_details)
print_all_session_details(url_list)