get_reviews.py
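"""Download the review thread for an OpenReview submission and export it.

Prompts for an OpenReview forum URL and account credentials (optionally
cached locally, encrypted with a machine-derived key), groups the reviews
and author rebuttals by reviewer, and writes the result as Markdown and
ODT files under the output/ directory.
"""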
import base64
import getpass
import json
import os
import re
import sys
import urllib.parse
from operator import itemgetter
from pathlib import Path
from urllib.parse import parse_qs, urlparse

import markdown
import openreview
import pypandoc
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC


def get_key():
    # Use a fixed salt (not ideal, but better than nothing)
    salt = b"fixed_salt_for_openreview"
    # Use the machine's hostname as a basis for the key
    hostname = os.uname().nodename.encode()
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    key = base64.urlsafe_b64encode(kdf.derive(hostname))
    return key
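

# NOTE: the Fernet key is derived from the machine hostname with a fixed
# salt, so the credential cache is merely obfuscated, not strongly
# protected; any process on this host can recover it. Also, os.uname()
# is POSIX-only, so credential caching assumes a Unix-like system.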


def encrypt(text):
    f = Fernet(get_key())
    return f.encrypt(text.encode()).decode()


def decrypt(text):
    f = Fernet(get_key())
    return f.decrypt(text.encode()).decode()


def delete_credentials():
    if os.path.exists("credentials.enc"):
        os.remove("credentials.enc")
        print("Cached credentials have been deleted.")
    else:
        print("No cached credentials found.")


def load_cached_credentials():
    if os.path.exists("credentials.enc"):
        with open("credentials.enc", "r") as f:
            creds = json.loads(decrypt(f.read()))
            return creds["username"], creds["password"]
    return None, None


def save_credentials(username, password):
    creds = {"username": username, "password": password}
    encrypted = encrypt(json.dumps(creds))
    with open("credentials.enc", "w") as f:
        f.write(encrypted)


def get_credentials():
    username, password = load_cached_credentials()
    if username and password:
        print("Using cached credentials.")
        return username, password
    username = input("Enter your OpenReview username: ")
    password = getpass.getpass("Enter your OpenReview password: ")
    cache_choice = input(
        "Do you want to cache these credentials? Note that this is insecure. (y/N): "
    ).lower()
    if cache_choice == "y":
        save_credentials(username, password)
        print("Credentials cached.")
    return username, password


def get_unique_filename(base_filename):
    counter = 1
    filename, extension = os.path.splitext(base_filename)
    while os.path.exists(base_filename):
        base_filename = f"{filename}_{counter}{extension}"
        counter += 1
    return base_filename
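

# e.g. if "output/abc123.md" already exists, get_unique_filename returns
# "output/abc123_1.md" (then "_2", "_3", ... until the name is unused).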


def parse_openreview_url(url):
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    forum_id = query_params.get("id", [None])[0]
    referrer = query_params.get("referrer", [None])[0]
    if referrer:
        decoded_referrer = urllib.parse.unquote(referrer)
        venue_match = re.search(r"/group\?id=([^#]+)", decoded_referrer)
        venue_id = venue_match.group(1) if venue_match else None
    else:
        venue_id = None
    return forum_id, venue_id
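

# Hypothetical example (IDs below are illustrative, not a real submission):
#   parse_openreview_url(
#       "https://openreview.net/forum?id=abc123XYZ"
#       "&referrer=%2Fgroup%3Fid%3DNeurIPS.cc%2F2024%2FConference%23your-consoles"
#   )
#   -> ("abc123XYZ", "NeurIPS.cc/2024/Conference")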


def extract_reviewer_id(signature):
    # Reviewer_<id> : NeurIPS 2024
    # Program_Committee_<id> : AAAI25
    match = re.search(r"(Reviewer|Program_Committee)_(\w+)$", signature[0])
    return match.group(2) if match else None
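

# e.g. extract_reviewer_id(
#     ["NeurIPS.cc/2024/Conference/Submission1/Reviewer_Ab3X"]
# ) -> "Ab3X"
# (illustrative signature; actual group paths vary by venue)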


def generate_markdown(notes):
    # Separate the full paper and other notes
    full_paper = next(
        (note for note in notes if note["replyto"] is None), None
    )
    other_notes = [note for note in notes if note["replyto"] is not None]
    markdown = ""
    # Process full paper
    if full_paper:
        markdown += "# Full Paper\n\n"
        markdown += process_full_paper(full_paper)
    # Create a dictionary to store notes by their ID
    notes_by_id = {note["id"]: note for note in other_notes}
    # Group top-level reviews by reviewer
    reviewer_groups = {}
    if full_paper:  # without the root note there is nothing to group under
        for note in other_notes:
            if note["replyto"] == full_paper["id"]:
                reviewer_id = extract_reviewer_id(note["signatures"])
                if reviewer_id:
                    reviewer_groups[reviewer_id] = note["id"]
    # Process reviewer notes and their replies
    for reviewer_id, top_review_id in reviewer_groups.items():
        markdown += f"## {reviewer_id}\n\n"
        markdown += process_note_thread(top_review_id, notes_by_id)
    return markdown.strip()
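

# The generated document has roughly this shape:
#   # Full Paper
#   ### Paper ID: <forum id> ...metadata fields...
#   ## <reviewer id>
#   ### Note <n> (ID: <note id>) ...review fields...
#   ### Rebuttal to Note <n> ...
# with replies prefixed by one extra space per nesting level.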


def process_note_thread(note_id, notes_by_id, depth=0):
    markdown = ""
    note = notes_by_id[note_id]
    # Process the current note
    markdown += " " * depth
    markdown += process_note(
        note, is_rebuttal="Authors" in note["signatures"][0]
    )
    # Process replies to this note
    replies = [n for n in notes_by_id.values() if n["replyto"] == note_id]
    replies.sort(key=itemgetter("cdate"))
    for reply in replies:
        markdown += process_note_thread(reply["id"], notes_by_id, depth + 1)
    return markdown


def process_full_paper(paper):
    markdown = f"### Paper ID: {paper['id']}\n\n"
    content = paper["content"]
    # Essential fields that should always be present
    essential_fields = ["title", "authors", "abstract"]
    # Additional fields to check for
    additional_fields = [
        "keywords",
        "primary_keywords",
        "secondary_keywords",
        "TLDR",
        "venue",
        "paperhash",
    ]
    try:
        # Process essential fields
        for field in essential_fields:
            if field in content:
                if field == "authors":
                    markdown += f"**{field.capitalize()}:** {', '.join(content[field]['value'])}\n\n"
                else:
                    markdown += f"**{field.capitalize()}:** {content[field]['value']}\n\n"
            else:
                print(f"Warning: Essential field '{field}' is missing.")
        # Process additional fields
        for field in additional_fields:
            if field in content:
                if isinstance(content[field]["value"], list):
                    markdown += f"**{field.capitalize()}:** {', '.join(content[field]['value'])}\n\n"
                else:
                    markdown += f"**{field.capitalize()}:** {content[field]['value']}\n\n"
    except KeyError as e:
        print(f"KeyError: {e}")
        print("Available keys in content:")
        for key in content.keys():
            print(f"- {key}")
        # Add available information to markdown
        for key, value in content.items():
            if isinstance(value, dict) and "value" in value:
                markdown += f"**{key.capitalize()}:** {value['value']}\n\n"
    markdown += "---\n\n"
    return markdown


def process_note(note, is_rebuttal=False):
    markdown = f"### {'Rebuttal to ' if is_rebuttal else ''}Note {note['number']} (ID: {note['id']})\n\n"
    content = note["content"]
    for key, value in content.items():
        if isinstance(value, dict) and "value" in value:
            markdown += f"**{key.capitalize()}:**\n {value['value']}\n\n"
    markdown += "---\n\n"
    return markdown


def markdown_to_odt(markdown_text, output_filename):
    # Convert markdown to HTML
    html = markdown.markdown(markdown_text)
    # Convert HTML to ODT
    pypandoc.convert_text(
        html, "odt", format="html", outputfile=output_filename
    )
    print(f"ODT file created: {output_filename}")


def ensure_output_dir(output_dir: str) -> Path:
    """Create output directory if it doesn't exist and return Path object."""
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    return output_path


if __name__ == "__main__":
    # Ask for the URL
    url = input("Enter the OpenReview URL: ")
    # Parse the URL to get forum_id and venue_id
    forum_id, venue_id = parse_openreview_url(url)
    if not forum_id or not venue_id:
        print("Error: Couldn't extract forum ID or venue ID from the URL.")
        sys.exit(1)
    print(f"Extracted forum ID: {forum_id}")
    print(f"Extracted venue ID: {venue_id}")
    username, password = get_credentials()
    try:
        client = openreview.api.OpenReviewClient(
            baseurl="https://api2.openreview.net",
            username=username,
            password=password,
        )
        # Fetch the venue group first; this also validates that the account
        # has access to the venue before the notes are pulled.
        venue_group = client.get_group(venue_id)
        notes = client.get_notes(forum=forum_id)
        markdown_output = generate_markdown([note.__dict__ for note in notes])
        print(markdown_output)
        # Create output directory
        output_dir = ensure_output_dir("output")
        # Save markdown file
        markdown_filename = output_dir / f"{forum_id}.md"
        markdown_filename = get_unique_filename(str(markdown_filename))
        with open(markdown_filename, "w", encoding="utf-8") as md_file:
            md_file.write(markdown_output)
        print(f"Markdown file created: {markdown_filename}")
        # Save ODT file
        odt_filename = output_dir / f"{forum_id}.odt"
        odt_filename = get_unique_filename(str(odt_filename))
        markdown_to_odt(markdown_output, str(odt_filename))
    except openreview.openreview.OpenReviewException as e:
        if "ForbiddenError" in str(e):
            print(
                "Error: You don't have permission to access this venue or paper."
            )
            print("This could be because:")
            print("1. You're not logged in with the correct account.")
            print(
                "2. You don't have the necessary permissions for this venue."
            )
            print("3. The paper or venue ID might be incorrect.")
            print(
                "\nPlease check your credentials and the URL, then try again."
            )
            delete_choice = input(
                "Would you like to delete the cached credentials? (y/N): "
            ).lower()
            if delete_choice == "y":
                delete_credentials()
            else:
                print("Cached credentials were not deleted.")
        elif "Invalid username or password" in str(e):
            print("Error: Invalid username or password.")
            delete_choice = input(
                "Would you like to delete the cached credentials? (y/N): "
            ).lower()
            if delete_choice == "y":
                delete_credentials()
                print(
                    "Please run the script again and enter your credentials."
                )
            else:
                print("Cached credentials were not deleted.")
        else:
            print(f"An OpenReview error occurred: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        sys.exit(1)