forked from akumria/findforks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfindforks2.py
executable file
·123 lines (96 loc) · 3.48 KB
/
findforks2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python3
import time
import argparse
import json
import os
import subprocess
import urllib.error
import urllib.parse
import urllib.request
#import requests_cache
def find_forks(remote, max_pages=97):
    """
    Query the GitHub API for all forks of a repository.

    The repository is identified from the URL of the git remote ``remote``.
    Each fetched page of the forks listing is saved as a JSON file in the
    ``data`` directory; pages already present there are loaded from disk
    instead of being requested again.

    Args:
        remote: Name of the git remote whose URL identifies the repository.
        max_pages: Highest page number of the forks listing to request.

    Yields:
        ``(login, ssh_url)`` tuples, one per fork.

    Raises:
        ValueError: If the URL of ``remote`` cannot be parsed into a
            username and project.
    """
    fork_url_template = u"https://api.github.com/repos/{username}/{project}/forks?page={page}"

    completed = subprocess.run(
        ["git", "remote", "get-url", remote],
        stdout=subprocess.PIPE,
    )
    # git ends its output with a newline; strip it so it cannot leak into
    # the project name (and from there into the API URL and cache paths).
    repo_url = completed.stdout.decode().strip()

    parsed = parse_git_remote_output(repo_url)
    if parsed is None:
        raise ValueError(
            "Cannot determine repository from remote {!r} (url: {!r})".format(
                remote, repo_url
            )
        )
    (username, project) = parsed

    # os.path has no mkdir(); makedirs with exist_ok also avoids the
    # check-then-create race.
    os.makedirs("data", exist_ok=True)

    for page in range(1, max_pages + 1):
        datafile = f"data/{username}{project}{page}.json"
        resp_json = []
        if os.path.exists(datafile):
            with open(datafile) as fi:
                print("Loading", datafile)
                resp_json = json.load(fi)
        else:
            print("missing", datafile)
            url = fork_url_template.format(
                username=username, project=project, page=page
            )
            print(url)
            time.sleep(3)  # stay well below the API rate limit
            try:
                with urllib.request.urlopen(url) as resp:
                    jsond = resp.read().decode("utf8")
            except urllib.error.HTTPError as e:
                if e.code == 404:
                    # A plain return ends the generator; raising
                    # StopIteration here would surface as RuntimeError
                    # (PEP 479).
                    return
                # Any other HTTP error: report it and move on to the next page.
                print(e)
                continue
            resp_json = json.loads(jsond)
            if not resp_json:  # empty array: we are past the last page
                print("finished", url, resp_json)
                return
            with open(datafile, "w") as fo:
                fo.write(jsond)

        for fork in resp_json:
            login = fork['owner']['login']
            print(login)
            yield (login, fork['ssh_url'])
def parse_git_remote_output(repo_url):
    """
    Given a repository URL, split it into its component parts.

    Surrounding whitespace (such as the trailing newline printed by
    ``git remote get-url``) is ignored. Only a trailing ``.git`` suffix is
    removed from the project, so project names that contain dots are kept
    intact.

    convert git@github.com:akumria/all_forks.git to
        username: akumria
        project: all_forks

    convert https://github.com/akumria/all_forks.git to
        username: akumria
        project: all_forks

    Returns:
        A ``(username, project)`` tuple, or ``None`` when the URL is neither
        an SCP-style ``git@host:user/project`` address nor an ``http(s)`` URL.
    """
    repo_url = repo_url.strip()

    if repo_url.startswith("git@"):
        # SCP-style address: git@<host>:<username>/<project>[.git]
        (_service, repo) = repo_url.split(":", 1)
        (username, project) = repo.split("/")
    elif repo_url.startswith("http"):
        # Path looks like /<username>/<project>[.git]; element 0 is the
        # empty string before the leading slash.
        path_parts = urllib.parse.urlparse(repo_url).path.split("/")
        (username, project) = path_parts[1:3]
    else:
        return None

    # Also handles the case where there is no '.git' at all.
    if project.endswith(".git"):
        project = project[:-len(".git")]
    return (username, project)
def setup_remote(remote, repository_url):
    """
    Register ``repository_url`` as a git remote named ``remote``.

    The pair is echoed to stdout before ``git remote add`` is invoked.
    """
    print(f"{remote}: {repository_url}")
    add_remote_cmd = ["git", "remote", "add", remote, repository_url]
    subprocess.run(add_remote_cmd)
def main():
    """Parse the command line and add a git remote for every fork found."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--remote", default="origin", help="Which remote to use")
    options = cli.parse_args()

    for fork_name, fork_url in find_forks(options.remote):
        setup_remote(fork_name, fork_url)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()