-
Notifications
You must be signed in to change notification settings - Fork 611
/
Copy pathlinkcheck.py
executable file
·156 lines (118 loc) · 4.77 KB
/
linkcheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python3
# This is free and unencumbered software released into the public
# domain.
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a
# compiled binary, for any purpose, commercial or non-commercial, and
# by any means.
# In jurisdictions that recognize copyright laws, the author or
# authors of this software dedicate any and all copyright interest in
# the software to the public domain. We make this dedication for the
# benefit of the public at large and to the detriment of our heirs
# and successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to
# this software under copyright law.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# For more information, please refer to <http://unlicense.org>
"""Check for broken links.
This finds links like this...
[click here](some-file.md)
[or here](../some/path/another-file.md)
![here's an image](../images/some-cool-image.png)
...but not like this:
[some website](http://github.com/)
[another website](https://github.com/)
[local link](#some-title)
"""
import os
import posixpath
import common
def check(this_file, target, title, titledict):
    """Check if a link's target is like it should be.

    Args:
        this_file: Path of the markdown file that contains the link.
        target: File part of the link. Unless it is an http(s) URL, it
            is resolved relative to the directory of this_file.
        title: Title the link points to inside the target, or None if
            the link has no title part.
        titledict: Dictionary like {filename: [title1, title2, ...]}.

    Returns:
        An error message string or "ok".
    """
    if target.startswith(('http://', 'https://')):
        # We don't need this currently, but checking these links could
        # be added later.
        return "ok"

    path = posixpath.normpath(
        posixpath.join(posixpath.dirname(this_file), target))

    if not os.path.exists(path):
        return "doesn't exist"

    if target.endswith('/'):
        # A trailing slash means that the link must point to a directory.
        if not os.path.isdir(path):
            return "not a directory"
    elif not os.path.isfile(path):
        # Anything else must be a file.
        return "not a file"

    # A target without an entry in titledict has no known titles, so a
    # title link into it is reported as broken instead of raising
    # KeyError and aborting the whole run.
    if title is not None and title not in titledict.get(path, ()):
        return "no title named %s" % title
    return "ok"
def find_titles(filename):
    """Read titles of a markdown file and return a list of them.

    Every line that starts with ``#`` and is not inside a fenced code
    block counts as a title. The leading ``#`` characters and the
    surrounding whitespace are stripped and the rest is passed through
    common.header_link() before it is added to the result.

    A code block that is never closed is treated as running to the end
    of the file.
    """
    result = []
    in_code_block = False
    with open(filename, 'r') as f:
        for line in f:
            if in_code_block:
                # skip the content of the code block to avoid detecting
                # comments as titles, only a bare ``` line ends it
                if line.rstrip() == '```':
                    in_code_block = False
                continue
            if line.startswith('```'):
                # a code block starts here
                in_code_block = True
                continue
            if line.startswith('#'):
                # found a title
                result.append(common.header_link(line.lstrip('#').strip()))
    return result
def find_links(this_file):
    """Collect the links of a markdown file.

    Return a list of (file, title, lineno) tuples. The title is the
    part of the link target after the first ``#``, or None if the
    target contains no ``#`` at all.
    """
    links = []
    with open(this_file, 'r') as f:
        for match, lineno in common.find_links(f):
            target = match.group(2)
            file, hash_sign, title = target.partition('#')
            if not hash_sign:
                # no title part, the whole target is the file
                title = None
            elif not file:
                # [blabla](#hi) is a link to a title in this file
                file = posixpath.basename(this_file)
            links.append((file, title, lineno))
    return links
def get_line(filename, lineno):
    """Return line number lineno of a file, counting from 1.

    The line is returned as read from the file, including its line
    ending. ValueError is raised if the file has no such line.
    """
    with open(filename, 'r') as f:
        numbered_lines = enumerate(f, start=1)
        wanted = next(
            (text for number, text in numbered_lines if number == lineno),
            None)
    if wanted is None:
        raise ValueError("%s is less than %d lines long" % (filename, lineno))
    return wanted
def main():
    """Check the links of all markdown files and print the broken ones.

    The files come from common.get_markdown_files(). For each broken
    link the file name, the line number, the error message and the line
    itself are printed, followed by a summary of how many links were
    broken.
    """
    print("Searching for titles and links...")
    titledict = {}    # {filename: [title1, title2, ...]}
    linkdict = {}     # {filename: [(file, title, lineno), ...]}
    for markdown_file in common.get_markdown_files():
        titledict[markdown_file] = find_titles(markdown_file)
        linkdict[markdown_file] = find_links(markdown_file)

    print("Checking the links...")
    checked = 0
    failures = 0
    for source_file, links in linkdict.items():
        for target, title, lineno in links:
            checked += 1
            verdict = check(source_file, target, title, titledict)
            if verdict == "ok":
                continue
            failures += 1
            print(" file %s, line %d: %s" % (source_file, lineno, verdict))
            print(" %s" % get_line(source_file, lineno))

    print("%d/%d links seem to be broken." % (failures, checked))


if __name__ == '__main__':
    main()