-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
41 lines (34 loc) · 945 Bytes
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
package main
import (
"fmt"
"github.com/wrkode/greenscraper/cmd"
"regexp"
"sync"
)
// main reads keyword patterns from keywords.txt and URLs from urls.txt,
// compiles every keyword as a regular expression, and hands each URL to
// cmd.ProcessURL, allowing at most concurrentLimit calls to run at once.
//
// A failure to read either input file, or a keyword that is not a valid
// regular expression, aborts the program with a panic describing the cause.
func main() {
	keywords, err := cmd.ReadLinesFromFile("keywords.txt")
	if err != nil {
		panic(fmt.Sprintf("Failed to read keywords from file: %v", err))
	}

	urls, err := cmd.ReadLinesFromFile("urls.txt")
	if err != nil {
		panic(fmt.Sprintf("Failed to read URLs from file: %v", err))
	}

	// Fixed pattern, so MustCompile is appropriate here.
	titleRegex := regexp.MustCompile(".*'>(.*?)<span class=\"vs\">.*")

	// Keywords are runtime input, so compile them with regexp.Compile and
	// report exactly which entry is malformed instead of relying on the
	// generic MustCompile panic.
	keywordRegexes := make([]*regexp.Regexp, len(keywords))
	for i, keyword := range keywords {
		re, compileErr := regexp.Compile(keyword)
		if compileErr != nil {
			panic(fmt.Sprintf("Failed to compile keyword #%d %q from keywords.txt: %v", i+1, keyword, compileErr))
		}
		keywordRegexes[i] = re
	}

	const concurrentLimit = 5
	// Buffered channel used as a counting semaphore: a send blocks once
	// concurrentLimit tokens are outstanding.
	sem := make(chan struct{}, concurrentLimit)

	var wg sync.WaitGroup
	for _, url := range urls {
		wg.Add(1)
		sem <- struct{}{} // acquire a token before starting the worker

		go func(u string) {
			// Release the token however the worker goroutine exits.
			defer func() { <-sem }()

			// NOTE(review): ProcessURL receives &wg and is presumably
			// responsible for calling wg.Done — confirm in package cmd.
			cmd.ProcessURL(u, keywordRegexes, titleRegex, &wg)
		}(url)
	}

	wg.Wait()
}