From 0fa2d41f52b68996da17ec06693a19ac04bb3abe Mon Sep 17 00:00:00 2001 From: Paul Lecuq Date: Sat, 20 Apr 2024 09:35:30 +0200 Subject: [PATCH] hotfix: site scraping issues --- src/config/main.go | 4 ++-- src/qrz/main.go | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/config/main.go b/src/config/main.go index 9467f8f..8ff511c 100644 --- a/src/config/main.go +++ b/src/config/main.go @@ -43,8 +43,8 @@ func (config *Config) GetConfig() error { config.DbPassword = qrzsection.Key("db_password").MustString("password") config.Cron = qrzsection.Key("cron").MustString("@every 1h") - config.URLBase = `http://groupe-frs.hamstation.eu/index_qrz_liste_%s.php` - config.URLBaseForGroups = "http://groupe-frs.hamstation.eu/bdd/menu_listing_division.php" + config.URLBase = `https://groupe-frs.hamstation.eu/index_qrz_liste_%s.php` + config.URLBaseForGroups = "https://groupe-frs.hamstation.eu/bdd/menu_listing_division.php" config.QrzGroups = []string{} diff --git a/src/qrz/main.go b/src/qrz/main.go index cb844e3..a108724 100644 --- a/src/qrz/main.go +++ b/src/qrz/main.go @@ -15,6 +15,7 @@ import ( ) const gxpath = "//center/form/select/*" +const agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" var frsre = regexp.MustCompile(`^ [0-9]{1,4}\s[A-Z]{1,5}\s[0-9]{1,5}`) var gre1 = regexp.MustCompile(`.*document.write\('(.*)'\).*`) @@ -76,8 +77,11 @@ func Run(config config.Config) (err error) { } func getGroups(urlbase string) (groups []string, err error) { - clt := &http.Client{} - resp, err := clt.Get(urlbase) + client := &http.Client{} + req, _ := http.NewRequest("GET", urlbase, nil) + req.Header.Set("User-Agent", agent) + + resp, err := client.Do(req) if err != nil { log.Println(err) } @@ -87,7 +91,14 @@ func getGroups(urlbase string) (groups []string, err error) { } retstr := string(pagebody) - body := gre1.FindStringSubmatch(retstr)[1] + var body string + res := gre1.FindStringSubmatch(retstr) + if len(res) > 0 { + body = res[1] + } else { + log.Printf("error getting body for url %s\n", urlbase) + return + } htmlpage, err := htmlquery.Parse(strings.NewReader(body)) if err != nil { @@ -108,9 +119,11 @@ func getGroups(urlbase string) (groups []string, err error) { // getBody fetch html page func getBody(url string) (string, error) { - var client http.Client + client := &http.Client{} + req, _ := http.NewRequest("GET", url, nil) + req.Header.Set("User-Agent", agent) - resp, err := client.Get(url) + resp, err := client.Do(req) if err != nil { return "", err }