hotfix: site scraping issues
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Paul 2024-04-20 09:35:30 +02:00
parent bf3be98a9a
commit 0fa2d41f52
2 changed files with 20 additions and 7 deletions

View File

@ -43,8 +43,8 @@ func (config *Config) GetConfig() error {
config.DbPassword = qrzsection.Key("db_password").MustString("password")
config.Cron = qrzsection.Key("cron").MustString("@every 1h")
config.URLBase = `http://groupe-frs.hamstation.eu/index_qrz_liste_%s.php`
config.URLBaseForGroups = "http://groupe-frs.hamstation.eu/bdd/menu_listing_division.php"
config.URLBase = `https://groupe-frs.hamstation.eu/index_qrz_liste_%s.php`
config.URLBaseForGroups = "https://groupe-frs.hamstation.eu/bdd/menu_listing_division.php"
config.QrzGroups = []string{}

View File

@ -15,6 +15,7 @@ import (
)
const gxpath = "//center/form/select/*"
const agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
var frsre = regexp.MustCompile(`^ [0-9]{1,4}\s[A-Z]{1,5}\s[0-9]{1,5}`)
var gre1 = regexp.MustCompile(`.*document.write\('(.*)'\).*`)
@ -76,8 +77,11 @@ func Run(config config.Config) (err error) {
}
func getGroups(urlbase string) (groups []string, err error) {
clt := &http.Client{}
resp, err := clt.Get(urlbase)
client := &http.Client{}
req, _ := http.NewRequest("GET", urlbase, nil)
req.Header.Set("User-Agent", agent)
resp, err := client.Do(req)
if err != nil {
log.Println(err)
}
@ -87,7 +91,14 @@ func getGroups(urlbase string) (groups []string, err error) {
}
retstr := string(pagebody)
body := gre1.FindStringSubmatch(retstr)[1]
var body string
res := gre1.FindStringSubmatch(retstr)
if len(res) > 0 {
body = res[1]
} else {
log.Printf("error getting body for url %s\n", urlbase)
return
}
htmlpage, err := htmlquery.Parse(strings.NewReader(body))
if err != nil {
@ -108,9 +119,11 @@ func getGroups(urlbase string) (groups []string, err error) {
// getBody fetches the HTML page at url.
func getBody(url string) (string, error) {
var client http.Client
client := &http.Client{}
req, _ := http.NewRequest("GET", url, nil)
req.Header.Set("User-Agent", agent)
resp, err := client.Get(url)
resp, err := client.Do(req)
if err != nil {
return "", err
}