hotfix: site scraping issues
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
59704119e1
commit
b1f0c375c6
@ -43,8 +43,8 @@ func (config *Config) GetConfig() error {
|
|||||||
config.DbPassword = qrzsection.Key("db_password").MustString("password")
|
config.DbPassword = qrzsection.Key("db_password").MustString("password")
|
||||||
config.Cron = qrzsection.Key("cron").MustString("@every 1h")
|
config.Cron = qrzsection.Key("cron").MustString("@every 1h")
|
||||||
|
|
||||||
config.URLBase = `http://groupe-frs.hamstation.eu/index_qrz_liste_%s.php`
|
config.URLBase = `https://groupe-frs.hamstation.eu/index_qrz_liste_%s.php`
|
||||||
config.URLBaseForGroups = "http://groupe-frs.hamstation.eu/bdd/menu_listing_division.php"
|
config.URLBaseForGroups = "https://groupe-frs.hamstation.eu/bdd/menu_listing_division.php"
|
||||||
|
|
||||||
config.QrzGroups = []string{}
|
config.QrzGroups = []string{}
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const gxpath = "//center/form/select/*"
|
const gxpath = "//center/form/select/*"
|
||||||
|
const agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||||
|
|
||||||
var frsre = regexp.MustCompile(`^ [0-9]{1,4}\s[A-Z]{1,5}\s[0-9]{1,5}`)
|
var frsre = regexp.MustCompile(`^ [0-9]{1,4}\s[A-Z]{1,5}\s[0-9]{1,5}`)
|
||||||
var gre1 = regexp.MustCompile(`.*document.write\('(.*)'\).*`)
|
var gre1 = regexp.MustCompile(`.*document.write\('(.*)'\).*`)
|
||||||
@ -76,8 +77,11 @@ func Run(config config.Config) (err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getGroups(urlbase string) (groups []string, err error) {
|
func getGroups(urlbase string) (groups []string, err error) {
|
||||||
clt := &http.Client{}
|
client := &http.Client{}
|
||||||
resp, err := clt.Get(urlbase)
|
req, _ := http.NewRequest("GET", urlbase, nil)
|
||||||
|
req.Header.Set("User-Agent", agent)
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
}
|
}
|
||||||
@ -87,7 +91,14 @@ func getGroups(urlbase string) (groups []string, err error) {
|
|||||||
}
|
}
|
||||||
retstr := string(pagebody)
|
retstr := string(pagebody)
|
||||||
|
|
||||||
body := gre1.FindStringSubmatch(retstr)[1]
|
var body string
|
||||||
|
res := gre1.FindStringSubmatch(retstr)
|
||||||
|
if len(res) > 0 {
|
||||||
|
body = res[1]
|
||||||
|
} else {
|
||||||
|
log.Printf("error getting body for url %s\n", urlbase)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
htmlpage, err := htmlquery.Parse(strings.NewReader(body))
|
htmlpage, err := htmlquery.Parse(strings.NewReader(body))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -108,9 +119,11 @@ func getGroups(urlbase string) (groups []string, err error) {
|
|||||||
|
|
||||||
// getBody fetch html page
|
// getBody fetch html page
|
||||||
func getBody(url string) (string, error) {
|
func getBody(url string) (string, error) {
|
||||||
var client http.Client
|
client := &http.Client{}
|
||||||
|
req, _ := http.NewRequest("GET", url, nil)
|
||||||
|
req.Header.Set("User-Agent", agent)
|
||||||
|
|
||||||
resp, err := client.Get(url)
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user