optimized query insertion

This commit is contained in:
Paul 2020-02-29 17:37:01 +01:00
parent e19450d529
commit 6122fa4de3
4 changed files with 73 additions and 34 deletions

View File

@ -3,7 +3,7 @@
## Summary
qrz is a small program that fetches FRS identifiers from hamstation.eu and stores them in a database.
Currently WIP
It supports incremental data insertion
## Howto

View File

@ -15,33 +15,43 @@ func main() {
config.GetConfig()
config.DbSchema = fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s` (`id` int(8) NOT NULL AUTO_INCREMENT, `qrz` varchar(25) NOT NULL, `name` varchar(25) DEFAULT NULL, `address` varchar(50) DEFAULT NULL, `city` varchar(50) DEFAULT NULL, `zipcode` varchar(5) DEFAULT NULL, `dept` varchar(50) DEFAULT NULL, `country` varchar(25) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `qrz` (`qrz`,`name`,`city`,`dept`) USING BTREE, KEY `test` (`country`), FULLTEXT KEY `city` (`city`), FULLTEXT KEY `dept` (`dept`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;", config.DbTable)
config.DbInsertStatement = "INSERT IGNORE INTO %s (qrz,name,city,dept,country) VALUES ('%s','%s','%s','%s','%s');"
db, err := sqlx.Connect("mysql", fmt.Sprintf("%s:%s@tcp(%s)/%s", config.DbUsername, config.DbPassword, config.DbHostname, config.DbName))
if err != nil {
log.Fatalln(err)
}
bodyString, err := qrz.GetBody(config)
if err != nil {
log.Fatalln(err)
}
frsPeople, err := qrz.GetFrsEntries(config, bodyString)
if err != nil {
log.Fatalln(err)
}
_, err = db.Exec(config.DbSchema)
if err != nil {
log.Fatalln(err)
}
bodyStr, err := qrz.GetBody(config)
if err != nil {
log.Fatalln(err)
}
frsPeople, err := qrz.GetFrsEntries(config, bodyStr)
if err != nil {
log.Fatalln(err)
}
existingQrz, err := qrz.GetCurrentEntries(config, *db)
if err != nil {
log.Fatalln(err)
}
err = qrz.DiscardExistingEntries(config, &frsPeople, existingQrz)
if err != nil {
log.Fatalln(err)
}
if len(frsPeople) > 0 {
err = qrz.InsertFrsEntryToDB(config, *db, frsPeople)
if err != nil {
log.Fatalln(err)
}
} else {
fmt.Println("No entry to insert in database")
}
}

View File

@ -2,6 +2,7 @@ package config
import (
"flag"
"fmt"
"git.paulbsd.com/paulbsd/qrz/utils"
"gopkg.in/ini.v1"
@ -33,6 +34,14 @@ func (config *Config) GetConfig() error {
config.FrsConfig.XPath = qrzsection.Key("frs_xpath").MustString("//tr")
config.FrsConfig.Regex = qrzsection.Key("frs_regex").MustString(".*")
config.DbSchema = fmt.Sprintf("CREATE TABLE IF NOT EXISTS `%s` (`id` int(8) NOT NULL AUTO_INCREMENT, `qrz` varchar(25) NOT NULL, `name` varchar(25) DEFAULT NULL, `address` varchar(50) DEFAULT NULL, `city` varchar(50) DEFAULT NULL, `zipcode` varchar(5) DEFAULT NULL, `dept` varchar(50) DEFAULT NULL, `country` varchar(25) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `qrz` (`qrz`,`name`,`city`,`dept`) USING BTREE, KEY `test` (`country`), FULLTEXT KEY `city` (`city`), FULLTEXT KEY `dept` (`dept`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;", config.DbTable)
config.DbInsertStatement = "INSERT IGNORE INTO %s (qrz,name,city,dept,country) VALUES ('%s','%s','%s','%s','%s');"
config.DbGetQrzStatement = "SELECT qrz FROM %s;"
config.DbCheckStatement = "SELECT COUNT(*) FROM %s WHERE qrz = '%s';"
return nil
}
@ -45,6 +54,8 @@ type Config struct {
DbSchema string
DbTable string
DbInsertStatement string
DbGetQrzStatement string
DbCheckStatement string
FrsConfig struct {
URL string
XPath string

View File

@ -36,40 +36,58 @@ func GetBody(config config.Config) (string, error) {
}
// GetFrsEntries get FRS entries from html body
func GetFrsEntries(config config.Config, body string) ([]FrsEntry, error) {
var err error
var frsentries []FrsEntry
func GetFrsEntries(config config.Config, body string) (frsentries map[string]FrsEntry, err error) {
frsentries = make(map[string]FrsEntry)
re := regexp.MustCompile(config.FrsConfig.Regex)
htmlpage, err := htmlquery.Parse(strings.NewReader(body))
if err != nil {
return frsentries, err
return
}
for _, n := range htmlquery.Find(htmlpage, config.FrsConfig.XPath) {
var frs FrsEntry
td := htmlquery.Find(n, "//td")
if re.MatchString(htmlquery.InnerText(td[0])) {
frs.QRZ = strings.Replace(htmlquery.InnerText(td[0]), "'", "\\'", -1)
frs.Name = strings.Replace(htmlquery.InnerText(td[1]), "'", "\\'", -1)
frs.City = strings.Replace(htmlquery.InnerText(td[2]), "'", "\\'", -1)
frs.Dept = strings.Replace(htmlquery.InnerText(td[3]), "'", "\\'", -1)
frs.Country = strings.Replace(htmlquery.InnerText(td[4]), "'", "\\'", -1)
frs := FrsEntry{
QRZ: strings.Replace(htmlquery.InnerText(td[0]), "'", "\\'", -1),
Name: strings.Replace(htmlquery.InnerText(td[1]), "'", "\\'", -1),
City: strings.Replace(htmlquery.InnerText(td[2]), "'", "\\'", -1),
Dept: strings.Replace(htmlquery.InnerText(td[3]), "'", "\\'", -1),
Country: strings.Replace(htmlquery.InnerText(td[4]), "'", "\\'", -1)}
frsentries[frs.QRZ] = frs
}
}
return
}
frsentries = append(frsentries, frs)
// GetCurrentEntries fetches the entries already present in the database.
//
// It formats config.DbGetQrzStatement with config.DbTable, runs the resulting
// query on db and collects the scanned string value of every returned row.
//
// If the query, a row scan, or the row iteration fails, the error is returned
// together with a nil slice.
func GetCurrentEntries(config config.Config, db sqlx.DB) (existingQRZ []string, err error) {
	q := fmt.Sprintf(config.DbGetQrzStatement, config.DbTable)
	rows, err := db.Query(q)
	if err != nil {
		// rows is nil when Query fails; iterating over it would panic.
		return nil, err
	}
	// Make sure the result set is released on every return path.
	defer rows.Close()

	for rows.Next() {
		var id string
		if err = rows.Scan(&id); err != nil {
			return nil, err
		}
		existingQRZ = append(existingQRZ, id)
	}
	// Next reports false both at the end of the rows and on failure;
	// Err distinguishes the two.
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return existingQRZ, nil
}
// DiscardExistingEntries removes already-known entries from the map that
// frsPeople points to.
//
// Every string in existingQRZ is used as a key and deleted from the map, which
// is modified in place; keys that are not present are ignored. The config
// argument is not used by this function. A nil frsPeople pointer is treated as
// having nothing to discard. The returned error is always nil.
func DiscardExistingEntries(config config.Config, frsPeople *map[string]FrsEntry, existingQRZ []string) (err error) {
	if frsPeople == nil {
		return nil
	}
	people := *frsPeople
	for _, entry := range existingQRZ {
		delete(people, entry)
	}
	return nil
}
// InsertFrsEntryToDB inserts frs entries to database
func InsertFrsEntryToDB(config config.Config, db sqlx.DB, frsPeople []FrsEntry) error {
var err error
func InsertFrsEntryToDB(config config.Config, db sqlx.DB, frsPeople map[string]FrsEntry) (err error) {
tx := db.MustBegin()
var qrzNum int
fmt.Println(fmt.Sprintf("Starting insert %d entries", len(frsPeople)))
fmt.Println(fmt.Sprintf("Starting insert of %d entries", len(frsPeople)))
for _, j := range frsPeople {
query := fmt.Sprintf(config.DbInsertStatement, config.DbTable, j.QRZ, j.Name, j.City, j.Dept, j.Country)
@ -81,7 +99,7 @@ func InsertFrsEntryToDB(config config.Config, db sqlx.DB, frsPeople []FrsEntry)
if err != nil {
return err
}
fmt.Println(fmt.Sprintf("Commited %d entries", qrzNum))
fmt.Println(fmt.Sprintf("Committed %d entries", qrzNum))
return nil
}