2016-05-01 06:00:43 +02:00
|
|
|
package cache
|
2016-04-26 22:12:46 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"crypto/md5"
|
|
|
|
"encoding/hex"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"log"
|
2016-05-02 17:32:44 +02:00
|
|
|
"mime"
|
|
|
|
"net/http"
|
2016-04-26 22:12:46 +02:00
|
|
|
"os"
|
|
|
|
"path"
|
2016-05-02 17:32:44 +02:00
|
|
|
"strconv"
|
2016-04-26 22:12:46 +02:00
|
|
|
"sync"
|
2016-05-02 17:32:44 +02:00
|
|
|
"time"
|
2016-04-26 22:12:46 +02:00
|
|
|
)
|
|
|
|
|
2016-05-01 06:00:43 +02:00
|
|
|
// Reader is a generic interface for reading cache entries either from disk or
|
|
|
|
// directly attached to a downloader.
|
|
|
|
type Reader interface {
|
|
|
|
io.ReadCloser
|
|
|
|
GetEntry() (*Entry, error)
|
|
|
|
}
|
|
|
|
|
2016-04-26 22:12:46 +02:00
|
|
|
// Cache provides access to entries in the cache.
|
|
|
|
type Cache struct {
|
|
|
|
mutex sync.Mutex
|
|
|
|
directory string
|
2016-05-01 06:00:43 +02:00
|
|
|
downloaders map[string]*downloader
|
2016-04-26 22:12:46 +02:00
|
|
|
waitGroup sync.WaitGroup
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewCache creates a new cache in the specified directory.
|
2016-04-27 00:15:31 +02:00
|
|
|
func NewCache(directory string) (*Cache, error) {
|
|
|
|
if err := os.MkdirAll(directory, 0775); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-04-26 22:12:46 +02:00
|
|
|
return &Cache{
|
|
|
|
directory: directory,
|
2016-05-01 06:00:43 +02:00
|
|
|
downloaders: make(map[string]*downloader),
|
2016-04-27 00:15:31 +02:00
|
|
|
}, nil
|
2016-04-26 22:12:46 +02:00
|
|
|
}
|
|
|
|
|
2016-05-02 17:32:44 +02:00
|
|
|
// getFilenames returns the filenames for the JSON and data files from a URL.
|
|
|
|
func (c *Cache) getFilenames(rawurl string) (hash, jsonFilename, dataFilename string) {
|
|
|
|
b := md5.Sum([]byte(rawurl))
|
|
|
|
hash = hex.EncodeToString(b[:])
|
|
|
|
jsonFilename = path.Join(c.directory, fmt.Sprintf("%s.json", hash))
|
|
|
|
dataFilename = path.Join(c.directory, fmt.Sprintf("%s.data", hash))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2016-05-01 06:00:43 +02:00
|
|
|
// GetReader obtains a Reader for the specified rawurl. If a downloader
|
2016-04-26 22:12:46 +02:00
|
|
|
// currently exists for the URL, a live reader is created and connected to it.
|
|
|
|
// If the URL exists in the cache, it is read using the standard file API. If
|
2016-05-07 07:33:11 +02:00
|
|
|
// not, a downloader and live reader are created.
|
|
|
|
func (c *Cache) GetReader(rawurl string, maxAge time.Duration) (Reader, error) {
|
2016-05-02 17:32:44 +02:00
|
|
|
hash, jsonFilename, dataFilename := c.getFilenames(rawurl)
|
2016-04-26 22:12:46 +02:00
|
|
|
c.mutex.Lock()
|
|
|
|
defer c.mutex.Unlock()
|
|
|
|
d, ok := c.downloaders[hash]
|
|
|
|
if !ok {
|
|
|
|
_, err := os.Stat(jsonFilename)
|
|
|
|
if err != nil {
|
|
|
|
if !os.IsNotExist(err) {
|
2016-05-01 06:00:43 +02:00
|
|
|
return nil, err
|
2016-04-26 22:12:46 +02:00
|
|
|
}
|
2016-05-07 07:33:11 +02:00
|
|
|
} else {
|
2016-05-01 06:00:43 +02:00
|
|
|
r, err := newDiskReader(jsonFilename, dataFilename)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-04-26 22:12:46 +02:00
|
|
|
}
|
2016-05-07 07:33:11 +02:00
|
|
|
e, _ := r.GetEntry()
|
|
|
|
lastModified, _ := time.Parse(http.TimeFormat, e.LastModified)
|
|
|
|
if lastModified.Before(time.Now().Add(maxAge)) || e.Complete {
|
2016-04-26 22:12:46 +02:00
|
|
|
log.Println("[HIT]", rawurl)
|
2016-05-01 06:00:43 +02:00
|
|
|
return r, nil
|
2016-04-26 22:12:46 +02:00
|
|
|
}
|
|
|
|
}
|
2016-05-01 06:00:43 +02:00
|
|
|
d = newDownloader(rawurl, jsonFilename, dataFilename)
|
2016-04-26 22:12:46 +02:00
|
|
|
go func() {
|
2016-05-01 06:00:43 +02:00
|
|
|
d.WaitForDone()
|
2016-04-26 22:12:46 +02:00
|
|
|
c.mutex.Lock()
|
|
|
|
defer c.mutex.Unlock()
|
|
|
|
delete(c.downloaders, hash)
|
|
|
|
c.waitGroup.Done()
|
|
|
|
}()
|
|
|
|
c.downloaders[hash] = d
|
|
|
|
c.waitGroup.Add(1)
|
|
|
|
}
|
|
|
|
log.Println("[MISS]", rawurl)
|
2016-05-01 06:00:43 +02:00
|
|
|
return newLiveReader(d, dataFilename)
|
2016-04-26 22:12:46 +02:00
|
|
|
}
|
|
|
|
|
2016-05-02 17:32:44 +02:00
|
|
|
// Insert adds an item into the cache.
|
|
|
|
func (c *Cache) Insert(rawurl string, r io.Reader) error {
|
|
|
|
_, jsonFilename, dataFilename := c.getFilenames(rawurl)
|
|
|
|
f, err := os.Open(dataFilename)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
n, err := io.Copy(f, r)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
e := &Entry{
|
|
|
|
URL: rawurl,
|
|
|
|
Complete: true,
|
|
|
|
ContentLength: strconv.FormatInt(n, 10),
|
|
|
|
ContentType: mime.TypeByExtension(rawurl),
|
|
|
|
LastModified: time.Now().Format(http.TimeFormat),
|
|
|
|
}
|
|
|
|
return e.Save(jsonFilename)
|
|
|
|
}
|
|
|
|
|
2016-04-26 22:12:46 +02:00
|
|
|
// TODO: implement some form of "safe abort" for downloads so that the entire
// application is not blocked indefinitely waiting for in-progress downloads
// to finish during shutdown.
|
2016-04-26 22:12:46 +02:00
|
|
|
|
|
|
|
// Close waits for all downloaders to complete before shutting down.
//
// Note that this blocks until every in-flight download has finished; there is
// currently no way to abort an in-progress download early.
func (c *Cache) Close() {
	c.waitGroup.Wait()
}
|