From 674b9d8d075a0f913b9ab1fe7ae2b289a258edf7 Mon Sep 17 00:00:00 2001 From: Tom Wiesing Date: Thu, 4 Apr 2024 13:52:15 +0200 Subject: [PATCH] Add initial triplestore rebuild functionality --- cmd/rebuild_ts.go | 35 +++++ cmd/wdcli/main.go | 1 + internal/dis/component/triplestore/backup.go | 3 +- .../dis/component/triplestore/database.go | 122 +++++++++++------- .../dis/component/triplestore/provision.go | 6 +- internal/dis/component/triplestore/restore.go | 28 ++++ .../dis/component/triplestore/snapshot.go | 6 +- internal/dis/component/triplestore/update.go | 6 +- .../wisski/ingredient/php/extras/prefixes.go | 4 +- internal/wisski/ingredient/trb/trb.go | 91 +++++++++++++ internal/wisski/wisski.go | 6 + 11 files changed, 252 insertions(+), 56 deletions(-) create mode 100644 cmd/rebuild_ts.go create mode 100644 internal/dis/component/triplestore/restore.go create mode 100644 internal/wisski/ingredient/trb/trb.go diff --git a/cmd/rebuild_ts.go b/cmd/rebuild_ts.go new file mode 100644 index 0000000..abd572e --- /dev/null +++ b/cmd/rebuild_ts.go @@ -0,0 +1,35 @@ +package cmd + +import ( + wisski_distillery "github.com/FAU-CDI/wisski-distillery" + "github.com/FAU-CDI/wisski-distillery/internal/cli" +) + +// RebuildTS is the 'rebuild_ts' setting +var RebuildTS wisski_distillery.Command = rebuildTS{} + +type rebuildTS struct { + AllowEmptyRepository bool `short:"a" long:"allow-empty" description:"don't abort if repository is empty"` + Positionals struct { + Slug string `positional-arg-name:"SLUG" required:"1-1" description:"slug of instance to rebuild triplestore for"` + } `positional-args:"true"` +} + +func (rebuildTS) Description() wisski_distillery.Description { + return wisski_distillery.Description{ + Requirements: cli.Requirements{ + NeedsDistillery: true, + }, + Command: "rebuild_ts", + Description: "rebuild the triplestore for a specific instance", + } +} + +func (rts rebuildTS) Run(context wisski_distillery.Context) (err error) { + instance, err := context.Environment.Instances().WissKI(context.Context, rts.Positionals.Slug) + if err != nil { + return err + } + + return instance.TRB().DoSomething(context.Context, context.Stdout, rts.AllowEmptyRepository) +} diff --git a/cmd/wdcli/main.go b/cmd/wdcli/main.go index 97f07ca..1735b6c 100644 --- a/cmd/wdcli/main.go +++ b/cmd/wdcli/main.go @@ -58,6 +58,7 @@ func init() { // backup & cron wdcli.Register(cmd.Snapshot) + wdcli.Register(cmd.RebuildTS) wdcli.Register(cmd.Backup) wdcli.Register(cmd.Cron) wdcli.Register(cmd.Monday) diff --git a/internal/dis/component/triplestore/backup.go b/internal/dis/component/triplestore/backup.go index fa8f010..8fc06a2 100644 --- a/internal/dis/component/triplestore/backup.go +++ b/internal/dis/component/triplestore/backup.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "io" + "net/http" "github.com/FAU-CDI/wisski-distillery/internal/dis/component" ) @@ -32,7 +33,7 @@ func (ts *Triplestore) Backup(scontext *component.StagingContext) error { } func (ts Triplestore) listRepositories(ctx context.Context) (repos []Repository, err error) { - res, err := ts.OpenRaw(ctx, "GET", "/rest/repositories", nil, "", "application/json", 0) + res, err := ts.DoRest(ctx, 0, http.MethodGet, "/rest/repositories", &RequestHeaders{Accept: "application/json"}) if err != nil { return nil, err } diff --git a/internal/dis/component/triplestore/database.go b/internal/dis/component/triplestore/database.go index da8f27f..a4ac824 100644 --- a/internal/dis/component/triplestore/database.go +++ b/internal/dis/component/triplestore/database.go @@ -31,43 +31,71 @@ type TriplestoreUserAppSettings struct { // This includes e.g. CRUDing a specific repo. const tsTrivialTimeout = time.Minute -// OpenRaw makes an http request to the triplestore api. -// -// When bodyName is non-empty, expect body to be a byte slice representing a multipart/form-data upload with the given name. -// When bodyName is empty, simply marshal body as application/json -func (ts Triplestore) OpenRaw(ctx context.Context, method, url string, body any, bodyName string, accept string, timeout time.Duration) (*http.Response, error) { - var reader io.Reader // to read the body from - var contentType string // content-type of the request being sent +// RequestHeaders represent headers of a raw http request +type RequestHeaders struct { + Accept string + ContentType string +} - // for "PUT" and "POST" we setup a body - if method == http.MethodPut || method == http.MethodPost { - if bodyName != "" { - var buffer bytes.Buffer +func (rh *RequestHeaders) With(headers RequestHeaders) *RequestHeaders { - // write the file to it - writer := multipart.NewWriter(&buffer) - { - part, err := writer.CreateFormFile(bodyName, "filename.txt") - if err != nil { - return nil, err - } - io.Copy(part, bytes.NewReader(body.([]byte))) - } - writer.Close() - - // use it for the request - reader = &buffer - contentType = writer.FormDataContentType() - } else { - mbytes, err := json.Marshal(body) - if err != nil { - return nil, err - } - reader = bytes.NewReader(mbytes) - contentType = "application/json" - } + // create new request headers and copy the old options + var newHeaders RequestHeaders + if rh != nil { + newHeaders = *rh } + // add the options + if headers.Accept != "" { + newHeaders.Accept = headers.Accept + } + + if headers.ContentType != "" { + newHeaders.ContentType = headers.ContentType + } + + return &newHeaders +} + +// DoRest performs a (raw) http request to the without a body. +func (ts Triplestore) DoRest(ctx context.Context, timeout time.Duration, method, url string, headers *RequestHeaders) (*http.Response, error) { + return ts.DoRestWithReader(ctx, timeout, method, url, headers, nil) +} + +// DoRestWithForm performs a http request where the body are all bytes read from fieldvalue. +func (ts Triplestore) DoRestWithForm(ctx context.Context, timeout time.Duration, method, url string, headers *RequestHeaders, fieldname string, fieldvalue io.Reader) (*http.Response, error) { + var buffer bytes.Buffer + + // write the file to it + writer := multipart.NewWriter(&buffer) + { + part, err := writer.CreateFormFile(fieldname, "filename.txt") + if err != nil { + return nil, err + } + io.Copy(part, fieldvalue) + } + writer.Close() + + // and sent the reader as the body + return ts.DoRestWithReader(ctx, timeout, method, url, headers.With(RequestHeaders{ContentType: writer.FormDataContentType()}), &buffer) +} + +// DoRestWithReader performs a http request where the body is copied from the given io.Reader. +// The caller must ensure the reader is closed. +func (ts Triplestore) DoRestWithMarshal(ctx context.Context, timeout time.Duration, method, url string, headers *RequestHeaders, body any) (*http.Response, error) { + // encode into a buffer + var buffer bytes.Buffer + if err := json.NewEncoder(&buffer).Encode(body); err != nil { + return nil, err + } + + return ts.DoRestWithReader(ctx, timeout, method, url, headers.With(RequestHeaders{ContentType: "application/json"}), &buffer) +} + +// DoRestWithReader performs a http request where the body is copied from the given io.Reader. +// The caller must ensure the reader is closed. +func (ts Triplestore) DoRestWithReader(ctx context.Context, timeout time.Duration, method string, url string, headers *RequestHeaders, body io.Reader) (*http.Response, error) { // create the request object client := &http.Client{ Timeout: timeout, @@ -75,20 +103,22 @@ func (ts Triplestore) OpenRaw(ctx context.Context, method, url string, body any, DisableKeepAlives: true, }, } - req, err := http.NewRequestWithContext(ctx, method, ts.BaseURL+url, reader) + + // create the request and authentication + req, err := http.NewRequestWithContext(ctx, method, ts.BaseURL+url, body) if err != nil { return nil, err } - - // Setup configuration! - if accept != "" { - req.Header.Set("Accept", accept) - } - if contentType != "" { - req.Header.Set("Content-Type", contentType) - } req.SetBasicAuth(ts.Config.TS.AdminUsername, ts.Config.TS.AdminPassword) + // add extra headers + if headers != nil && headers.Accept != "" { + req.Header.Set("Accept", headers.Accept) + } + if headers != nil && headers.ContentType != "" { + req.Header.Set("Content-Type", headers.ContentType) + } + // and send it return client.Do(req) } @@ -97,7 +127,7 @@ func (ts Triplestore) OpenRaw(ctx context.Context, method, url string, body any, // This is achieved using a polling strategy. func (ts Triplestore) Wait(ctx context.Context) error { return timex.TickUntilFunc(func(time.Time) bool { - res, err := ts.OpenRaw(ctx, "GET", "/rest/repositories", nil, "", "", tsTrivialTimeout) + res, err := ts.DoRest(ctx, tsTrivialTimeout, http.MethodGet, "/rest/repositories", nil) zerolog.Ctx(ctx).Trace().Err(err).Msg("Triplestore wait") if err != nil { return false @@ -110,10 +140,11 @@ func (ts Triplestore) Wait(ctx context.Context) error { // PurgeUser deletes the specified user from the triplestore. // When the user does not exist, returns no error. func (ts Triplestore) PurgeUser(ctx context.Context, user string) error { - res, err := ts.OpenRaw(ctx, "DELETE", "/rest/security/users/"+user, nil, "", "", tsTrivialTimeout) + res, err := ts.DoRest(ctx, tsTrivialTimeout, http.MethodDelete, "/rest/security/users/"+user, nil) if err != nil { return err } + defer res.Body.Close() if res.StatusCode != http.StatusNoContent && res.StatusCode != http.StatusNotFound { return errors.Errorf("Delete returned code %d", res.StatusCode) } @@ -123,10 +154,11 @@ func (ts Triplestore) PurgeUser(ctx context.Context, user string) error { // PurgeRepo deletes the specified repo from the triplestore. // When the repo does not exist, returns no error. func (ts Triplestore) PurgeRepo(ctx context.Context, repo string) error { - res, err := ts.OpenRaw(ctx, "DELETE", "/rest/repositories/"+repo, nil, "", "", tsTrivialTimeout) + res, err := ts.DoRest(ctx, tsTrivialTimeout, http.MethodDelete, "/rest/repositories/"+repo, nil) if err != nil { return err } + defer res.Body.Close() if res.StatusCode != http.StatusOK && res.StatusCode != http.StatusNotFound { return errors.Errorf("Delete returned code %d", res.StatusCode) } diff --git a/internal/dis/component/triplestore/provision.go b/internal/dis/component/triplestore/provision.go index 7468c87..5466df9 100644 --- a/internal/dis/component/triplestore/provision.go +++ b/internal/dis/component/triplestore/provision.go @@ -61,7 +61,7 @@ func (ts *Triplestore) CreateRepository(ctx context.Context, name, domain, user, // do the create! { - res, err := ts.OpenRaw(ctx, "POST", "/rest/repositories", createRepo.Bytes(), "config", "", tsTrivialTimeout) + res, err := ts.DoRestWithForm(ctx, tsTrivialTimeout, http.MethodPost, "/rest/repositories", nil, "config", &createRepo) if err != nil { return errTripleStoreFailedRepository.WithMessageF(err) } @@ -73,7 +73,7 @@ func (ts *Triplestore) CreateRepository(ctx context.Context, name, domain, user, // create the user and grant them access { - res, err := ts.OpenRaw(ctx, "POST", "/rest/security/users/"+user, TriplestoreUserPayload{ + res, err := ts.DoRestWithMarshal(ctx, tsTrivialTimeout, http.MethodPost, "/rest/security/users/"+user, nil, TriplestoreUserPayload{ Password: password, AppSettings: TriplestoreUserAppSettings{ DefaultInference: true, @@ -87,7 +87,7 @@ func (ts *Triplestore) CreateRepository(ctx context.Context, name, domain, user, "READ_REPO_" + name, "WRITE_REPO_" + name, }, - }, "", "", tsTrivialTimeout) + }) if err != nil { return errTripleStoreFailedRepository.WithMessageF(err) } diff --git a/internal/dis/component/triplestore/restore.go b/internal/dis/component/triplestore/restore.go new file mode 100644 index 0000000..7e8a093 --- /dev/null +++ b/internal/dis/component/triplestore/restore.go @@ -0,0 +1,28 @@ +package triplestore + +import ( + "context" + "fmt" + "io" + "net/http" + + "github.com/pkg/errors" +) + +var errTSRestoreWrongStatusCode = errors.New("Triplestore.Restore: Wrong status code") + +// RestoreDB snapshots the provided repository into dst +func (ts Triplestore) RestoreDB(ctx context.Context, repo string, reader io.Reader) error { + // submit the form + res, err := ts.DoRestWithReader(ctx, 0, http.MethodPut, "/repositories/"+repo+"/statements", &RequestHeaders{ContentType: nquadsContentType}, reader) + if err != nil { + return err + } + defer res.Body.Close() + + if res.StatusCode != http.StatusNoContent { + message, _ := io.ReadAll(res.Body) + return fmt.Errorf("%w: %s", errTSRestoreWrongStatusCode, message) + } + return nil +} diff --git a/internal/dis/component/triplestore/snapshot.go b/internal/dis/component/triplestore/snapshot.go index 2c8da6c..9d1567e 100644 --- a/internal/dis/component/triplestore/snapshot.go +++ b/internal/dis/component/triplestore/snapshot.go @@ -25,15 +25,17 @@ func (ts *Triplestore) Snapshot(wisski models.Instance, scontext *component.Stag var errTSBackupWrongStatusCode = errors.New("Triplestore.Backup: Wrong status code") +const nquadsContentType = "text/x-nquads" + // SnapshotDB snapshots the provided repository into dst func (ts Triplestore) SnapshotDB(ctx context.Context, dst io.Writer, repo string) (int64, error) { - res, err := ts.OpenRaw(ctx, "GET", "/repositories/"+repo+"/statements?infer=false", nil, "", "application/n-quads", 0) + res, err := ts.DoRest(ctx, 0, http.MethodGet, "/repositories/"+repo+"/statements?infer=false", &RequestHeaders{Accept: nquadsContentType}) if err != nil { return 0, err } + defer res.Body.Close() if res.StatusCode != http.StatusOK { return 0, errTSBackupWrongStatusCode } - defer res.Body.Close() return io.Copy(dst, res.Body) } diff --git a/internal/dis/component/triplestore/update.go b/internal/dis/component/triplestore/update.go index 7ca7a3b..3af4437 100644 --- a/internal/dis/component/triplestore/update.go +++ b/internal/dis/component/triplestore/update.go @@ -20,7 +20,7 @@ func (ts Triplestore) Update(ctx context.Context, progress io.Writer) error { logging.LogMessage(progress, "Resetting admin user password") { - res, err := ts.OpenRaw(ctx, "PUT", "/rest/security/users/"+ts.Config.TS.AdminUsername, TriplestoreUserPayload{ + res, err := ts.DoRestWithMarshal(ctx, tsTrivialTimeout, http.MethodPut, "/rest/security/users/"+ts.Config.TS.AdminUsername, nil, TriplestoreUserPayload{ Password: ts.Config.TS.AdminPassword, AppSettings: TriplestoreUserAppSettings{ DefaultInference: true, @@ -30,7 +30,7 @@ func (ts Triplestore) Update(ctx context.Context, progress io.Writer) error { ExecuteCount: true, }, GrantedAuthorities: []string{"ROLE_ADMIN"}, - }, "", "", tsTrivialTimeout) + }) if err != nil { return fmt.Errorf("failed to create triplestore user: %s", err) } @@ -52,7 +52,7 @@ func (ts Triplestore) Update(ctx context.Context, progress io.Writer) error { logging.LogMessage(progress, "Enabling Triplestore security") { - res, err := ts.OpenRaw(ctx, "POST", "/rest/security", true, "", "", tsTrivialTimeout) + res, err := ts.DoRestWithMarshal(ctx, tsTrivialTimeout, http.MethodPost, "/rest/security", nil, true) if err != nil { return fmt.Errorf("failed to enable triplestore security: %s", err) } diff --git a/internal/wisski/ingredient/php/extras/prefixes.go b/internal/wisski/ingredient/php/extras/prefixes.go index 76615ce..1c9b9a5 100644 --- a/internal/wisski/ingredient/php/extras/prefixes.go +++ b/internal/wisski/ingredient/php/extras/prefixes.go @@ -48,7 +48,7 @@ var listURIPrefixesPHP string // server is an optional server to fetch prefixes from. // server may be nil. func (prefixes *Prefixes) All(ctx context.Context, server *phpx.Server) ([]string, error) { - uris, err := prefixes.database(ctx, server) + uris, err := prefixes.triplestore(ctx, server) if err != nil { return nil, err } @@ -61,7 +61,7 @@ func (prefixes *Prefixes) All(ctx context.Context, server *phpx.Server) ([]strin return append(uris, uris2...), nil } -func (wisski *Prefixes) database(ctx context.Context, server *phpx.Server) (prefixes []string, err error) { +func (wisski *Prefixes) triplestore(ctx context.Context, server *phpx.Server) (prefixes []string, err error) { // get all the ugly prefixes err = wisski.dependencies.PHP.ExecScript(ctx, server, &prefixes, listURIPrefixesPHP, "list_prefixes") if err != nil { diff --git a/internal/wisski/ingredient/trb/trb.go b/internal/wisski/ingredient/trb/trb.go new file mode 100644 index 0000000..f39d518 --- /dev/null +++ b/internal/wisski/ingredient/trb/trb.go @@ -0,0 +1,91 @@ +package trb + +import ( + "context" + "errors" + "fmt" + "io" + "os" + + "github.com/FAU-CDI/wisski-distillery/internal/wisski/ingredient" + "github.com/FAU-CDI/wisski-distillery/internal/wisski/ingredient/barrel" + "github.com/FAU-CDI/wisski-distillery/pkg/logging" +) + +type TRB struct { + ingredient.Base + + dependencies struct { + Barrel *barrel.Barrel + } +} + +func (trb *TRB) DoSomething(ctx context.Context, out io.Writer, allowEmptyRepository bool) (err error) { + + // stop instance, restart when done + logging.LogMessage(out, "Shutting down instance") + if err := trb.dependencies.Barrel.Stack().Down(ctx, out); err != nil { + return err + } + + defer func() { + logging.LogMessage(out, "Restarting instance") + e := trb.dependencies.Barrel.Stack().Up(ctx, out) + if err == nil { + err = e + } + }() + + // make the backup + logging.LogMessage(out, "Dumping triplestore") + path, err := trb.makeBackup(ctx, allowEmptyRepository) + if err != nil { + return err + } + fmt.Printf("Wrote %q\n", path) + + logging.LogMessage(out, "Purging triplestore") + if err := trb.Malt.TS.Purge(ctx, trb.Instance, trb.Domain()); err != nil { + return err + } + + logging.LogMessage(out, "Provising triplestore") + if err := trb.Malt.TS.Provision(ctx, trb.Instance, trb.Domain()); err != nil { + return err + } + + logging.LogMessage(out, "Loading dump file") + content, err := os.Open(path) + if err != nil { + return err + } + defer content.Close() + + logging.LogMessage(out, "Restoring triplestore") + if err := trb.Malt.TS.RestoreDB(ctx, trb.GraphDBRepository, content); err != nil { + return err + } + + return +} + +var errBackupEmpty = errors.New("no data contained in backup file (is the repository empty?)") + +func (trb *TRB) makeBackup(ctx context.Context, allowEmptyRepository bool) (path string, err error) { + f, err := os.CreateTemp("", "") + if err != nil { + return "", err + } + defer f.Close() + + count, err := trb.Malt.TS.SnapshotDB(ctx, f, trb.GraphDBRepository) + if err != nil { + return "", err + } + + if count == 0 && !allowEmptyRepository { + return "", errBackupEmpty + } + + return f.Name(), nil +} diff --git a/internal/wisski/wisski.go b/internal/wisski/wisski.go index 716d0ae..3c509a8 100644 --- a/internal/wisski/wisski.go +++ b/internal/wisski/wisski.go @@ -19,6 +19,7 @@ import ( "github.com/FAU-CDI/wisski-distillery/internal/wisski/ingredient/php/extras" "github.com/FAU-CDI/wisski-distillery/internal/wisski/ingredient/php/users" "github.com/FAU-CDI/wisski-distillery/internal/wisski/ingredient/reserve" + "github.com/FAU-CDI/wisski-distillery/internal/wisski/ingredient/trb" "github.com/FAU-CDI/wisski-distillery/internal/wisski/liquid" "github.com/tkw1536/pkglib/lifetime" ) @@ -70,6 +71,10 @@ func (wisski *WissKI) Barrel() *barrel.Barrel { return export[*barrel.Barrel](wisski) } +func (wisski *WissKI) TRB() *trb.TRB { + return export[*trb.TRB](wisski) +} + func (wisski *WissKI) Manager() *manager.Manager { return export[*manager.Manager](wisski) } @@ -183,4 +188,5 @@ func (wisski *WissKI) allIngredients(context *lifetime.Registry[ingredient.Ingre lifetime.Place[*reserve.Reserve](context) lifetime.Place[*ssh.SSH](context) + lifetime.Place[*trb.TRB](context) }