internal/component => internal/dis/component

This commit is contained in:
Tom Wiesing 2022-10-18 09:40:37 +02:00
parent 9443217441
commit b5b1ce2340
No known key found for this signature in database
123 changed files with 76 additions and 76 deletions

View file

@ -0,0 +1,44 @@
package triplestore
import (
"encoding/json"
"io"
"github.com/FAU-CDI/wisski-distillery/internal/dis/component"
)
// BackupName reports the constant name "triplestore" used for this component's backup.
func (*Triplestore) BackupName() string {
	return "triplestore"
}
// Backup writes one N-Quads dump per triplestore repository into the staging context.
//
// It first lists every repository known to the triplestore and then adds a file
// named "<repository id>.nq" for each of them, filled by SnapshotDB.
// The first error encountered aborts the backup and is returned.
func (ts *Triplestore) Backup(context component.StagingContext) error {
	// list all the repositories
	repos, err := ts.listRepositories()
	if err != nil {
		return err
	}

	// dump adds the file holding the snapshot of a single repository.
	dump := func(id string) error {
		return context.AddFile(id+".nq", func(w io.Writer) error {
			_, err := ts.SnapshotDB(w, id)
			return err
		})
	}

	// then back up each repository separately
	return context.AddDirectory("", func() error {
		for i := range repos {
			if err := dump(repos[i].ID); err != nil {
				return err
			}
		}
		return nil
	})
}
// listRepositories requests "/rest/repositories" from the triplestore API
// and decodes the JSON response into a slice of repositories.
func (ts Triplestore) listRepositories() (repos []Repository, err error) {
	res, err := ts.OpenRaw("GET", "/rest/repositories", nil, "", "application/json")
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	decoder := json.NewDecoder(res.Body)
	if err = decoder.Decode(&repos); err != nil {
		return repos, err
	}
	return repos, nil
}

View file

@ -0,0 +1,57 @@
# This file is used to initialize a new GraphDB repository.
# In this file the variables ${GRAPHDB_REPO} and ${INSTANCE_DOMAIN} will be replaced.
# All other variables will be left untouched.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
@prefix owlim: <http://www.ontotext.com/trree/owlim#>.
# The repository to create: its ID and label are filled in from the template variables.
[] a rep:Repository ;
rep:repositoryID "${GRAPHDB_REPO}" ;
rdfs:label "${INSTANCE_DOMAIN}" ;
rep:repositoryImpl [
rep:repositoryType "graphdb:SailRepository" ;
sr:sailImpl [
sail:sailType "graphdb:Sail" ;
# Settings of the GraphDB sail; the base URL is derived from the instance domain.
owlim:owlim-license "" ;
owlim:base-URL "http://${INSTANCE_DOMAIN}/" ;
owlim:defaultNS "" ;
owlim:entity-index-size "10000000" ;
owlim:entity-id-size "32" ;
owlim:imports "" ;
owlim:repository-type "file-repository" ;
owlim:ruleset "empty" ;
owlim:storage-folder "storage" ;
owlim:enable-context-index "false" ;
owlim:cache-memory "80m" ;
owlim:tuple-index-memory "80m" ;
owlim:enablePredicateList "false" ;
owlim:predicate-memory "0%" ;
owlim:fts-memory "0%" ;
owlim:ftsIndexPolicy "never" ;
owlim:ftsLiteralsOnly "true" ;
owlim:in-memory-literal-properties "false" ;
owlim:enable-literal-index "true" ;
owlim:index-compression-ratio "-1" ;
owlim:check-for-inconsistencies "false" ;
owlim:disable-sameAs "false" ;
owlim:enable-optimization "true" ;
owlim:transaction-mode "safe" ;
owlim:transaction-isolation "true" ;
owlim:query-timeout "0" ;
owlim:query-limit-results "0" ;
owlim:throw-QueryEvaluationException-on-timeout "false" ;
owlim:useShutdownHooks "true" ;
owlim:read-only "false" ;
owlim:nonInterpretablePredicates "http://www.w3.org/2000/01/rdf-schema#label;http://www.w3.org/1999/02/22-rdf-syntax-ns#type;http://www.ontotext.com/owlim/ces#gazetteerConfig;http://www.ontotext.com/owlim/ces#metadataConfig" ;
]
].

View file

@ -0,0 +1,136 @@
package triplestore
import (
"bytes"
"encoding/json"
"io"
"mime/multipart"
"net/http"
"time"
"github.com/FAU-CDI/wisski-distillery/pkg/timex"
"github.com/pkg/errors"
"github.com/tkw1536/goprogram/stream"
)
// TriplestoreUserPayload is the JSON body sent to the "/rest/security/users/"
// endpoints when creating or updating a triplestore user.
type TriplestoreUserPayload struct {
	// Password is the password to set for the user.
	Password string `json:"password"`

	// AppSettings holds the application settings of the user.
	AppSettings TriplestoreUserAppSettings `json:"appSettings"`

	// GrantedAuthorities lists the roles and repository permissions of the user,
	// e.g. "ROLE_USER" or "READ_REPO_" followed by a repository name.
	GrantedAuthorities []string `json:"grantedAuthorities"`
}
// TriplestoreUserAppSettings holds the application settings that are sent
// as part of a TriplestoreUserPayload.
// Each field is serialized under the upper-case key given in its json tag.
type TriplestoreUserAppSettings struct {
	DefaultInference      bool `json:"DEFAULT_INFERENCE"`
	DefaultVisGraphSchema bool `json:"DEFAULT_VIS_GRAPH_SCHEMA"`
	DefaultSameas         bool `json:"DEFAULT_SAMEAS"`
	IgnoreSharedQueries   bool `json:"IGNORE_SHARED_QUERIES"`
	ExecuteCount          bool `json:"EXECUTE_COUNT"`
}
// OpenRaw makes an http request to the triplestore api.
//
// method is the HTTP method and url the path that is appended to ts.BaseURL.
// A request body is only sent for "PUT" and "POST" requests; for every other
// method body and bodyName are ignored.
//
// When bodyName is non-empty, body must be a byte slice; it is uploaded as a
// multipart/form-data file field with the given name. An error is returned
// when body has a different type.
// When bodyName is empty, body is marshaled as application/json.
//
// When accept is non-empty, it is sent as the Accept header.
// Every request uses basic auth with the configured admin credentials.
//
// On success the caller is responsible for closing the body of the returned response.
func (ts Triplestore) OpenRaw(method, url string, body interface{}, bodyName string, accept string) (*http.Response, error) {
	var reader io.Reader
	var contentType string

	// for "PUT" and "POST" we setup a body
	if method == "PUT" || method == "POST" {
		if bodyName != "" {
			// refuse anything but a byte slice instead of panicking on the assertion
			data, ok := body.([]byte)
			if !ok {
				return nil, errors.New("OpenRaw: multipart body must be a byte slice")
			}

			buffer := &bytes.Buffer{}
			writer := multipart.NewWriter(buffer)
			contentType = writer.FormDataContentType()

			part, err := writer.CreateFormFile(bodyName, "filename.txt")
			if err != nil {
				return nil, err
			}
			if _, err := part.Write(data); err != nil {
				return nil, err
			}
			// Close writes the trailing boundary; a failure here means the upload is incomplete.
			if err := writer.Close(); err != nil {
				return nil, err
			}

			reader = buffer
		} else {
			contentType = "application/json"

			mbytes, err := json.Marshal(body)
			if err != nil {
				return nil, err
			}
			reader = bytes.NewReader(mbytes)
		}
	}

	// create the request object
	client := &http.Client{
		Transport: &http.Transport{
			DialContext:       ts.Environment.DialContext,
			DisableKeepAlives: true,
		},
	}
	req, err := http.NewRequest(method, ts.BaseURL+url, reader)
	if err != nil {
		return nil, err
	}

	// Setup configuration!
	if accept != "" {
		req.Header.Set("Accept", accept)
	}
	if contentType != "" {
		req.Header.Set("Content-Type", contentType)
	}
	req.SetBasicAuth(ts.Config.TriplestoreAdminUser, ts.Config.TriplestoreAdminPassword)

	// and send it
	return client.Do(req)
}
// Wait blocks until a request to the Triplestore API goes through.
//
// It repeatedly requests "/rest/repositories"; the polling itself is driven by
// timex.TickUntilFunc using ts.PollContext and ts.PollInterval.
func (ts Triplestore) Wait() error {
	sink := stream.FromNil()

	// probe performs a single request and reports whether it succeeded.
	probe := func(time.Time) bool {
		res, err := ts.OpenRaw("GET", "/rest/repositories", nil, "", "")
		sink.EPrintf("[Triplestore.Wait]: %s\n", err)
		if err == nil {
			res.Body.Close()
		}
		return err == nil
	}

	return timex.TickUntilFunc(probe, ts.PollContext, ts.PollInterval)
}
// PurgeUser deletes the specified user from the triplestore.
//
// It returns an error when the request fails, or when the API answers
// with any status code other than 204 No Content.
func (ts Triplestore) PurgeUser(user string) error {
	res, err := ts.OpenRaw("DELETE", "/rest/security/users/"+user, nil, "", "")
	if err != nil {
		return err
	}
	// release the response on every path, including the wrong status code one
	defer res.Body.Close()

	if res.StatusCode != http.StatusNoContent {
		return errors.Errorf("Delete returned code %d", res.StatusCode)
	}
	return nil
}
// PurgeRepo deletes the specified repo from the triplestore.
//
// It returns an error when the request fails, or when the API answers
// with any status code other than 200 OK.
func (ts Triplestore) PurgeRepo(repo string) error {
	res, err := ts.OpenRaw("DELETE", "/rest/repositories/"+repo, nil, "", "")
	if err != nil {
		return err
	}
	// release the response on every path, including the wrong status code one
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return errors.Errorf("Delete returned code %d", res.StatusCode)
	}
	return nil
}
// Repository describes a single repository, in the shape decoded from
// the JSON response of the "/rest/repositories" endpoint.
type Repository struct {
	ID         string `json:"id"`
	Title      string `json:"title"`
	URI        string `json:"uri"`
	Type       string `json:"type"`
	SesameType string `json:"sesameType"`
	Location   string `json:"location"`
	Readable   bool   `json:"readable"`
	Writable   bool   `json:"writable"`
	Local      bool   `json:"local"`
}

View file

@ -0,0 +1,88 @@
package triplestore
import (
"bytes"
"net/http"
_ "embed"
"github.com/FAU-CDI/wisski-distillery/internal/models"
"github.com/FAU-CDI/wisski-distillery/pkg/errorx"
"github.com/FAU-CDI/wisski-distillery/pkg/unpack"
"github.com/tkw1536/goprogram/exit"
)
// errTripleStoreFailedRepository is the error template CreateRepository uses
// when creating the repository or its user fails.
// Message is a format string whose placeholder is filled in via WithMessageF.
var errTripleStoreFailedRepository = exit.Error{
Message: "Failed to create repository: %s",
ExitCode: exit.ExitGeneric,
}
// createRepoTTL holds the embedded contents of create-repo.ttl.
// CreateRepository uses it as the template for the repository configuration.
//
//go:embed create-repo.ttl
var createRepoTTL []byte
// Provision creates the repository and the user of the given instance
// by delegating to CreateRepository.
func (ts *Triplestore) Provision(instance models.Instance, domain string) error {
	var (
		repo     = instance.GraphDBRepository
		user     = instance.GraphDBUsername
		password = instance.GraphDBPassword
	)
	return ts.CreateRepository(repo, domain, user, password)
}
// Purge removes the repository and the user belonging to the given instance.
//
// Both deletions are always attempted, the repository first;
// the returned error is whatever errorx.First selects from the two results.
func (ts *Triplestore) Purge(instance models.Instance, domain string) error {
	repoErr := ts.PurgeRepo(instance.GraphDBRepository)
	userErr := ts.PurgeUser(instance.GraphDBUsername)
	return errorx.First(repoErr, userErr)
}
// CreateRepository creates a new repository called name, along with a user
// that is granted read and write access to it.
//
// It waits for the triplestore to respond, renders the embedded repository
// template with name and domain, uploads it, and finally creates the user
// with the given password. Both API calls must answer with 201 Created.
func (ts *Triplestore) CreateRepository(name, domain, user, password string) error {
	if err := ts.Wait(); err != nil {
		return err
	}

	// render the repository configuration from the embedded template
	var config bytes.Buffer
	vars := map[string]string{
		"GRAPHDB_REPO":    name,
		"INSTANCE_DOMAIN": domain,
	}
	if err := unpack.WriteTemplate(&config, vars, bytes.NewReader(createRepoTTL)); err != nil {
		return err
	}

	// upload the configuration to create the repository
	repoRes, err := ts.OpenRaw("POST", "/rest/repositories", config.Bytes(), "config", "")
	if err != nil {
		return errTripleStoreFailedRepository.WithMessageF(err)
	}
	defer repoRes.Body.Close()
	if repoRes.StatusCode != http.StatusCreated {
		return errTripleStoreFailedRepository.WithMessageF("Repo create did not return status code 201")
	}

	// create the user and grant them access
	payload := TriplestoreUserPayload{
		Password: password,
		AppSettings: TriplestoreUserAppSettings{
			DefaultInference:      true,
			DefaultVisGraphSchema: true,
			DefaultSameas:         true,
			IgnoreSharedQueries:   false,
			ExecuteCount:          true,
		},
		GrantedAuthorities: []string{
			"ROLE_USER",
			"READ_REPO_" + name,
			"WRITE_REPO_" + name,
		},
	}
	userRes, err := ts.OpenRaw("POST", "/rest/security/users/"+user, payload, "", "")
	if err != nil {
		return errTripleStoreFailedRepository.WithMessageF(err)
	}
	defer userRes.Body.Close()
	if userRes.StatusCode != http.StatusCreated {
		return errTripleStoreFailedRepository.WithMessageF("User create did not return status code 201")
	}

	return nil
}

View file

@ -0,0 +1,38 @@
package triplestore
import (
"io"
"net/http"
"github.com/FAU-CDI/wisski-distillery/internal/dis/component"
"github.com/FAU-CDI/wisski-distillery/internal/models"
"github.com/pkg/errors"
)
// SnapshotNeedsRunning always reports false.
// NOTE(review): presumably signals that the instance does not have to be running
// while the triplestore snapshot is taken — confirm against the snapshot interface.
func (Triplestore) SnapshotNeedsRunning() bool {
	return false
}
// SnapshotName reports the constant name "triplestore" used for this component's snapshot.
func (Triplestore) SnapshotName() string {
	return "triplestore"
}
// Snapshot writes an N-Quads dump of the repository belonging to the given
// instance into the staging context, as a file named "<repository>.nq".
func (ts *Triplestore) Snapshot(wisski models.Instance, context component.StagingContext) error {
	repo := wisski.GraphDBRepository

	// writeDump streams the repository contents into the provided file.
	writeDump := func(w io.Writer) error {
		_, err := ts.SnapshotDB(w, repo)
		return err
	}

	// addDump registers the dump file inside the snapshot directory.
	addDump := func() error {
		return context.AddFile(repo+".nq", writeDump)
	}

	return context.AddDirectory(".", addDump)
}
// errTSBackupWrongStatusCode is returned by SnapshotDB when the triplestore
// answers the export request with a status code other than 200 OK.
var errTSBackupWrongStatusCode = errors.New("Triplestore.Backup: Wrong status code")
// SnapshotDB snapshots the provided repository into dst.
//
// The statements of the repository are requested as N-Quads (with infer=false)
// and streamed into dst. It returns the number of bytes copied.
// When the API does not answer with 200 OK, errTSBackupWrongStatusCode is returned.
func (ts Triplestore) SnapshotDB(dst io.Writer, repo string) (int64, error) {
	res, err := ts.OpenRaw("GET", "/repositories/"+repo+"/statements?infer=false", nil, "", "application/n-quads")
	if err != nil {
		return 0, err
	}
	// close the body on every path, including the wrong status code one
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return 0, errTSBackupWrongStatusCode
	}
	return io.Copy(dst, res.Body)
}

View file

@ -0,0 +1 @@
DOCKER_NETWORK_NAME=${DOCKER_NETWORK_NAME}

View file

@ -0,0 +1,52 @@
package triplestore
import (
"context"
"embed"
"path/filepath"
"time"
"github.com/FAU-CDI/wisski-distillery/internal/dis/component"
"github.com/FAU-CDI/wisski-distillery/pkg/environment"
)
// Triplestore is the component that manages the triplestore (GraphDB) server
// and talks to its HTTP API.
type Triplestore struct {
	component.Base

	// BaseURL is the upstream server url; request paths are appended to it.
	BaseURL string

	// PollContext is the context to abort polling with.
	PollContext context.Context

	// PollInterval is the duration to wait for while polling in Wait.
	PollInterval time.Duration
}
// Path returns the directory of the triplestore component,
// which is "core/triplestore" below the configured deploy root.
func (ts *Triplestore) Path() string {
	root := ts.Still.Config.DeployRoot
	return filepath.Join(root, "core", "triplestore")
}
// Context returns the installation context to use for this component.
// The triplestore does not modify it, and hands back parent unchanged.
func (Triplestore) Context(parent component.InstallationContext) component.InstallationContext {
return parent
}
// resources holds the embedded "triplestore" directory and the "triplestore.env" file.
// They are handed to the stack created by Stack.
//
//go:embed all:triplestore
//go:embed triplestore.env
var resources embed.FS
// Stack returns the stack of the triplestore component, built from the embedded resources.
//
// The stack uses the "triplestore" directory as context, "triplestore.env" as
// environment file, and requests the data, work and logs directories below "data".
func (ts *Triplestore) Stack(env environment.Environment) component.StackWithResources {
	// directories backing the volumes mounted by the docker-compose file
	dirs := []string{
		filepath.Join("data", "data"),
		filepath.Join("data", "work"),
		filepath.Join("data", "logs"),
	}

	// values substituted into the environment file
	envContext := map[string]string{
		"DOCKER_NETWORK_NAME": ts.Config.DockerNetworkName,
	}

	stack := component.StackWithResources{
		Resources:   resources,
		ContextPath: "triplestore",

		CopyContextFiles: []string{"graphdb.zip"}, // TODO: Move into constant?

		EnvPath:    "triplestore.env",
		EnvContext: envContext,

		MakeDirs: dirs,
	}
	return component.MakeStack(ts, env, stack)
}

View file

@ -0,0 +1,3 @@
*
!*.zip
!entrypoint.sh

View file

@ -0,0 +1,64 @@
# This Dockerfile contains instructions to compile and run GraphDB inside a Docker container.
# It is roughly based on https://github.com/Ontotext-AD/graphdb-docker/blob/master/free-edition/Dockerfile
# but has been modified for performance and security.
# This image is intended to be built like:
# docker build --build-arg src=graphdb.zip .
# We first make a base image to base further builds on.
# We don't use alpine here, as that uses significantly slower musl instead of glibc.
FROM adoptopenjdk/openjdk11:debian-slim as base
# Create a user called graphdb
RUN useradd -ms /bin/bash graphdb
# make a base image to add the sources to.
FROM base as sources
# install unzip
RUN apt-get update && apt-get install -y unzip
# add the source file (by default graphdb.zip) to the image
ARG src=graphdb.zip
ADD ${src} /graphdb.zip
# unpack it into a temporary directory
# (always unzip the fixed destination of the ADD above, so that a custom 'src' build argument keeps working)
RUN unzip /graphdb.zip -d "/unpack/"
# Move it into /opt/graphdb, and chown it to graphdb
RUN mv "/unpack"/* /opt/graphdb
RUN chown -R graphdb:graphdb /opt/graphdb
# finally make an image that will run
FROM base as final
# add the entrypoint script
ADD entrypoint.sh /entrypoint.sh
# copy over the sources
COPY --from=sources /opt/graphdb /opt/graphdb
# set environment variables for graphdb_home and path
ENV GRAPHDB_HOME=/opt/graphdb
ENV PATH=$GRAPHDB_HOME/bin:$PATH
# Workaround for CVE-2021-44228
# (not sure if we are vulnerable, but just because)
ENV LOG4J_FORMAT_MSG_NO_LOOKUPS=true
# expose a port
EXPOSE 7200
# setup a healthcheck, that checks if the server is up.
RUN apt-get update && apt-get install -y curl
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD curl --fail 127.0.0.1:7200/rest/repositories || exit 1
# Add volumes for data, work and logs as these might be accessible from the outside.
# To add your own configuration, manually mount a config file into /opt/graphdb/work
VOLUME /opt/graphdb/data
VOLUME /opt/graphdb/work
VOLUME /opt/graphdb/logs
# setup command and entrypoint
CMD ["-Dgraphdb.home=/opt/graphdb"]
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

View file

@ -0,0 +1,24 @@
version: "3.7"
services:
triplestore:
build: .
ports:
- "127.0.0.1:7200:7200"
volumes:
- './data/data:/opt/graphdb/data'
- './data/work:/opt/graphdb/work'
- './data/logs:/opt/graphdb/logs'
command: "\"-Dgraphdb.home=/opt/graphdb -Ddefault.min.distinct.threshold=2G\""
# Use 16GB of heap space
environment:
GDB_HEAP_SIZE: 16G
labels:
- "eu.wiss-ki.barrel.distillery=${DOCKER_NETWORK_NAME}"
restart: always
networks:
default:
name: ${DOCKER_NETWORK_NAME}
external: true

View file

@ -0,0 +1,13 @@
#!/bin/bash
set -e
# Because we want to run graphdb as a limited user
# we need to make sure that the volumes are writable.
# Because of that, we 'chown'
chown graphdb:graphdb /opt/graphdb/data
chown graphdb:graphdb /opt/graphdb/work
chown graphdb:graphdb /opt/graphdb/logs
# switch to the graphdb user, and run graphdb.
# "$*" joins all arguments into the single command string that 'su -c' expects;
# "$@" inside the string would split them and hand everything after the first argument to su itself.
su graphdb -c "/opt/graphdb/bin/graphdb $*"

View file

@ -0,0 +1,66 @@
package triplestore
import (
"fmt"
"net/http"
"github.com/FAU-CDI/wisski-distillery/pkg/logging"
"github.com/pkg/errors"
"github.com/tkw1536/goprogram/stream"
)
// errTriplestoreFailedSecurity is returned by Update when the request to enable
// security is answered with a status code other than 200 OK.
var errTriplestoreFailedSecurity = errors.New("failed to enable triplestore security: request did not succeed with HTTP 200 OK")
// Update waits for the triplestore to respond, resets the password of the
// admin user and then enables security.
//
// Progress messages are logged to io. When the password reset is answered with
// 401 Unauthorized, security is taken to be enabled already and Update returns nil.
// Any other unexpected status code, or a failing request, results in an error.
func (ts Triplestore) Update(io stream.IOStream) error {
	logging.LogMessage(io, "Waiting for Triplestore")
	if err := ts.Wait(); err != nil {
		return err
	}

	logging.LogMessage(io, "Resetting admin user password")
	{
		res, err := ts.OpenRaw("PUT", "/rest/security/users/"+ts.Config.TriplestoreAdminUser, TriplestoreUserPayload{
			Password: ts.Config.TriplestoreAdminPassword,
			AppSettings: TriplestoreUserAppSettings{
				DefaultInference:      true,
				DefaultVisGraphSchema: true,
				DefaultSameas:         true,
				IgnoreSharedQueries:   false,
				ExecuteCount:          true,
			},
			GrantedAuthorities: []string{"ROLE_ADMIN"},
		}, "", "")
		if err != nil {
			return fmt.Errorf("failed to create triplestore user: %w", err)
		}
		defer res.Body.Close()

		switch res.StatusCode {
		case http.StatusOK:
			// we set the password => requests are unauthorized
			// so we still need to enable security (see below!)
		case http.StatusUnauthorized:
			// a password is needed => security is already enabled.
			// the password may or may not work, but that's a problem for later
			logging.LogMessage(io, "Security is already enabled")
			return nil
		default:
			// err is always nil here, so report the offending status code instead
			return fmt.Errorf("failed to create triplestore user: unexpected status code %d", res.StatusCode)
		}
	}

	logging.LogMessage(io, "Enabling Triplestore security")
	{
		res, err := ts.OpenRaw("POST", "/rest/security", true, "", "")
		if err != nil {
			return fmt.Errorf("failed to enable triplestore security: %w", err)
		}
		defer res.Body.Close()

		if res.StatusCode != http.StatusOK {
			return errTriplestoreFailedSecurity
		}
		return nil
	}
}