new commit
This commit is contained in:
parent
da296f8a64
commit
e46a9fd4ec
69 changed files with 4199 additions and 4805 deletions
207
21_importArtifacts.py
Normal file
207
21_importArtifacts.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Source table and WissKI bundle IDs of the entities created per table row.
_ARTIFACT_TABLE = "c__obj"
_ARTIFACT_BUNDLE = 'bd30c2c64a3caa8bb1628c780c3f24bb'
_PRODUCTION_PLACE_ASSIGNMENT_BUNDLE = 'b13bc6dc04d4bbdafb9536987eb43244'
_DIMENSION_BUNDLE = 'b73258adf62f35bd1be3fa2863fab558'
_IMAGE_BUNDLE = 'b8c6c4b478ead1c80e175ad0f98dafe3'
_IMAGE_ASSIGNMENT_BUNDLE = 'b88e5d94fb2a83d62df99cf64d6c010c'
_DIGITISATION_PROCESS_BUNDLE = 'b22e6c47ccb3ab8a974b37279e1bc33b'

# Field IDs holding the UUID of each entity type (entities are linked by UUID).
_ARTIFACT_UUID_FIELD = 'feb48c9a7efc444449b4b8defcd6d8bd'
_DIGITISATION_PROCESS_UUID_FIELD = 'f32274ec0032b8778ba69d20108590cc'
_IMAGE_ASSIGNMENT_UUID_FIELD = 'f067784f5b1ff850672124a2b05360de'
_PRODUCTION_PLACE_ASSIGNMENT_UUID_FIELD = 'f40cc95db3ccaa1dbbf27294338d9f07'
_DIMENSION_UUID_FIELD = 'f802fd7bf45be523a9b188411a591420'
_IMAGE_UUID_FIELD = 'f11beac4b638016479e6f3fbc7e55d1a'

# Remaining field IDs of the helper entities.
_PRODUCTION_PLACE_FIELD = 'f43f9589eef324fb12c26226dfe94246'  # Production Place
_DIMENSION_TYPE_FIELD = 'f31e9c7e2de5549daea1790a74615288'  # Type
_DIMENSION_VALUE_FIELD = 'f3f805d270890837a6493e7e60a96487'  # Dimension
_IMAGE_REPRO_NUMBER_FIELD = 'feb10344eaa7a5f414d1e8392853eba9'  # Reproduction Number (Image)
_IMAGE_FILE_FIELD = 'fc8d57e55f203c75c2f8a1ae79378ac7'  # File
_IMAGE_ASSIGNMENT_IMAGES_FIELD = 'f70afb79b45472fee3d02f011caa4b36'  # List of Image UUIDs

# Artifact fields that reference the helper entities by UUID.
_ARTIFACT_PRODUCTION_PLACE_ASSIGNMENT_FIELD = 'f2676a0fb8db6ab62235328ae7c7a4b3'
_ARTIFACT_DIMENSION_FIELD = 'fc700eb3f24f4f2a6c165128aa7117f1'
_ARTIFACT_IMAGE_ASSIGNMENT_FIELD = 'f7af1cd9c77448281dd7ecf29ba57e3e'
_ARTIFACT_DIGITISATION_PROCESS_FIELD = 'f5a3f90d920da3db4cfdbaa6264b0e89'

# Table columns that need special handling.
_PRODUCTION_PLACE_COLUMN = 'f__5130_entst_ort'
_IMAGE_COLUMN = 'f__8540_repro_nr_'

# Table column -> field ID, for columns stored directly on the Artifact.
_ARTIFACT_FIELDS = {
    'f__uuid': _ARTIFACT_UUID_FIELD,  # UUID
    'f__5000_obj_dok_nr_': 'f7e2a8a273ab3d577bf5854902550c09',  # Document Identifier
    'f__500n_ngk_nr_': 'f6e041bd0b16b21596849732c01cb168',  # NGK Number
    'f__5200_obj_titel': 'fd06dcc49a29b1a63fa4a789ec17e5c6',  # Title
    'f__5210_status': 'f35c9c9b0991729c36acb41645fe81d1',  # Status
    'f__5220_gattung': 'f2fd7f8a81d5eb1a20371b9acfd1ab59',  # Genre
    'f__5223_form__attribut': 'f05bbd6e29a7d303e4370b04c12b3f75',  # Formattribute
    'f__5226_art': 'f593fa773a6ea458101ba2325a18abbe',  # Artifact type
    'f__523f_funktion': 'f476ba24127d4dff1018acebf45a05f6',  # Function
    'f__5240_formtyp': 'fa7cfd9dbb3d2517c1898b3051d8dbed',  # Shape
    'f__524g_gestalt': 'f8309a21fa79bc6bd2506060b419d2df',  # Figure
    'f__55ng_darst__schlagw_': 'f6abbd4f39a6f79de5de2b14b98e51ff',  # Keywords
    'f__5bes_beschreibung': 'f26ad2bc1f084478cd7011f7b8451526',  # Description
    'f__5ges_geschichte': 'f40120d7c13ef02b486c69245f6c2306',  # History
    'f__68an_abdruck_nr_': 'fd3740649cc06f45677eb0546908cdac',  # Print Number
    'f__stwv_statwerkverz': 'fee0db94d62fae6370a89ff4757ff539',  # Catalogue_of_Works
    'f__9990_kommentar': 'fefe289aa0c9563a153be6da7d37e3ff',  # Comment
    'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
}

# Table column -> field ID, for columns stored on the Digitisation Process.
_DIGITISATION_PROCESS_FIELDS = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}

# Table column -> dimension type label; each becomes its own Dimension entity.
_DIMENSION_TYPES = {
    'f__5362_hoehe': 'height',
    'f__5364_breite': 'width',
    'f__5366_tiefe': 'depth',
    'f__5368_laenge': 'length',
    'f__5370_durchmesser': 'diameter',
    'f__5380_gewicht': 'weight',
    'f__538h_hist__gewicht': 'historical_weight',
}

# Directory prefixes in reproduction numbers, rewritten in this order.
_IMAGE_PATH_REPLACEMENTS = (
    ('Objekte/', 'objects/'),
    ('Objekte\\', 'objects/'),
    ('Objekte3\\', 'objects/'),
    ('Objekte4\\', 'objects/'),
    ('objekte4\\', 'objects/'),
    ('Objekte5\\', 'objects/'),
    ('objekte5\\', 'objects/'),
    ('Marken\\', 'marks/'),
    ('Marken/', 'marks/'),
)


def _isMissing(value):
    """Return True for cells without a value: None, '' or a pandas NaN/NaT/NA."""
    if value is None:
        return True
    if isinstance(value, str):
        return value == ''
    # Without this check NaN/NaT would be stringified and imported as
    # the literal values 'nan' / 'NaT'.
    return bool(pd.api.types.is_scalar(value) and pd.isna(value))


def _splitMultiValue(value):
    """Return the cell as a list of field values, exploding '&'-separated cells."""
    text = str(value)
    # NOTE(review): the first marker uses single braces (the original built it
    # with '&###{{new_line}}###'.format(), which collapses the doubled braces)
    # while the second uses literal double braces. Both are kept exactly as
    # they behaved; confirm which form the source data really contains.
    text = text.replace('&###{new_line}###', '&')
    text = text.replace('###{{new_line}}###', '&')
    if '&' not in text:
        return [text]
    # Stripping each part also covers the ' & ' spelling of the separator.
    # Empty parts (e.g. from a trailing separator) are dropped so that no
    # empty field values are written.
    parts = (part.strip() for part in text.split('&'))
    return [part for part in parts if part]


def _normaliseImageName(name):
    """Rewrite the legacy directory prefix of a reproduction number."""
    for old, new in _IMAGE_PATH_REPLACEMENTS:
        name = name.replace(old, new)
    return name


def importArtifacts(api, engine):
    """Import every row of the ``c__obj`` table as an Artifact entity.

    For each row a Production Place Assignment, a Digitisation Process, one
    Dimension per filled dimension column, one Image per reproduction number
    and (if there are images) an Image Assignment are saved first; the
    Artifact is saved last and references them through their UUIDs.

    Every imported row is appended to ``./logs/c__obj.csv``. Rows whose ``id``
    is already in that log are skipped, so an interrupted run can be resumed.

    Args:
        api: Object passed to ``Entity(api=...)`` and used via ``api.save``.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing artifacts...')

    tableName = _ARTIFACT_TABLE
    logPath = f'./logs/{tableName}.csv'

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
    # Match by id instead of by row position, so resuming does not depend on
    # the database returning the rows in the same order as the previous run.
    processedIds = set(processedRows['id'].tolist())

    # Make sure the log can be written *before* any entity is created.
    os.makedirs(os.path.dirname(logPath), exist_ok=True)

    # Load artifacts table
    artifactsTable = pd.read_sql_table(tableName, con=engine)

    for index, row in artifactsTable.iterrows():
        rowId = row['id']
        if rowId in processedIds:
            print(f'Skipping already processed artifact {rowId}')
            continue

        # Field dicts of the entities built from this row. Field values
        # always have to be lists.
        artifactValues = {}
        digitisationProcessValues = {_DIGITISATION_PROCESS_UUID_FIELD: [str(uuid.uuid4())]}
        imageAssignmentValues = {_IMAGE_ASSIGNMENT_UUID_FIELD: [str(uuid.uuid4())]}
        productionPlaceAssignmentValues = {_PRODUCTION_PLACE_ASSIGNMENT_UUID_FIELD: [str(uuid.uuid4())]}
        dimensionValues = {}
        imageValues = {}

        # Map columns to fields. We use assignments for reification.
        for key, value in row.items():
            if key == 'id' or _isMissing(value):
                continue
            values = _splitMultiValue(value)
            if not values:
                # Cell consisted of separators only.
                continue

            if key in _ARTIFACT_FIELDS:
                artifactValues[_ARTIFACT_FIELDS[key]] = values
            elif key in _DIGITISATION_PROCESS_FIELDS:
                digitisationProcessValues[_DIGITISATION_PROCESS_FIELDS[key]] = values
            elif key in _DIMENSION_TYPES:
                # We map dimensions to Dimension entities.
                dimensionValues[key] = {
                    _DIMENSION_TYPE_FIELD: [_DIMENSION_TYPES[key]],
                    _DIMENSION_VALUE_FIELD: values,
                    _DIMENSION_UUID_FIELD: [str(uuid.uuid4())],
                }
            elif key == _PRODUCTION_PLACE_COLUMN:
                # We map the production place to the Production Place Assignment entity.
                productionPlaceAssignmentValues[_PRODUCTION_PLACE_FIELD] = values
            elif key == _IMAGE_COLUMN:
                # We map images to Image entities, one per reproduction number.
                for item in values:
                    imageName = _normaliseImageName(item)
                    imageValues[imageName] = {
                        _IMAGE_REPRO_NUMBER_FIELD: [imageName],
                        _IMAGE_FILE_FIELD: ['public://artifact_images/' + imageName + '.jpg'],
                        _IMAGE_UUID_FIELD: [str(uuid.uuid4())],
                    }
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Production Place Assignment. Entities could be saved in one
        # batch, but each one is saved immediately.
        productionPlaceAssignment = Entity(
            api=api,
            fields=productionPlaceAssignmentValues,
            bundle_id=_PRODUCTION_PLACE_ASSIGNMENT_BUNDLE,
        )
        api.save(productionPlaceAssignment)

        # Create Dimension entities and collect their UUIDs, because Artifact
        # and Dimension are linked over the UUID.
        dimension = []
        for dimensionFields in dimensionValues.values():
            api.save(Entity(api=api, fields=dimensionFields, bundle_id=_DIMENSION_BUNDLE))
            dimension.append(dimensionFields[_DIMENSION_UUID_FIELD][0])

        # Create Image entities and collect their UUIDs, because Image
        # Assignment and Image are linked over the UUID.
        imageList = []
        for imageFields in imageValues.values():
            api.save(Entity(api=api, fields=imageFields, bundle_id=_IMAGE_BUNDLE))
            imageList.append(imageFields[_IMAGE_UUID_FIELD][0])

        # Create the Image Assignment only when the row has images.
        if imageList:
            imageAssignmentValues[_IMAGE_ASSIGNMENT_IMAGES_FIELD] = imageList
            imageAssignment = Entity(
                api=api,
                fields=imageAssignmentValues,
                bundle_id=_IMAGE_ASSIGNMENT_BUNDLE,
            )
            api.save(imageAssignment)

        # Create Digitisation Process
        digitisationProcess = Entity(
            api=api,
            fields=digitisationProcessValues,
            bundle_id=_DIGITISATION_PROCESS_BUNDLE,
        )
        api.save(digitisationProcess)

        # Add the field values that reference the helper entities.
        # NOTE(review): the original author was unsure whether the Production
        # Place Assignment value is correct - confirm.
        # NOTE(review): the Image Assignment UUID is referenced even when no
        # Image Assignment was saved (row without images) - confirm intended.
        artifactValues[_ARTIFACT_PRODUCTION_PLACE_ASSIGNMENT_FIELD] = [
            productionPlaceAssignmentValues[_PRODUCTION_PLACE_ASSIGNMENT_UUID_FIELD][0]
        ]
        artifactValues[_ARTIFACT_DIMENSION_FIELD] = dimension
        artifactValues[_ARTIFACT_IMAGE_ASSIGNMENT_FIELD] = [
            imageAssignmentValues[_IMAGE_ASSIGNMENT_UUID_FIELD][0]
        ]
        artifactValues[_ARTIFACT_DIGITISATION_PROCESS_FIELD] = [
            digitisationProcessValues[_DIGITISATION_PROCESS_UUID_FIELD][0]
        ]

        # Create Artifact
        artifact = Entity(api=api, fields=artifactValues, bundle_id=_ARTIFACT_BUNDLE)
        api.save(artifact)

        print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')

        # Write log: append only the new row. A row without f__uuid is logged
        # with an empty uuid instead of raising after the entities were saved.
        logEntry = pd.DataFrame([{
            'id': rowId,
            'uuid': artifactValues.get(_ARTIFACT_UUID_FIELD, [None])[0],
            'uri': artifact.uri,
        }])
        logEntry.to_csv(
            logPath,
            mode='a',
            header=not os.path.exists(logPath),
            index=False,
        )

    print('finished importing artifacts')
|
||||
Loading…
Add table
Add a link
Reference in a new issue