new commit

This commit is contained in:
rnsrk 2025-09-09 10:16:31 +02:00
parent da296f8a64
commit e46a9fd4ec
69 changed files with 4199 additions and 4805 deletions

2
.gitignore vendored
View file

@ -11,3 +11,5 @@ wisski_py
__pycache__
logs/*
.venv
.env
.vscode

15
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

160
00_start.py Normal file
View file

@ -0,0 +1,160 @@
from importlib import import_module
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
from time import sleep
# Import entities
material_module = import_module("01_importMaterialsAndTechnique")
administrator_module = import_module("02_importAdministrator")
administrator_status_module = import_module("03_importAdministratorStatus")
source_module = import_module("03_importSource")
artist_source_reference_assignment_module = import_module("04_importArtistSourceReferenceAssignment")
marks_module = import_module("04_importMarks")
source_reference_assignment_module = import_module("04_importSourceReferenceAssignment")
artist_module = import_module("05_importArtist")
literature_module = import_module("06_importLiterature")
inspection_mark_module = import_module("07_importInspectionMark")
journal_assignment_module = import_module("07_importJournalAssignment")
literature_reference_assignment_module = import_module("07_importLiteratureReferenceAssignment")
parent_literature_assignment_module = import_module("07_importParentLiteratureAssignment")
inspection_mark_location_module = import_module("08_importInspectionMarkLocation")
inspection_mark_relation_module = import_module("09_importInspectionMarkRelation")
mark_dating_info_module = import_module("10_importMarkDatingInfo")
birth_module = import_module("12_importBirth")
death_module = import_module("13_importDeath")
dating_module = import_module("14_importDating")
goldsmith_relation_module = import_module("15_importGoldsmithRelation")
client_module = import_module("16_importClient")
mentioned_module = import_module("17_importMentioned")
num_dating_module = import_module("18_importNumDating")
origin_assignment_module = import_module("19_importOriginAssignment")
workshops_module = import_module("20_importWorkshops")
artifacts_module = import_module("21_importArtifacts")
artifact_relation_module = import_module("22_importArtifactRelation")
artist_assignment_module = import_module("24_importArtistAssignment")
mark_information_module = import_module("25_importMarkInformation")
photographer_module = import_module("26_importPhotographer")
# Import relations
artifact_to_artist_relation_module = import_module("98__r__importArtifactToArtistRelationRelation")
artifact_to_client_assignment_relation_module = import_module("98__r__importArtifactToClientAssignmentRelation")
artifact_to_inspection_mark_location_relation_module = import_module("98__r__importArtifactToInspectionMarkLocationRelation")
artifact_to_literature_reference_assignment_relation_module = import_module("98__r__importArtifactToLiteratureReferenceAssignmentRelation")
artifact_to_mark_information_assignment_relation_module = import_module("98__r__importArtifactToMarkInformationAssignmentRelation")
artifact_to_material_relation_module = import_module("98__r__importArtifactToMaterialRelation")
artifact_to_numerice_date_relation_module = import_module("98__r__importArtifactToNumericeDateRelation")
artifact_to_photograph_relation_module = import_module("98__r__importArtifactToPhotographRelation")
artifact_to_relation_relation_module = import_module("98__r__importArtifactToRelationRelation")
artifact_to_source_relation_module = import_module("98__r__importArtifactToSourceRelation")
artifact_to_status_administrator_relation_module = import_module("98__r__importArtifactToStatusAdministratorRelation")
artist_to_birth_relation_module = import_module("98__r__importArtistToBirthRelation")
artist_to_death_relation_module = import_module("98__r__importArtistToDeathRelation")
artist_to_goldsmith_relation_module = import_module("98__r__importArtistToGoldsmithRelation")
artist_to_literature_reference_relation_module = import_module("98__r__importArtistToLiteratureReferenceRelation")
artist_to_mentioned_relation_module = import_module("98__r__importArtistToMentionedRelation")
artist_to_origin_relation_module = import_module("98__r__importArtistToOriginRelation")
artist_to_workshop_relation_module = import_module("98__r__importArtistToWorkshopRelation")
inspection_mark_dating_information_assignment_relation_module = import_module("98__r__importInspectionMarkDatingInformationAssignmentRelation")
inspection_mark_relation_relation_module = import_module("98__r__importInspectionMarkRelationRelation")
inspection_mark_to_literature_reference_relation_module = import_module("98__r__importInspectionMarkToLiteratureReferenceRelation")
literature_to_journal_relation_module = import_module("98__r__importLiteratureToJournalRelation")
literature_to_parent_publication_relation_module = import_module("98__r__importLiteratureToParentPublicationRelation")
mark_to_dating_relation_module = import_module("98__r__importMarkToDatingRelation")
mark_to_literature_relation_module = import_module("98__r__importMarkToLiteratureRelation")
mark_to_mark_information_relation_module = import_module("98__r__importMarkToMarkInformationRelation")
mark_to_source_relation_module = import_module("98__r__importMarkToSourceRelation")
source_to_date_relation_module = import_module("98__r__importSourceToDateRelation")
source_to_literature_reference_assignment_relation_module = import_module("98__r__importSourceToLiteratureReferenceAssignmentRelation")
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
    print('Database initialization failed.')
    # Exit with a non-zero status so callers (shell, CI) can detect the failure.
    raise SystemExit(1)

# Load the environment variables
load_dotenv()

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)

# Run the complete import pipeline, retrying the whole pipeline up to three
# times when any step raises. A retry starts again at the first step.
trials = 0
while trials < 3:
    trials += 1
    try:
        # Entity imports use the 'default' pathbuilder. It is (re)selected at
        # the start of every trial because a previous, failed trial may have
        # already switched the API to the 'relations' pathbuilder.
        api.pathbuilders = ['default']
        # Call the function from the imported module
        material_module.importMaterialsAndTechnique(api, engine)
        administrator_module.importAdministrator(api, engine)
        administrator_status_module.importAdministratorStatus(api, engine)
        source_module.importSource(api, engine)
        artist_source_reference_assignment_module.importArtistSourceReferenceAssignment(api, engine)
        marks_module.importMarks(api, engine)
        source_reference_assignment_module.importSourceReferenceAssignment(api, engine)
        artist_module.importArtist(api, engine)
        literature_module.importLiterature(api, engine)
        inspection_mark_module.importInspectionMark(api, engine)
        journal_assignment_module.importJournalAssignment(api, engine)
        literature_reference_assignment_module.importLiteratureReferenceAssignment(api, engine)
        parent_literature_assignment_module.importParentLiteratureAssignment(api, engine)
        inspection_mark_location_module.importInspectionMarkLocation(api, engine)
        inspection_mark_relation_module.importInspectionMarkRelation(api, engine)
        mark_dating_info_module.importMarkDatingInfo(api, engine)
        birth_module.importBirth(api, engine)
        death_module.importDeath(api, engine)
        dating_module.importDating(api, engine)
        goldsmith_relation_module.importGoldsmithRelation(api, engine)
        client_module.importClient(api, engine)
        mentioned_module.importMentioned(api, engine)
        num_dating_module.importNumDating(api, engine)
        origin_assignment_module.importOriginAssignment(api, engine)
        workshops_module.importWorkshops(api, engine)
        artifacts_module.importArtifacts(api, engine)
        artifact_relation_module.importArtifactRelation(api, engine)
        artist_assignment_module.importArtistAssignment(api, engine)
        mark_information_module.importMarkInformation(api, engine)
        photographer_module.importPhotographer(api, engine)

        # Relation imports use the 'relations' pathbuilder.
        api.pathbuilders = ['relations']
        artifact_to_artist_relation_module.importArtifactToArtistRelationRelation(api, engine)
        artifact_to_client_assignment_relation_module.importArtifactToClientAssignmentRelation(api, engine)
        artifact_to_inspection_mark_location_relation_module.importArtifactToInspectionMarkLocationRelation(api, engine)
        artifact_to_literature_reference_assignment_relation_module.importArtifactToLiteratureReferenceAssignmentRelation(api, engine)
        artifact_to_mark_information_assignment_relation_module.importArtifactToMarkInformationAssignmentRelation(api, engine)
        artifact_to_material_relation_module.importArtifactToMaterialRelation(api, engine)
        artifact_to_numerice_date_relation_module.importArtifactToNumericeDateRelation(api, engine)
        artifact_to_photograph_relation_module.importArtifactToPhotographRelation(api, engine)
        artifact_to_relation_relation_module.importArtifactToRelationRelation(api, engine)
        artifact_to_source_relation_module.importArtifactToSourceRelation(api, engine)
        artifact_to_status_administrator_relation_module.importArtifactToStatusAdministratorRelation(api, engine)
        artist_to_birth_relation_module.importArtistToBirthRelation(api, engine)
        artist_to_death_relation_module.importArtistToDeathRelation(api, engine)
        artist_to_goldsmith_relation_module.importArtistToGoldsmithRelation(api, engine)
        artist_to_literature_reference_relation_module.importArtistToLiteratureReferenceRelation(api, engine)
        artist_to_mentioned_relation_module.importArtistToMentionedRelation(api, engine)
        artist_to_origin_relation_module.importArtistToOriginRelation(api, engine)
        artist_to_workshop_relation_module.importArtistToWorkshopRelation(api, engine)
        inspection_mark_dating_information_assignment_relation_module.importInspectionMarkDatingInformationAssignmentRelation(api, engine)
        inspection_mark_relation_relation_module.importInspectionMarkRelationRelation(api, engine)
        inspection_mark_to_literature_reference_relation_module.importInspectionMarkToLiteratureReferenceRelation(api, engine)
        literature_to_journal_relation_module.importLiteratureToJournalRelation(api, engine)
        literature_to_parent_publication_relation_module.importLiteratureToParentPublicationRelation(api, engine)
        mark_to_dating_relation_module.importMarkToDatingRelation(api, engine)
        mark_to_literature_relation_module.importMarkToLiteratureRelation(api, engine)
        mark_to_mark_information_relation_module.importMarkToMarkInformationRelation(api, engine)
        mark_to_source_relation_module.importMarkToSourceRelation(api, engine)
        source_to_date_relation_module.importSourceToDateRelation(api, engine)
        source_to_literature_reference_assignment_relation_module.importSourceToLiteratureReferenceAssignmentRelation(api, engine)
    except Exception as e:
        print(f'Error: {e}')
        print(f'Trial {trials} of 3 failed.')
        # Only announce and wait for a retry when another trial will follow.
        if trials < 3:
            print(f'Retrying in 10 seconds...')
            sleep(10)
        continue
    # Every step finished without raising: stop instead of running the whole
    # pipeline again on the next loop iteration.
    break
else:
    # The loop ended without reaching `break`, i.e. all three trials failed.
    print('Import failed after 3 trials.')
    raise SystemExit(1)

View file

@ -1,81 +0,0 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
# Load the environment variables
load_dotenv()
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilders = ['default']
try:
processedRows = pd.read_csv(f'./logs/processedMaterials.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
# Load materials table
materialsTable = pd.read_sql_table('c__5280_material', con=engine)
# Create materials
for index, row in materialsTable.iterrows():
# For every row in table...
if index < len(processedRows) and materialsTable.loc[index, 'id'] == processedRows.iloc[index, 'id']:
# skip if already processed
print(f'Skipping already processed material {materialsTable.iloc[index, 0]}')
continue
# Create Entity property dicts
materialValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
if '###{{new_line}}###' in str(value):
print('replaced curly braces')
value = str(value).replace('###{{new_line}}###', '')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
match key:
case 'id':
continue
case 'f__uuid':
materialValues['fedfe553c2332bd4902c887813f29ed8'] = value # UUID
case 'f__5280_material':
materialValues['f5f4251312f54c0d104ea87761b94bde'] = value # Material
case 'f__5300_technik':
materialValues['f231e08850022f091ebd5055d8aad30f'] = value # Technique
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
material = Entity(api=api, fields=materialValues, bundle_id='b45978f2b073ff3c73b3c7220ebb3b89')
api.save(material)
print(f'Created material {index}: {material.uri}')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': materialValues['fedfe553c2332bd4902c887813f29ed8'][0], 'uri': material.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processedMaterials.csv', index=False)
print('finish')

View file

@ -0,0 +1,79 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def _splitCellValue(value):
    """Convert one table cell into the list of values stored on an entity field.

    The cell is converted to a string, '###{new_line}###' markers (written
    with single or double braces, optionally preceded by '&') are treated as
    '&' separators, and the text is split first on '&' and then on ','.
    Items that are empty after splitting are dropped.

    Args:
        value: The raw, non-empty cell value of a table row.

    Returns:
        A list of strings; empty when the cell only contained separators.
    """
    text = str(value)
    # Both brace spellings are handled: the previous code only replaced the
    # double-brace token, and the single-brace one only when it followed '&'.
    for token in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace('&' + token, '&').replace(token, '&')
    text = text.replace(' & ', '&')

    if '&' in text:
        # Explode "&"-separated values to array items
        parts = [part.strip() for part in text.split('&')]
    else:
        # A single value still has to be wrapped in an array
        parts = [text]

    # Split comma-separated items as well and flatten the result.
    values = []
    for part in parts:
        if ',' in part:
            values.extend(piece.strip() for piece in part.split(','))
        else:
            values.append(part)
    # Separators at the start/end of a cell would otherwise leave '' items.
    return [item for item in values if item]


def importMaterialsAndTechnique(api, engine):
    """Create one WissKI material entity per row of table 'c__5280_material'.

    Every created entity is appended to './logs/c__5280_material.csv'. On a
    later call, rows whose position and 'id' match that log are skipped, so an
    interrupted import can be resumed.

    Args:
        api: WissKI API object; entities are stored with ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    print('Importing materials and technique...')
    tableName = 'c__5280_material'
    bundleId = 'b45978f2b073ff3c73b3c7220ebb3b89'
    uuidField = 'fedfe553c2332bd4902c887813f29ed8'
    # Map columns to fields. We use assignments for reification.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__5280_material': 'f5f4251312f54c0d104ea87761b94bde',  # Material
        'f__5300_technik': 'f231e08850022f091ebd5055d8aad30f',  # Technique
    }

    # Make sure the log directory exists before the first log write.
    os.makedirs('./logs', exist_ok=True)
    logPath = f'./logs/{tableName}.csv'
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load materials table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create materials
    for index, row in sqlTable.iterrows():
        # For every row in table...
        rowId = sqlTable.loc[index, 'id']
        if index < len(processedRows) and rowId == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed material {rowId}')
            continue

        # Create Entity property dict
        materialValues = {}
        for key, value in row.items():
            # For every column in row...
            if value is None or pd.isna(value) or value == '':
                # skip if cell has no value (NaN/NaT would otherwise be
                # imported as the literal text 'nan')
                continue
            if key == 'id':
                continue
            fieldId = fieldIds.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue
            # Properties of an entity have to be an array
            values = _splitCellValue(value)
            if values:
                materialValues[fieldId] = values

        # Create Material
        material = Entity(api=api, fields=materialValues, bundle_id=bundleId)
        api.save(material)
        print(f'Created material {index}: {material.uri} of {len(sqlTable)}')

        # Write log. The UUID is looked up defensively: a KeyError here would
        # happen after the entity was saved and make a retry create it again.
        logEntry = pd.DataFrame([{
            'id': row['id'],
            'uuid': materialValues.get(uuidField, [None])[0],
            'uri': material.uri,
        }])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')

View file

@ -5,105 +5,92 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importAdministrator(api, engine):
print('Importing administrators...')
tableName = 'c__vwr'
bundleId = 'b4e5a6a31ff575ab09b07b5f27d322ab'
# Load the environment variables
load_dotenv()
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilders = ['default']
# Load administrators table
administratorsTable = pd.read_sql_table(tableName, con=engine)
try:
processedRows = pd.read_csv(f'./logs/processedAdministrators.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['administratorId', 'uuid', 'uri'])
# Load sources table
administratorsTable = pd.read_sql_table('c__vwr', con=engine)
administratorValues = {}
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
# Create administrators
for index, row in administratorsTable.iterrows():
# For every row in table...
if index < len(processedRows) and administratorsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
# skip if already processed
print(f'Skipping already processed administrator {administratorsTable.iloc[index, 0]}')
continue
# Create Entity property dicts
administratorValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create administrators
for index, row in administratorsTable.iterrows():
administratorValues = {}
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
# For every row in table...
if index < len(processedRows) and administratorsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed administrator {administratorsTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '###{' in str(value):
print('replaced curly braces')
value = str(value).replace('###{new_line', '')
value = str(value).replace('}###', '')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
match key:
case 'id':
# Create Entity property dicts
administratorValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
case 'f__uuid':
administratorValues['f707e595ce7301d61c064e8e44c9c4f4'] = value # UUID
case 'f__vwra_vwr_adresse':
administratorValues['f303bbabf3d97536777b0f552d20bc7a'] = value # Address
case 'f__vwrn_vwr_dok_nr_':
administratorValues['f37e82c36b4fc6b275a1a86a389481e1'] = value # Administrator document number
case 'f__vwrb_verw_publ_bez':
administratorValues['ffc50ffbcc3f411ed63e3c6dfc6b4d80'] = value # Appellation in publication
case 'f__9990_kommentar':
administratorValues['fcf9600af8c3eff355eb42466e9aac39'] = value # Comment
case 'f__2900_verw_langbez_':
administratorValues['f78d3c9e6800adbb8a9af0867cbdf3c7'] = value # Long Appellation
case 'f__2864_ort':
administratorValues['fecf6c9d7cbae513923e411178516378'] = value # Place
case 'f__290a_verw_kurzbez_':
administratorValues['fddaae99f4c6a835d9f9f195523c85f7'] = value # Short appellation
# Digitisation Process
case 'f__9900_datum_erfassung':
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
case 'f__99ae_datum_aenderung':
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
case 'f__efbm_bem_erfassung':
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
case _:
print(f'{key} is not a valid field, skipping.')
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
match key:
case 'id':
continue
case 'f__uuid':
administratorValues['f707e595ce7301d61c064e8e44c9c4f4'] = value # UUID
case 'f__vwra_vwr_adresse':
administratorValues['f303bbabf3d97536777b0f552d20bc7a'] = value # Address
case 'f__vwrn_vwr_dok_nr_':
administratorValues['f37e82c36b4fc6b275a1a86a389481e1'] = value # Administrator document number
case 'f__vwrb_verw_publ_bez':
administratorValues['ffc50ffbcc3f411ed63e3c6dfc6b4d80'] = value # Appellation in publication
case 'f__9990_kommentar':
administratorValues['fcf9600af8c3eff355eb42466e9aac39'] = value # Comment
case 'f__2900_verw_langbez_':
administratorValues['f78d3c9e6800adbb8a9af0867cbdf3c7'] = value # Long Appellation
case 'f__2864_ort':
administratorValues['fecf6c9d7cbae513923e411178516378'] = value # Place
case 'f__290a_verw_kurzbez_':
administratorValues['fddaae99f4c6a835d9f9f195523c85f7'] = value # Short appellation
# Digitisation Process
case 'f__9900_datum_erfassung':
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
case 'f__99ae_datum_aenderung':
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
case 'f__efbm_bem_erfassung':
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Digitisation Process
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
api.save(digitisationProcess)
# Create Digitisation Process
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
api.save(digitisationProcess)
# Set Digitisation Process
administratorValues['f3ec4640a87bd4534763af0fca050193'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
# Set Digitisation Process
administratorValues['f3ec4640a87bd4534763af0fca050193'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
# Create Material
administrator = Entity(api=api, fields=administratorValues, bundle_id='b4e5a6a31ff575ab09b07b5f27d322ab') # Administrator
api.save(administrator)
# Create Administrator
administrator = Entity(api=api, fields=administratorValues, bundle_id=bundleId) # Administrator
api.save(administrator)
print(f'Created administrator {index}: {administrator.uri}')
print(f'Created administrator {index}: {administrator.uri} of {len(administratorsTable)}')
# Write log
processedRows = processedRows._append({'administratorId': administratorValues['f37e82c36b4fc6b275a1a86a389481e1'][0], 'uuid': administratorValues['f707e595ce7301d61c064e8e44c9c4f4'][0], 'uri': administrator.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processedAdministrators.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorValues['f707e595ce7301d61c064e8e44c9c4f4'][0], 'uri': administrator.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing administrators')

View file

@ -5,84 +5,74 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importAdministratorStatus(api, engine):
print('Importing administrator statuses...')
tableName = 'c__ob28_status_verwalt_'
bundleId = 'b45447146729190da3a1d3e19165a6f8'
# Load the environment variables
load_dotenv()
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
# Load administrator status table
administratorStatusTable = pd.read_sql_table(tableName, con=engine)
try:
processedRows = pd.read_csv(f'./logs/processedAdministratorStatus.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
# Create administrator statuses
for index, row in administratorStatusTable.iterrows():
administratorStatusValues = {}
# Load sources table
administratorStatusTable = pd.read_sql_table('c__ob28_status_verwalt_', con=engine)
administratorStatusValues = {}
# Create administratorStatuss
for index, row in administratorStatusTable.iterrows():
# For every row in table...
if index < len(processedRows) and administratorStatusTable.iloc[index, 0] == processedRows.iloc[index, 0]:
# skip if already processed
print(f'Skipping already processed administratorStatus {administratorStatusTable.iloc[index, 0]}')
continue
# Create Entity property dicts
administratorStatusValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# For every row in table...
if index < len(processedRows) and administratorStatusTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed administratorStatus {administratorStatusTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
match key:
case 'id':
# Create Entity property dicts
administratorStatusValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
case 'f__uuid':
administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'] = value # UUID
case 'f__290a_verw_kurzbez_':
administratorStatusValues['f08562a866d00cd5245c380c20e4e7f9'] = value # Admistrator short appellation
case 'f__2950_invent_nr_':
administratorStatusValues['f92ac041f6098335bf4075942a771ee3'] = value # Inventary
case 'f__2952_alte_i_nr_':
administratorStatusValues['fdc070143457df491f18347ac97b0f24'] = value # Old Identifier
case 'f__2864_ort':
administratorStatusValues['f9bc3796ceff9a3581bd8047545628b9'] = value # Place
case 'f__ob28_status_verwalt_':
administratorStatusValues['ff0265deb26c28f139345b89577b2539'] = value # Status
case 'f__2996_gelt_dauer':
administratorStatusValues['f3363962b4eaa4d38358bc1d2bda1a7f'] = value # Time-Span
case _:
print(f'{key} is not a valid field, skipping.')
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
match key:
case 'id':
continue
case 'f__uuid':
administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'] = value # UUID
case 'f__290a_verw_kurzbez_':
administratorStatusValues['f08562a866d00cd5245c380c20e4e7f9'] = value # Admistrator short appellation
case 'f__2950_invent_nr_':
administratorStatusValues['f92ac041f6098335bf4075942a771ee3'] = value # Inventary
case 'f__2952_alte_i_nr_':
administratorStatusValues['fdc070143457df491f18347ac97b0f24'] = value # Old Identifier
case 'f__2864_ort':
administratorStatusValues['f9bc3796ceff9a3581bd8047545628b9'] = value # Place
case 'f__ob28_status_verwalt_':
administratorStatusValues['ff0265deb26c28f139345b89577b2539'] = value # Status
case 'f__2996_gelt_dauer':
administratorStatusValues['f3363962b4eaa4d38358bc1d2bda1a7f'] = value # Time-Span
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
administratorStatus = Entity(api=api, fields=administratorStatusValues, bundle_id='b45447146729190da3a1d3e19165a6f8')
api.save(administratorStatus)
# Create Administrator Status
administratorStatus = Entity(api=api, fields=administratorStatusValues, bundle_id='b45447146729190da3a1d3e19165a6f8')
api.save(administratorStatus)
print(f'Created administratorStatus {index}: {administratorStatus.uri} of {len(administratorStatusTable)}')
print(f'Created administratorStatus {index}: {administratorStatus.uri} of {len(administratorStatusTable)}')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'][0], 'uri': administratorStatus.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processedAdministratorStatus.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'][0], 'uri': administratorStatus.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing administrator statuses')

View file

@ -5,124 +5,112 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importSource(api, engine):
    """Import the rows of the ``c__que`` table as WissKI source entities.

    For every row that is not yet listed in the progress log, a Digitisation
    Process entity is saved first, then the source entity, which refers to
    the Digitisation Process by its UUID. The progress log
    ``./logs/c__que.csv`` is rewritten after every row so that an interrupted
    run can be resumed.

    Args:
        api: WissKI API client; it is passed to ``Entity`` and its ``save``
            method is called for every created entity.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing sources...')
    tableName = 'c__que'
    bundleId = 'b7dc57a93e008a58514b0d4ca26147b1'
    digitisationProcessBundleId = 'b22e6c47ccb3ab8a974b37279e1bc33b'
    digitisationProcessUuidField = 'f32274ec0032b8778ba69d20108590cc'
    sourceIdField = 'f50ad6021b42c094f7e551faec831802'
    sourceUuidField = 'f9f02815a5631a85948d4d258a455f49'
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id of the source bundle.
    sourceFields = {
        'f__uuid': sourceUuidField,  # UUID
        'f__9990_kommentar': 'f89a563b07f965ca2dcb0b1bd178e863',  # Comment
        'f__8080_verfasser': 'f2d2934a6c72b5552f01042338ff5d67',  # Creator
        'f__80bs_que__beschr_': 'fd2122de6bcd62c61fcb7a9223baa20f',  # Description
        'f__80bw_que__bewertung': 'f70a7818de6e31eacea22148c92737ac',  # Evaluation
        'f__8182_transkr__extern': 'f409a3ea352d6bc55c27f6a93d239191',  # External transcript
        'f__2950_invent_nr_': 'f71605f258ceb37ee5fcf2cd7871de2c',  # Inventory number
        'f__2900_verw_langbez_': 'f19d275cd6f48ef64d104997ca99291d',  # Long appellation administrator
        'f__8540_repro_nr_': 'f881dd5566725dc26a8b25cfba181792',  # Reproduction number
        'f__290a_verw_kurzbez_': 'f343d954f8d95f1da98201a7f29ac81f',  # Short appellation administrator
        'f__8130_que_kurzt_': 'f3faea3691516939fc4b0c2149ee2e5b',  # Short title
        'f__8000_que_dok_nr_': sourceIdField,  # Source document identifier
        'f__8092_untertitel': 'fb734bd50628353b7b5c0bfc88f2cbdc',  # Subtitle
        'f__80fp_vorhanden_als': 'fd7b99a3db6191382401d69710ac192f',  # There as
        'f__8090_titel': 'f399332f583d268f07200efd1e3bb3c5',  # Title
        'f__8180_transkript_': 'f6585008a698902f45dc2a79b9a3a9de',  # Transcript
        'f__8060_art': 'f38c664e4f9b2effc83ebc50e1244442',  # Type
        'f__2990_verbleib': 'fae3bc551d146652898782f712f95749',  # Whereabouts
    }
    # Table column -> field id of the Digitisation Process bundle.
    digitisationProcessFields = {
        'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
        'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
        'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
    }

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'sourceId', 'uuid', 'uri'])
    # Resume by row id instead of by row position, so a re-ordered or
    # extended table cannot make an already imported row be imported twice.
    processedIds = set(processedRows['id'])

    # Load sources table
    sourcesTable = pd.read_sql_table(tableName, con=engine)

    # Create sources
    for index, row in sourcesTable.iterrows():
        if row['id'] in processedIds:
            print(f"Skipping already processed source {row['id']}")
            continue

        # Fresh property dicts for every row; each Digitisation Process
        # gets its own UUID.
        sourceValues = {}
        digitisationProcessValues = {digitisationProcessUuidField: [str(uuid.uuid4())]}

        for key, value in row.items():
            if key == 'id':
                continue
            # pd.isna also catches NaN/NaT, which would otherwise be stored
            # as the literal strings 'nan'/'NaT'.
            if pd.isna(value) or value == '':
                continue
            # Properties of an entity have to be an array.
            values = _splitCellValue(value)
            if not values:
                continue
            if key in sourceFields:
                sourceValues[sourceFields[key]] = values
            elif key in digitisationProcessFields:
                digitisationProcessValues[digitisationProcessFields[key]] = values
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id=digitisationProcessBundleId)
        api.save(digitisationProcess)

        # Link the source to its Digitisation Process via the UUID.
        sourceValues['ffdf27e75013fa55d31f728ff5166f06'] = [digitisationProcessValues[digitisationProcessUuidField][0]]

        # Create Source
        source = Entity(api=api, fields=sourceValues, bundle_id=bundleId)
        api.save(source)
        print(f'Created source {index}: {source.uri} of {len(sourcesTable)}')

        # Write log. A row without document id or UUID is logged with an
        # empty cell instead of aborting the import with a KeyError.
        logEntry = {
            'id': row['id'],
            'sourceId': sourceValues.get(sourceIdField, [None])[0],
            'uuid': sourceValues.get(sourceUuidField, [None])[0],
            'uri': source.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)
        processedIds.add(row['id'])
    print('finish')


def _splitCellValue(value):
    """Turn one raw table cell into a list of non-empty, stripped strings.

    Both ``&`` and the new-line marker separate values. The marker is accepted
    with literal double braces (what a plain ``str.replace`` matches) and with
    single braces (what the former ``'...{{new_line}}...'.format()`` call
    actually searched for, because ``format`` collapses ``{{`` to ``{``).
    Empty parts, e.g. from ``&`` directly followed by a marker, are dropped.
    """
    text = str(value)
    for marker in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(marker, '&')
    parts = (part.strip() for part in text.split('&'))
    return [part for part in parts if part]

View file

@ -5,83 +5,70 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistSourceReferenceAssignment(api, engine):
    """Import the rows of ``c__81kr_que_kt_kue`` as WissKI entities.

    Every row that is not yet listed in the progress log is mapped to one
    entity of the artist source reference assignment bundle and saved. The
    progress log ``./logs/c__81kr_que_kt_kue.csv`` is rewritten after every
    row so that an interrupted run can be resumed.

    Args:
        api: WissKI API client; it is passed to ``Entity`` and its ``save``
            method is called for every created entity.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing artist source reference assignments...')

    tableName = "c__81kr_que_kt_kue"
    bundleId = 'bf71940d0b18c20511e2141159afb9de'  # Artist source reference assignment
    uuidField = 'fe3139ac03bd854ac9196fc240e7c68b'
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id of the bundle.
    entityFields = {
        'f__uuid': uuidField,  # UUID
        'f__8134_stelle': 'f58c13c5502baef24ede2a8a977ae6c6',  # Source reference
        'f__81kr_que_kt_kue': 'f14d2d19f879d7398a384bdc132921a3',  # Source short title
    }

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
    # Resume by row id instead of by row position, so a re-ordered or
    # extended table cannot make an already imported row be imported twice.
    processedIds = set(processedRows['id'])

    # Load the assignment table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        if row['id'] in processedIds:
            print(f'Skipping already processed entity {row["id"]}')
            continue

        entityValues = {}
        for key, value in row.items():
            if key == 'id':
                continue
            # pd.isna also catches NaN/NaT, which would otherwise be stored
            # as the literal strings 'nan'/'NaT'.
            if pd.isna(value) or value == '':
                continue
            # Properties of an entity have to be an array.
            values = _splitCellValue(value)
            if not values:
                continue
            if key in entityFields:
                entityValues[entityFields[key]] = values
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create the assignment entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        # Report progress against the number of rows, not the length of the
        # table name string.
        print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is read from this row's values only, so a row
        # without UUID neither fails nor inherits the previous row's UUID.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [None])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)
        processedIds.add(row['id'])
    print('finish')


def _splitCellValue(value):
    """Turn one raw table cell into a list of non-empty, stripped strings.

    Both ``&`` and the new-line marker separate values. The marker is accepted
    with literal double braces (what a plain ``str.replace`` matches) and with
    single braces (what the former ``'...{{new_line}}...'.format()`` call
    actually searched for, because ``format`` collapses ``{{`` to ``{``).
    Empty parts, e.g. from ``&`` directly followed by a marker, are dropped.
    """
    text = str(value)
    for marker in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(marker, '&')
    parts = (part.strip() for part in text.split('&'))
    return [part for part in parts if part]

View file

@ -5,268 +5,255 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMarks(api, engine):
print('Importing marks...')
# Load the environment variables
load_dotenv()
tableName = 'c__mar'
bundleId = 'b2c4e1c984d7758d7c7ec719110f7125'
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'markId', 'uuid', 'uri'])
# Simple log
# Load mark table
sqlTable = pd.read_sql_table(tableName, con=engine)
print(f'Processing {len(sqlTable)} marks...')
try:
processedRows = pd.read_csv(f'./logs/processedMarks.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'markId', 'uuid', 'uri'])
# Load mark table
markTable = pd.read_sql_table('c__mar', con=engine)
print(f'Processing {len(markTable)} marks...')
# Create mark
for index, row in markTable.iterrows():
# For every row in table...
if index < processedRows['id'].max():
# skip if already processed
print(f'Skipping already processed mark {row['id']}')
continue
# Create Entity property dicts
markValues = {}
creationValues = {}
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
dimensionValues = {}
featureValues = {}
featureDimensionValues = {}
imageValues = {}
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create mark
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed mark {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '###{{new_line}}###' in str(value):
print('replaced curly braces')
value = str(value).replace('###{{new_line}}###', '')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification
# for nested semantics, because we need to be efficient.
match key:
case 'id':
# Create Entity property dicts
markValues = {}
creationValues = {}
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
dimensionValues = {}
featureValues = {}
featureDimensionValues = {}
imageValues = {}
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
case 'f__uuid':
markValues['fb40b199b4032e55acc152f994e93b45'] = value # UUID
case 'f__3002_pub_kue_nr_':
markValues['f6f0572ebec9c98e164d0e9aa0650c2e'] = value # Artist Number
case 'f__6700_mar_dok_nr_':
markValues['fe577970c02f173170ff3848a36b3b79'] = value # Mark Document Number
case 'f__6770_rosenb_nr_':
markValues['f6fc4b5726c97bad8b03ede860491649'] = value # Rosenberg Number
case 'f__9990_kommentar':
markValues['f01e527e707ff36bf966baa01c163378'] = value # Comment
case 'f__68an_abdruck_nr_':
markValues['f8324ea3c9ee378f1e19035e092aadb9'] = value # Print Number
case 'f__68nk_besonderheiten':
markValues['fa21e323a8a7a99ce3489e1f7753ac5f'] = value # Special Features
case 'f__8470_aufnahmenr_':
markValues['f67031e2a2b81ad9f318dc5b11d5a6af'] = value # Recording number
case 'f__684b_breite_marke':
# We map dimensions to Dimension entity.
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width'] # Type
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification
# for nested semantics, because we need to be efficient.
match key:
case 'id':
continue
case 'f__uuid':
markValues['fb40b199b4032e55acc152f994e93b45'] = value # UUID
case 'f__3002_pub_kue_nr_':
markValues['f6f0572ebec9c98e164d0e9aa0650c2e'] = value # Artist Number
case 'f__6700_mar_dok_nr_':
markValues['fe577970c02f173170ff3848a36b3b79'] = value # Mark Document Number
case 'f__6770_rosenb_nr_':
markValues['f6fc4b5726c97bad8b03ede860491649'] = value # Rosenberg Number
case 'f__9990_kommentar':
markValues['f01e527e707ff36bf966baa01c163378'] = value # Comment
case 'f__68an_abdruck_nr_':
markValues['f8324ea3c9ee378f1e19035e092aadb9'] = value # Print Number
case 'f__68nk_besonderheiten':
markValues['fa21e323a8a7a99ce3489e1f7753ac5f'] = value # Special Features
case 'f__8470_aufnahmenr_':
markValues['f67031e2a2b81ad9f318dc5b11d5a6af'] = value # Recording number
case 'f__684b_breite_marke':
# We map dimensions to Dimension entity.
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width'] # Type
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__684h_hoehe_marke':
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['hight'] # Type
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__68na_bz_breite_hoehe':
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width_x_hight'] # Type
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__6840_rahmenform':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['frame_shape'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__684d_darst__marke':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['design'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__684l_text_marke':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['text'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nb_randanschluss':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['edge_connection'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nc_form_haste':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['haste_mould'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nd_form_schraegstr_':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['slash_form_shape'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68ne_haste_schraegstr_':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nf_n_knick':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash_kink'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68ng_ueberg__serifen':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = [
'transition_serif_haste'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nh_dicke_ser__max_':
# We map (features) dimensions to Dimension entity.
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['maximum_thickness'] # Type
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__68ni_dicke_ser__min':
# We map features to Feature entity.
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
'minimum_thickness'] # Type
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__68nj_breite_serife':
# We map features to Feature entity.
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
'width'] # Type
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__8540_repro_nr_':
# We map images to Image entity
for item in value:
if item is not None:
# Replace dir paths in name
item = item.replace('Objekte\\', 'objects/')
item = item.replace('Objekte3\\', 'objects/')
item = item.replace('Objekte4\\', 'objects/')
item = item.replace('Objekte5\\', 'objects/')
item = item.replace('objekte5\\', 'objects/')
item = item.replace('Marken\\', 'marks/')
item = item.replace('Marken/', 'marks/')
item = item.replace('MArken\\', 'marks/')
item = item.replace('Goldschmiede/', 'goldsmiths/')
item = item.replace('Goldschmiede\\', 'goldsmiths/')
item = item.replace('Epitaphien/', 'epitaphies/')
item = item.replace('Epitaphien\\', 'epitaphies/')
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
case 'f__9900_datum_erfassung':
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
case 'f__99ae_datum_aenderung':
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
case 'f__efbm_bem_erfassung':
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
case 'f__ptxt_plug_in_text':
markValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
case _:
print(f'{key} is not a valid field, skipping.')
case 'f__684h_hoehe_marke':
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['hight'] # Type
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__68na_bz_breite_hoehe':
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width_x_hight'] # Type
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__6840_rahmenform':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['frame_shape'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__684d_darst__marke':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['design'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__684l_text_marke':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['text'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nb_randanschluss':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['edge_connection'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nc_form_haste':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['haste_mould'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nd_form_schraegstr_':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['slash_form_shape'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68ne_haste_schraegstr_':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nf_n_knick':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash_kink'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68ng_ueberg__serifen':
# We map features to Feature entity.
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = [
'transition_serif_haste'] # Type
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
case 'f__68nh_dicke_ser__max_':
# We map (features) dimensions to Dimension entity.
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['maximum_thickness'] # Type
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__68ni_dicke_ser__min':
# We map features to Feature entity.
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
'minimum_thickness'] # Type
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__68nj_breite_serife':
# We map features to Feature entity.
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
'width'] # Type
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
case 'f__8540_repro_nr_':
# We map images to Image entity
for item in value:
if item is not None:
# Replace dir paths in name
item = item.replace('Objekte\\', 'objects/')
item = item.replace('Objekte3\\', 'objects/')
item = item.replace('Objekte4\\', 'objects/')
item = item.replace('objekte4\\', 'objects/')
item = item.replace('Objekte5\\', 'objects/')
item = item.replace('objekte5\\', 'objects/')
item = item.replace('Marken\\', 'marks/')
item = item.replace('Marken/', 'marks/')
item = item.replace('MArken\\', 'marks/')
item = item.replace('Goldschmiede/', 'goldsmiths/')
item = item.replace('Goldschmiede\\', 'goldsmiths/')
item = item.replace('Epitaphien/', 'epitaphies/')
item = item.replace('Epitaphien\\', 'epitaphies/')
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
case 'f__9900_datum_erfassung':
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
case 'f__99ae_datum_aenderung':
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
case 'f__efbm_bem_erfassung':
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
case 'f__ptxt_plug_in_text':
markValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
case _:
print(f'{key} is not a valid field, skipping.')
# Create Dimension entities and add their UUIDs to a list
# because we link Mark and Dimension over the UUID
dimension = []
for key, value in dimensionValues.items():
if value:
dimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558')
api.save(dimensionItem)
dimension.append(value['f802fd7bf45be523a9b188411a591420'][0])
# Create Dimension entities and add their UUIDs to a list
# because we link Mark and Dimension over the UUID
dimension = []
for key, value in dimensionValues.items():
if value:
dimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558')
api.save(dimensionItem)
dimension.append(value['f802fd7bf45be523a9b188411a591420'][0])
# Create (feature) Dimension entities and add their UUIDs to a list
# because we link Feature and its Dimension over the UUID
featureDimension = []
for key, value in featureDimensionValues.items():
if value:
featureDimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558') # Dimension Bundle
api.save(featureDimensionItem)
featureDimension.append(value['f802fd7bf45be523a9b188411a591420'][0]) # Dimension UUID
# Create (feature) Dimension entities and add their UUIDs to a list
# because we link Feature and its Dimension over the UUID
featureDimension = []
for key, value in featureDimensionValues.items():
if value:
featureDimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558') # Dimension Bundle
api.save(featureDimensionItem)
featureDimension.append(value['f802fd7bf45be523a9b188411a591420'][0]) # Dimension UUID
# Add the serif feature to the feature list
if featureDimension:
featureValues.setdefault('serif', {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['serif'] # Feature Type
featureValues['serif']['f0f825f5d3a6f0e2d67eee311b94cd6f'] = featureDimension # Dimension UUIDs
featureValues['serif']['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
# Add the serif feature to the feature list
if featureDimension:
featureValues.setdefault('serif', {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['serif'] # Feature Type
featureValues['serif']['f0f825f5d3a6f0e2d67eee311b94cd6f'] = featureDimension # Dimension UUIDs
featureValues['serif']['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
# Create Feature entities and add their UUIDs to a list
# because we link Mark and Feature over the UUID
feature = []
for key, value in featureValues.items():
if value:
featureItem = Entity(api=api, fields=value, bundle_id='b393e1c3db202fbb7a8b54e65eb38227') # Feature Bundle
api.save(featureItem)
feature.append(value['f299e2a145b508e376f2bf2e44cbe219'][0]) # Feature UUID
# Create Feature entities and add their UUIDs to a list
# because we link Mark and Feature over the UUID
feature = []
for key, value in featureValues.items():
if value:
featureItem = Entity(api=api, fields=value, bundle_id='b393e1c3db202fbb7a8b54e65eb38227') # Feature Bundle
api.save(featureItem)
feature.append(value['f299e2a145b508e376f2bf2e44cbe219'][0]) # Feature UUID
# Create Image entities and add their UUIDs to a list
# because we link Image Assignment and Image over the UUID
imageList = []
for key, value in imageValues.items():
if value:
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
api.save(imageItem)
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
# Create Image entities and add their UUIDs to a list
# because we link Image Assignment and Image over the UUID
imageList = []
for key, value in imageValues.items():
if value:
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
api.save(imageItem)
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
# Create Image Assignment entities and add their UUIDs to a list
# because we link Artifact and Image Assignment over the UUID
if imageList:
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
api.save(imageAssignment)
# Create Image Assignment entities and add their UUIDs to a list
# because we link Artifact and Image Assignment over the UUID
if imageList:
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
api.save(imageAssignment)
# Create Digitisation Process
if digitisationProcessValues:
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
api.save(digitisationProcess)
# Create Digitisation Process
if digitisationProcessValues:
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
api.save(digitisationProcess)
# Add the field values for reference
if dimension:
markValues['f05807c9d81cd39b814f83de0175d66a'] = dimension # Dimension
if feature:
markValues['f3ce49288bc03e9d799f20ea277429db'] = feature # Feature
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
markValues['f73e27498813a922032b18b3f3ab8d10'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
markValues['f3baf98f752fc9638de175985183119a'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
# Add the field values for reference
if dimension:
markValues['f05807c9d81cd39b814f83de0175d66a'] = dimension # Dimension
if feature:
markValues['f3ce49288bc03e9d799f20ea277429db'] = feature # Feature
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
markValues['f73e27498813a922032b18b3f3ab8d10'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
markValues['f3baf98f752fc9638de175985183119a'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
# Create Mark
mark = Entity(api=api, fields=markValues, bundle_id='b2c4e1c984d7758d7c7ec719110f7125')
api.save(mark)
# Create Mark
mark = Entity(api=api, fields=markValues, bundle_id=bundleId)
api.save(mark)
print(f'Created mark number {index}: {mark.uri} of {len(markTable)}')
print(f'Created mark number {index}: {mark.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'id': row['id'], 'markId': markValues['fe577970c02f173170ff3848a36b3b79'][0], 'uuid': markValues['fb40b199b4032e55acc152f994e93b45'][0], 'uri': mark.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processedMarks.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'markId': markValues['fe577970c02f173170ff3848a36b3b79'][0], 'uuid': markValues['fb40b199b4032e55acc152f994e93b45'][0], 'uri': mark.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing marks')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def _splitCellValue(value):
    """Turn one table cell into the list of values an entity field expects.

    Returns an empty list when the cell holds nothing usable (None, NaN/NaT,
    empty string, or only separators), so callers can skip it.
    """
    if value is None:
        return []
    # NULLs in non-text columns surface as NaN/NaT; str() would turn them
    # into the literal text 'nan'.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return []
    text = str(value)
    # The line-break marker was searched for in both its double-brace and
    # (via an accidental ``.format()``) single-brace spelling; accept both.
    # NOTE(review): confirm which spelling the exported data really contains.
    for marker in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(marker, '&')
    if '&' not in text:
        return [text] if text else []
    # "&" separates multiple values; drop the empty pieces that adjacent
    # separators (e.g. "&" directly followed by a line-break marker) produce.
    return [part.strip() for part in text.split('&') if part.strip()]


def importSourceReferenceAssignment(api, engine):
    """Create one source reference assignment entity per row of the table.

    Reads table ``c__8130_que_kurzt_`` through *engine*, maps the known
    columns to entity fields, saves each entity with ``api.save`` and
    appends a line to ``./logs/c__8130_que_kurzt_.csv`` after every row so
    an interrupted run can skip rows that were already imported.

    Args:
        api: API object; only ``api.save(entity)`` is used here.
        engine: database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing source reference assignments...')

    tableName = "c__8130_que_kurzt_"
    bundleId = 'b3c4232e84c2f39795bd602f152ed6f0'  # Source reference assignment

    # Table column -> entity field id.
    fieldIds = {
        'f__uuid': 'fbe74fcb0ab0ce5a0181467b9b07e12e',  # UUID
        'f__8134_stelle': 'f769795b4fd628d01692dd4516322db4',  # Source reference
        'f__8130_que_kurzt_': 'f3e841bf3b4e91716d1ff5b83bf293d9',  # Source short title
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load sources table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # The log is compared position by position: row N counts as done
        # only if log line N carries the same id.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        # Properties of an entity have to be lists.
        entityValues = {}
        for key, value in row.items():
            values = _splitCellValue(value)
            if not values:
                # skip if cell has no value
                continue
            if key == 'id':
                continue
            fieldId = fieldIds.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue
            entityValues[fieldId] = values

        # Create the source reference assignment
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        print(f'Created source reference assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write log (the UUID is None when the row had no f__uuid value).
        fUuid = entityValues.get(fieldIds['f__uuid'], [None])[0]
        logEntry = {'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finished importing source reference assignments')

View file

@ -5,165 +5,154 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def _splitCellValue(value):
    """Turn one table cell into the list of values an entity field expects.

    Returns an empty list when the cell holds nothing usable (None, NaN/NaT,
    empty string, or only separators), so callers can skip it.
    """
    if value is None:
        return []
    # NULLs in non-text columns surface as NaN/NaT; str() would turn them
    # into the literal text 'nan'.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return []
    text = str(value)
    # The line-break marker was searched for in both its double-brace and
    # (via an accidental ``.format()``) single-brace spelling; accept both.
    # NOTE(review): confirm which spelling the exported data really contains.
    for marker in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(marker, '&')
    if '&' not in text:
        return [text] if text else []
    # "&" separates multiple values; drop the empty pieces that adjacent
    # separators (e.g. "&" directly followed by a line-break marker) produce.
    return [part.strip() for part in text.split('&') if part.strip()]


def importArtist(api, engine):
    """Create one artist entity (plus its linked entities) per row of ``c__kue``.

    For every row this saves, in order: a digitisation process entity, one
    image entity per reproduction number, an image assignment entity when
    there are images, and finally the artist entity that references them by
    UUID. After each row a line is appended to ``./logs/c__kue.csv`` so an
    interrupted run can skip rows that were already imported.

    Args:
        api: API object; only ``api.save(entity)`` is used here.
        engine: database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing artists...')

    tableName = 'c__kue'
    bundleId = 'bc322be33491dacc600dd43fdee09a5c'

    # When True the import stops after the first newly created artist.
    test = False

    # Table column -> Artist field id.
    artistFields = {
        'f__uuid': 'fff2eb2283e4cd8df3783602a1bc96ab',  # UUID
        'f__3170_and__taetigkeit': 'f01f51e385e5f206653e029ff5c845c4',  # Alternate occupation
        'f__3000_kue_dok_nr_': 'f61deac361ac5e0731edbf214761d15c',  # Artist Document Number
        'f__3002_pub_kue_nr_': 'f46b2ec14ce05d2618427c526198d64e',  # Artist published number
        'f__9990_kommentar': 'fedc08e4225ac800e5d9f16bf345d181',  # Comment
        'f__3360_letzte_erw_': 'f1419788b918f4c4a13393fd09ff37b3',  # Last Mentioned
        'f__6700_mar_dok_nr_': 'f3d63eec34c00556cbadf635f78d815a',  # Mark Assignment
        'f__33gs_meister_als': 'f30b60be791fb13f919c31510ca4de50',  # Master Education
        'f__33mj_meisterjahr': 'fd2d07bb9ea1eadacdf28e41cacb92c1',  # Master Year
        'f__3100_name': 'f71c047dad23083850a13d489386bf31',  # Name
        'f__3105_abw_schreibw_': 'fbe84024bf9fad8f6a545b3af75d8b1b',  # Name Variants
        'f__3166_fakt__taetig_als': 'fb0373e9fd949984cf9c09ec1ea0746c',  # Occupation
        'f__336p_1__posth__erw_': 'fe079424bb6196d4a9721f84c43361f8',  # Posthumous Mentioned
        'f__6770_rosenb_nr_': 'f82ed1dc96df9230e28e04fef0ff2305',  # Rosenberg number
    }
    # Table column -> Digitisation Process field id.
    digitisationFields = {
        'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
        'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
        'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
    }
    # Directory prefixes in reproduction numbers -> target directory names,
    # applied in this order.
    imageDirReplacements = (
        ('Objekte\\', 'objects/'),
        ('Objekte3\\', 'objects/'),
        ('Objekte4\\', 'objects/'),
        ('objekte4\\', 'objects/'),
        ('Objekte5\\', 'objects/'),
        ('objekte5\\', 'objects/'),
        ('Marken\\', 'marks/'),
        ('Marken/', 'marks/'),
        ('MArken\\', 'marks/'),
        ('Goldschmiede/', 'goldsmiths/'),
        ('Goldschmiede\\', 'goldsmiths/'),
        ('Epitaphien/', 'epitaphies/'),
        ('Epitaphien\\', 'epitaphies/'),
    )

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load artists table
    artistsTable = pd.read_sql_table(tableName, con=engine)

    # Create artists
    for index, row in artistsTable.iterrows():
        # The log is compared position by position: row N counts as done
        # only if log line N carries the same id.
        if index < len(processedRows) and artistsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed artist {artistsTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts. Linked entities get a fresh UUID
        # because the artist references them over that UUID.
        artistValues = {}
        digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
        imageValues = {}
        reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}

        for key, value in row.items():
            values = _splitCellValue(value)
            if not values:
                # skip if cell has no value
                continue
            if key == 'id':
                continue

            if key in artistFields:
                artistValues[artistFields[key]] = values
            elif key in digitisationFields:
                digitisationProcessValues[digitisationFields[key]] = values
            elif key == 'f__8540_repro_nr_':
                # We map images to Image entities, keyed by normalised name
                # so a repeated reproduction number yields a single image.
                for item in values:
                    for oldPrefix, newPrefix in imageDirReplacements:
                        item = item.replace(oldPrefix, newPrefix)
                    imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item]  # Reproduction Number (Image)
                    imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artist_images/' + item + '.jpg']  # File
                    imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())]  # UUID
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Create Image entities and add their UUIDs to a list
        # because we link Image Assignment and Image over the UUID
        imageList = []
        for imageFields in imageValues.values():
            if imageFields:
                imageItem = Entity(api=api, fields=imageFields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
                api.save(imageItem)
                imageList.append(imageFields['f11beac4b638016479e6f3fbc7e55d1a'][0])  # add UUID to list

        # Create the Image Assignment entity only when there are images;
        # the artist links to it over the assignment's UUID.
        if imageList:
            reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList  # List of Image UUIDs
            reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
            api.save(reproNumberAssignment)
            artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]]  # Image Assignment
        else:
            reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = []

        if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
            artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

        # Create Artist
        artist = Entity(api=api, fields=artistValues, bundle_id=bundleId)
        api.save(artist)

        print(f'Created artist {index}: {artist.uri} of {len(artistsTable)}')

        # Write log (the UUID is None when the row had no f__uuid value).
        artistUuid = artistValues.get(artistFields['f__uuid'], [None])[0]
        logEntry = {'id': row['id'], 'uuid': artistUuid, 'uri': artist.uri}
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

        if test:
            print('Testing mode activated. Exiting.')
            exit()

    print('finished importing artists')

View file

@ -5,122 +5,108 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def _splitCellValue(value):
    """Turn one table cell into the list of values an entity field expects.

    Returns an empty list when the cell holds nothing usable (None, NaN/NaT,
    empty string, or only separators), so callers can skip it.
    """
    if value is None:
        return []
    # NULLs in non-text columns surface as NaN/NaT; str() would turn them
    # into the literal text 'nan'.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return []
    text = str(value)
    # The line-break marker was searched for in both its double-brace and
    # (via an accidental ``.format()``) single-brace spelling; accept both.
    # NOTE(review): confirm which spelling the exported data really contains.
    for marker in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(marker, '&')
    if '&' not in text:
        return [text] if text else []
    # "&" separates multiple values; drop the empty pieces that adjacent
    # separators (e.g. "&" directly followed by a line-break marker) produce.
    return [part.strip() for part in text.split('&') if part.strip()]


def importLiterature(api, engine):
    """Create one literature entity (plus its digitisation process) per row of ``c__lit``.

    For every row a digitisation process entity is saved first, then the
    literature entity that references it by UUID. After each row a line is
    appended to ``./logs/c__lit.csv`` so an interrupted run can skip rows
    that were already imported.

    Args:
        api: API object; only ``api.save(entity)`` is used here.
        engine: database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing literature...')

    tableName = 'c__lit'
    bundleId = 'bafe9c3d3b640d4d1a16b104f367ac91'

    # Table column -> Literature field id.
    literatureFields = {
        'f__uuid': 'fd58e0884f7cf63f8436c2789fcd2745',  # UUID
        'f__9990_kommentar': 'f3208633f7767cc9f5e44e768818df20',  # Comment
        'f__8270_verfasser': 'f60a88060c75068b4bf2eefd5221793f',  # Creator
        'f__8324_ersch_jahr': 'fdae7bd743ae58bf623feca3a26bcf6c',  # Date
        'f__8280_hrsg': 'fd0bc706876adee304892f8f9e34567f',  # Editor
        'f__8346_signatur': 'fb434c214be21f7e82a851d6524c2850',  # Identifier
        'f__9970_schlagwort': 'f1a55055944adf5d4e866a1768633a7f',  # Keyword
        'f__8200_lit_dok_nr_': 'f3bdd54b9ea5808a571200e9c60e103e',  # Literature Document Identifier
        'f__9971_sw_goldschmied': 'f21a286fec5d48ea238c10877ee2b0db',  # Mentioned Actor
        'f__8308_bibl_zusatz': 'f1674a743a13a3d74b0c6ebb2cf0043f',  # Note
        'f__8319_seitenangabe': 'f0d1716a40498f52abd4a6522aa5f3ef',  # Pages
        'f__8320_ersch_ort': 'fc3cafc0f542cef2a0e1189873ff58a3',  # Publication Place
        'f__8300_serientitel': 'f660f34eb7091c1b0f4b492e49a0e71b',  # Series Title
        'f__8330_lit_kurzt_': 'f84416d4380cdd30e8b9fcea57f58957',  # Shorttitle
        'f__8307_titelzusatz': 'f8521679ac8f6441ddb086f1c5ed7528',  # Subtitle
        'f__8290_titel': 'fa1ae40cc9940569d5a1e3ea13e33488',  # Title
        'f__8260_art': 'f92c6453d265a952a56252e7d93cedea',  # Type
    }
    # Table column -> Digitisation Process field id.
    digitisationFields = {
        'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
        'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
        'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])

    # Load literature table
    literaturesTable = pd.read_sql_table(tableName, con=engine)

    # Create literatures
    for index, row in literaturesTable.iterrows():
        # The log is compared position by position: row N counts as done
        # only if log line N carries the same id.
        if index < len(processedRows) and literaturesTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed literature {literaturesTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts. The digitisation process gets a
        # fresh UUID because the literature references it over that UUID.
        literatureValues = {}
        digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}

        for key, value in row.items():
            values = _splitCellValue(value)
            if not values:
                # skip if cell has no value
                continue
            if key == 'id':
                continue

            if key in literatureFields:
                literatureValues[literatureFields[key]] = values
            elif key in digitisationFields:
                digitisationProcessValues[digitisationFields[key]] = values
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Set Digitisation Process
        literatureValues['f59a2ad5cce3e51f172215ea88afac41'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

        # Create Literature
        literature = Entity(api=api, fields=literatureValues, bundle_id=bundleId)
        api.save(literature)

        print(f'Created literature {index}: {literature.uri} of {len(literaturesTable)}')

        # Write log (docId / uuid are None when the row had no such value).
        logEntry = {
            'id': row['id'],
            'docId': literatureValues.get(literatureFields['f__8200_lit_dok_nr_'], [None])[0],
            'uuid': literatureValues.get(literatureFields['f__uuid'], [None])[0],
            'uri': literature.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finish')

View file

@ -5,197 +5,182 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importInspectionMark(api, engine):
    """Import every row of table ``c__bez`` into WissKI as an inspection mark.

    For each row the function creates the dependent entities first (feature
    dimensions, features, images, an image assignment and a digitisation
    process), links them to the inspection mark through their UUIDs, saves
    the inspection mark and appends one record to ``./logs/c__bez.csv``.
    Rows whose ``id`` matches the log entry at the same position are skipped,
    so an interrupted run can be resumed.

    Args:
        api: API object handed to every ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    print('Importing inspection marks...')
    tableName = 'c__bez'
    bundleId = 'baad021dfda9b89d5ba407dd0fca0d03'
    # Marker that the source data uses for line breaks inside a cell.
    newLineMarker = '###{new_line}###'
    markDocIdField = 'fcdb19d95832ac030d353b5ba92796b7'  # Mark Document Identifier
    markUuidField = 'fb125fa322fe7c3c98446e382b1f22b9'  # UUID

    # Columns that map 1:1 onto a field of the inspection mark.
    markFields = {
        'f__uuid': markUuidField,  # UUID
        'f__9990_kommentar': 'f31eb01562daaeaa27d6c02012fccf02',  # Comment
        'f__67bn_bz_kat_nr': 'f275b0537ab47b15c24f31ad8a8aa226',  # Inspection Mark Catalogue Identifier
        'f__67b0_bz_dok_nr': 'f1cfc4053651e47d629bd5fc9fd707c1',  # Inspection Mark Identifier
        'f__6700_mar_dok_nr_': markDocIdField,  # Mark Document Identifier
        'f__8470_aufnahmenr_': 'f58febbb759a07a75edf9978771c1013',  # Recording Number
        'f__68an_abdruck_nr_': 'f7c155684a82af5caa3191f2646b51da',  # Reproduction Number
        'f__68nk_besonderheiten': 'fd980fca65d9ffd2f95859c4c5b9d284',  # Special Feature
        'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
    }
    # Columns that map 1:1 onto a field of the digitisation process.
    digitisationFields = {
        'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
        'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
        'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
    }
    # Columns that become a Feature entity of the given type.
    featureTypes = {
        'f__68ne_haste_schraegstr_': 'transition_haste_slash',
        'f__68nf_n_knick': 'transition_haste_slash_kink',
        'f__68ng_ueberg__serifen': 'transition_serif_haste',
    }
    # Directory prefixes rewritten in image names; applied in this order.
    imageDirReplacements = (
        ('Objekte\\', 'objects/'),
        ('Objekte/', 'objects/'),
        ('Objekte3\\', 'objects/'),
        ('Objekte4\\', 'objects/'),
        ('Objekte5\\', 'objects/'),
        ('objekte5\\', 'objects/'),
        ('Marken\\', 'marks/'),
        ('MArken\\', 'marks/'),
        ('Marken/', 'marks/'),
    )

    # Simple log of already imported rows
    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])

    # Load inspectionMark table
    inspectionMarkTable = pd.read_sql_table(tableName, con=engine)

    # Create inspectionMark
    for index, row in inspectionMarkTable.iterrows():
        # For every row in table...
        if index < len(processedRows) and row['id'] == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed inspectionMark {row["id"]}')
            continue

        # Create Entity property dicts. The UUIDs of the digitisation process
        # and the image assignment are generated up front because the
        # inspection mark references them.
        inspectionMarkValues = {}
        digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
        featureValues = {}
        featureDimensionValues = {}
        imageValues = {}
        imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}

        for key, value in row.items():
            # For every column in row...
            if key == 'id':
                # The primary key is only used for the log.
                continue
            if value is None or value == '' or pd.isna(value):
                # skip if cell has no value (NULL cells may arrive as NaN)
                continue

            # Properties of an entity have to be an array, so line-break
            # markers are turned into "&" separators first...
            text = str(value).replace('&' + newLineMarker, '&')
            text = text.replace(newLineMarker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                # ...then "&"-separated values are exploded to array items
                value = [x.strip() for x in text.split('&')]
            else:
                # ...or the single value is wrapped in an array
                value = [text]

            # Map columns to fields. We use assignments for reification
            # for nested semantics, because we need to be efficient.
            if key in markFields:
                inspectionMarkValues[markFields[key]] = value
            elif key in digitisationFields:
                digitisationProcessValues[digitisationFields[key]] = value
            elif key in featureTypes:
                # We map features to Feature entity.
                featureValues[key] = {
                    'fdfb3c4f670aa1260924cecd09ca4bbb': [featureTypes[key]],  # Type
                    'fbccee184fa531d58b3b46eb8ac4626f': value,  # Feature
                    'f299e2a145b508e376f2bf2e44cbe219': [str(uuid.uuid4())],  # UUID
                }
            elif key == 'f__68nh_dicke_ser__max_':
                # We map (feature) dimensions to Dimension entity.
                featureDimensionValues[key] = {
                    'f31e9c7e2de5549daea1790a74615288': ['maximum_thickness'],  # Type
                    'f3f805d270890837a6493e7e60a96487': value,  # Dimension
                    'f802fd7bf45be523a9b188411a591420': [str(uuid.uuid4())],  # UUID
                }
            elif key == 'f__8540_repro_nr_':
                # We map images to Image entity
                for item in value:
                    # Replace dir paths in name
                    for oldPrefix, newPrefix in imageDirReplacements:
                        item = item.replace(oldPrefix, newPrefix)
                    imageValues[item] = {
                        'feb10344eaa7a5f414d1e8392853eba9': [item],  # Reproduction Number (Image)
                        'fc8d57e55f203c75c2f8a1ae79378ac7': ['public://artifact_images/' + item + '.jpg'],  # File
                        'f11beac4b638016479e6f3fbc7e55d1a': [str(uuid.uuid4())],  # UUID
                    }
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create (feature) Dimension entities and add their UUIDs to a list
        # because we link Feature and its Dimension over the UUID
        featureDimension = []
        for fields in featureDimensionValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b73258adf62f35bd1be3fa2863fab558'))  # Dimension Bundle
            featureDimension.append(fields['f802fd7bf45be523a9b188411a591420'][0])  # Dimension UUID

        # Add the serif feature to the feature list
        if featureDimension:
            featureValues['serif'] = {
                'fdfb3c4f670aa1260924cecd09ca4bbb': ['serif'],  # Feature Type
                'f0f825f5d3a6f0e2d67eee311b94cd6f': featureDimension,  # Dimension UUIDs
                'f299e2a145b508e376f2bf2e44cbe219': [str(uuid.uuid4())],  # UUID
            }

        # Create Feature entities and add their UUIDs to a list
        # because we link Mark and Feature over the UUID
        feature = []
        for fields in featureValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b393e1c3db202fbb7a8b54e65eb38227'))  # Feature Bundle
            feature.append(fields['f299e2a145b508e376f2bf2e44cbe219'][0])  # Feature UUID

        # Create Image entities and add their UUIDs to a list
        # because we link Image Assignment and Image over the UUID
        imageList = []
        for fields in imageValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3'))
            imageList.append(fields['f11beac4b638016479e6f3fbc7e55d1a'][0])

        # Create the Image Assignment entity that bundles the images
        if imageList:
            imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList  # List of Image UUIDs
            imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
            api.save(imageAssignment)

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Add the field values for reference
        if feature:
            inspectionMarkValues['f7eba97158ff1b9afc5fa0a5823145b4'] = feature  # Feature UUID
        # NOTE(review): the image assignment UUID is always set, so the mark is
        # linked to it even when no images (and therefore no image assignment
        # entity) were created for this row - confirm this is intended.
        inspectionMarkValues['fc697a5ad97f3277f20f67e18085b544'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]]  # Image Assignment
        inspectionMarkValues['f998036ccd7daaf2d9938934c93938f3'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

        # Create Mark
        inspectionMark = Entity(api=api, fields=inspectionMarkValues, bundle_id=bundleId)
        api.save(inspectionMark)

        print(f'Created inspectionMark number {index}: {inspectionMark.uri} of {len(inspectionMarkTable)}')

        # Write log. Missing identifiers are logged as empty instead of
        # raising after the entity has already been saved.
        logRecord = {
            'id': row['id'],
            'docId': inspectionMarkValues.get(markDocIdField, [None])[0],
            'uuid': inspectionMarkValues.get(markUuidField, [None])[0],
            'uri': inspectionMark.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logRecord])], ignore_index=True)
        # Persist after every row so an interrupted run can be resumed.
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finished importing inspection marks')

View file

@ -5,84 +5,70 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importJournalAssignment(api, engine):
    """Import every row of table ``c__8310_zeitschrift`` into WissKI.

    Each row becomes one entity of bundle
    ``b5508ef3bb28f139ebdd9f6d545825c4``. After every saved entity a record
    is appended to ``./logs/c__8310_zeitschrift.csv``; rows whose ``id``
    matches the log entry at the same position are skipped, so an
    interrupted run can be resumed.

    Args:
        api: API object handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    test = False  # Debug switch: stop after the first imported row when True
    tableName = "c__8310_zeitschrift"
    bundleId = 'b5508ef3bb28f139ebdd9f6d545825c4'
    # Marker that the source data uses for line breaks inside a cell.
    newLineMarker = '###{new_line}###'
    uuidField = 'fadaaac928ec555c2574b3a9a4f5543d'  # UUID
    # Map columns to fields. We use assignments for reification.
    # NOTE(review): the original comments label the last two fields "Client"
    # and "Date"; the column names suggest journal data - confirm against the
    # pathbuilder.
    fieldByColumn = {
        'f__uuid': uuidField,
        'f__8310_zeitschrift': 'fd8fc741f6d4142637c061900b1cdd01',
        'f__8312_zusatzzschr': 'f51edfb30c99d28bee1cf32b81190254',
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load source table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # For every row in table...
        if index < len(processedRows) and row['id'] == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed entity {row["id"]}')
            continue

        # Create Entity property dict
        entityValues = {}
        for key, value in row.items():
            # For every column in row...
            if key == 'id':
                # The primary key is only used for the log.
                continue
            if value is None or value == '' or pd.isna(value):
                # skip if cell has no value (NULL cells may arrive as NaN)
                continue

            # Properties of an entity have to be an array, so line-break
            # markers are turned into "&" separators first...
            text = str(value).replace('&' + newLineMarker, '&')
            text = text.replace(newLineMarker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                # ...then "&"-separated values are exploded to array items
                value = [x.strip() for x in text.split('&')]
            else:
                # ...or the single value is wrapped in an array
                value = [text]

            fieldId = fieldByColumn.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue
            entityValues[fieldId] = value

        # Create journal assignment entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        print(f'Created journal assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The id is taken from the row itself and the UUID from
        # the collected values, so neither can be stale or unset.
        logRecord = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [None])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logRecord])], ignore_index=True)
        # Persist after every row so an interrupted run can be resumed.
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
        if test:
            raise SystemExit

    print('finish')

View file

@ -5,84 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importLiteratureReferenceAssignment(api, engine):
    """Import every row of table ``c__8330_lit_kurzt_`` into WissKI.

    Each row becomes one literature reference assignment entity (bundle
    ``bdda154adecb26deed2d8b67dab8a0db``). After every saved entity a record
    is appended to ``./logs/c__8330_lit_kurzt_.csv``; rows whose ``id``
    matches the log entry at the same position are skipped, so an
    interrupted run can be resumed.

    Args:
        api: API object handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    print('Importing literature reference assignments...')
    tableName = "c__8330_lit_kurzt_"
    bundleId = 'bdda154adecb26deed2d8b67dab8a0db'  # Literature Reference Assignment
    # Marker that the source data uses for line breaks inside a cell.
    newLineMarker = '###{new_line}###'
    uuidField = 'facb3fc9d13472b00f59d506acece535'  # UUID
    # Map columns to fields. We use assignments for reification.
    fieldByColumn = {
        'f__uuid': uuidField,
        'f__8334_stelle': 'f099466b679af216600fdbfa722ddcb7',  # Literature Reference
        'f__833r_repro_datei': 'fe145f4fec0a71a954bc3c75cf7b370a',  # Repro File
        'f__8330_lit_kurzt_': 'ff2d656706c2ff11089f196ccab51843',  # Short Title
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load source table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # For every row in table...
        if index < len(processedRows) and row['id'] == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed entity {row["id"]}')
            continue

        # Create Entity property dict
        entityValues = {}
        for key, value in row.items():
            # For every column in row...
            if key == 'id':
                # The primary key is only used for the log.
                continue
            if value is None or value == '' or pd.isna(value):
                # skip if cell has no value (NULL cells may arrive as NaN)
                continue

            # Properties of an entity have to be an array, so line-break
            # markers are turned into "&" separators first...
            text = str(value).replace('&' + newLineMarker, '&')
            text = text.replace(newLineMarker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                # ...then "&"-separated values are exploded to array items
                value = [x.strip() for x in text.split('&')]
            else:
                # ...or the single value is wrapped in an array
                value = [text]

            fieldId = fieldByColumn.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue
            entityValues[fieldId] = value

        # Create literature reference assignment entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        print(f'Created literature reference assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is read from the collected values so it can
        # neither be unset nor carried over from a previous row.
        logRecord = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [None])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logRecord])], ignore_index=True)
        # Persist after every row so an interrupted run can be resumed.
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finish')

View file

@ -5,84 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importParentLiteratureAssignment(api, engine):
    """Import every row of table ``c__8292_uebergeordn_publ_`` into WissKI.

    Each row becomes one parent literature assignment entity (bundle
    ``bf55dda81ca0ddb4237a0d3ea495579b``). After every saved entity a record
    is appended to ``./logs/c__8292_uebergeordn_publ_.csv``; rows whose
    ``id`` matches the log entry at the same position are skipped, so an
    interrupted run can be resumed.

    Args:
        api: API object handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    test = False  # Debug switch: stop after the first imported row when True
    tableName = "c__8292_uebergeordn_publ_"
    bundleId = 'bf55dda81ca0ddb4237a0d3ea495579b'  # Parent literature assignment
    # Marker that the source data uses for line breaks inside a cell.
    newLineMarker = '###{new_line}###'
    uuidField = 'f8cced7d1c2f8d0d3fa9aa36b7e123bd'  # UUID
    # Map columns to fields. We use assignments for reification.
    fieldByColumn = {
        'f__uuid': uuidField,
        'f__8292_uebergeordn_publ_': 'f97ea22d9dd853c8f1cced6bc85c59b2',  # Parent literature
        'f__8294_zusatzsatit': 'faf62c71a8e5844241899c0aa7801a9c',  # Subtitle
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load source table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # For every row in table...
        if index < len(processedRows) and row['id'] == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed entity {row["id"]}')
            continue

        # Create Entity property dict
        entityValues = {}
        for key, value in row.items():
            # For every column in row...
            if key == 'id':
                # The primary key is only used for the log.
                continue
            if value is None or value == '' or pd.isna(value):
                # skip if cell has no value (NULL cells may arrive as NaN)
                continue

            # Properties of an entity have to be an array, so line-break
            # markers are turned into "&" separators first...
            text = str(value).replace('&' + newLineMarker, '&')
            text = text.replace(newLineMarker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                # ...then "&"-separated values are exploded to array items
                value = [x.strip() for x in text.split('&')]
            else:
                # ...or the single value is wrapped in an array
                value = [text]

            fieldId = fieldByColumn.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue
            entityValues[fieldId] = value

        # Create parent literature assignment entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        print(f'Created parent literature assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is read from the collected values so it can
        # neither be unset nor carried over from a previous row.
        logRecord = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [None])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logRecord])], ignore_index=True)
        # Persist after every row so an interrupted run can be resumed.
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
        if test:
            raise SystemExit

    print('finish')

View file

@ -5,81 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importInspectionMarkLocation(api, engine):
    """Import every row of ``c__67b0_bz_dok_nr`` as an inspection mark location entity.

    Each row is turned into one ``Entity`` of bundle
    ``b4158ec3a326d8ab504062296a82f13a`` and saved through ``api``. After every
    save the row is appended to ``./logs/c__67b0_bz_dok_nr.csv`` so that an
    interrupted run can skip the rows it already imported.

    Args:
        api: WissKI API client; passed to ``Entity`` and used for ``api.save``.
        engine: Database connectable handed to ``pd.read_sql_table``.
    """
    print('Importing inspection mark locations...')

    tableName = 'c__67b0_bz_dok_nr'
    bundleId = 'b4158ec3a326d8ab504062296a82f13a'
    uuidField = 'f65178b07306225efb0b556f6e4f54a5'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__67b0_bz_dok_nr': 'f2d0b120ed40e17a5ad3f31d594d9b1c',  # Inspection Mark Identifier
        'f__67b4_anbr_ort': 'f8a6343c2a8a5523eb2f0602f2baae04',  # Location
    }
    # Both spellings are handled: the single-brace form is what
    # '...{{new_line}}...'.format() produces, the double-brace form is the
    # literal that was previously searched for without .format().
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    # The log directory may be missing; create it up front so the first log
    # write cannot fail after an entity was already saved.
    os.makedirs('./logs', exist_ok=True)
    logPath = f'./logs/{tableName}.csv'
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load sources table
    inspectionMarkLocationsTable = pd.read_sql_table(tableName, con=engine)

    # Create inspectionMarkLocations
    for index, row in inspectionMarkLocationsTable.iterrows():
        # The log is written in table order, so log row n belongs to table row n.
        if index < len(processedRows) and inspectionMarkLocationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed inspectionMarkLocation {inspectionMarkLocationsTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts
        inspectionMarkLocationValues = {}
        for key, value in row.items():
            # Skip empty cells; pd.isna also catches NaN/NaT, which would
            # otherwise be imported as the text "nan".
            if pd.isna(value) or value == '':
                continue
            if key == 'id':
                continue
            if key not in fieldIds:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array: turn new-line
            # markers into "&" separators, then explode on "&".
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace('&' + marker, '&').replace(marker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [x.strip() for x in text.split('&')]
            else:
                values = [text]
            inspectionMarkLocationValues[fieldIds[key]] = values

        inspectionMarkLocation = Entity(api=api, fields=inspectionMarkLocationValues, bundle_id=bundleId)
        api.save(inspectionMarkLocation)
        print(f'Created inspectionMarkLocation {index}: {inspectionMarkLocation.uri}')

        # Write log. A row without UUID is logged with an empty UUID instead
        # of raising after the entity has already been created.
        logEntry = {
            'id': row['id'],
            'uuid': inspectionMarkLocationValues.get(uuidField, [''])[0],
            'uri': inspectionMarkLocation.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finished importing inspection mark locations')

View file

@ -5,80 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importInspectionMarkRelation(api, engine):
    """Import every row of ``c__67b7_beziehung`` as an inspection mark relation entity.

    Each row is turned into one ``Entity`` of bundle
    ``bd9b0ff8dc3a6d9284e1798531389bf1`` and saved through ``api``. After every
    save the row is appended to ``./logs/c__67b7_beziehung.csv`` so that an
    interrupted run can skip the rows it already imported.

    Args:
        api: WissKI API client; passed to ``Entity`` and used for ``api.save``.
        engine: Database connectable handed to ``pd.read_sql_table``.
    """
    print('Importing inspection mark relations...')

    tableName = 'c__67b7_beziehung'
    bundleId = 'bd9b0ff8dc3a6d9284e1798531389bf1'
    uuidField = 'ffd502413c286815811ae5546f73935b'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__67b8_bez_bz_nr': 'ff3f6dd331ed27515f6721ac8312706c',  # Inspection Mark Identifier
        'f__67b7_beziehung': 'f1cb8db7e1c26a5b5fe0c9d8fca60de2',  # Relation
    }
    # Both spellings are handled: the single-brace form is what
    # '...{{new_line}}...'.format() produces, the double-brace form is the
    # literal that was previously searched for without .format().
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    # The log directory may be missing; create it up front so the first log
    # write cannot fail after an entity was already saved.
    os.makedirs('./logs', exist_ok=True)
    logPath = f'./logs/{tableName}.csv'
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load sources table
    inspectionMarkRelationsTable = pd.read_sql_table(tableName, con=engine)

    # Create inspectionMarkRelations
    for index, row in inspectionMarkRelationsTable.iterrows():
        # The log is written in table order, so log row n belongs to table row n.
        if index < len(processedRows) and inspectionMarkRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed inspectionMarkRelation {inspectionMarkRelationsTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts
        inspectionMarkRelationValues = {}
        for key, value in row.items():
            # Skip empty cells; pd.isna also catches NaN/NaT, which would
            # otherwise be imported as the text "nan".
            if pd.isna(value) or value == '':
                continue
            if key == 'id':
                continue
            if key not in fieldIds:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array: turn new-line
            # markers into "&" separators, then explode on "&".
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace('&' + marker, '&').replace(marker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [x.strip() for x in text.split('&')]
            else:
                values = [text]
            inspectionMarkRelationValues[fieldIds[key]] = values

        # Use the bundleId defined above rather than repeating the literal.
        inspectionMarkRelation = Entity(api=api, fields=inspectionMarkRelationValues, bundle_id=bundleId)
        api.save(inspectionMarkRelation)
        print(f'Created inspection mark relation {index}: {inspectionMarkRelation.uri}')

        # Write log. A row without UUID is logged with an empty UUID instead
        # of raising after the entity has already been created.
        logEntry = {
            'id': row['id'],
            'uuid': inspectionMarkRelationValues.get(uuidField, [''])[0],
            'uri': inspectionMarkRelation.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')

View file

@ -5,80 +5,65 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMarkDatingInfo(api, engine):
    """Import every row of ``c__68dm_datierung_marke`` as a mark dating information entity.

    Each row is turned into one ``Entity`` of bundle
    ``b9cfb95e627e1710cf8d736d4ca5db64`` (Dating Information Assignment) and
    saved through ``api``. After every save the row is appended to
    ``./logs/c__68dm_datierung_marke.csv`` so that an interrupted run can skip
    the rows it already imported.

    Args:
        api: WissKI API client; passed to ``Entity`` and used for ``api.save``.
        engine: Database connectable handed to ``pd.read_sql_table``.
    """
    print('Importing mark dating info...')

    tableName = 'c__68dm_datierung_marke'
    bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'  # Dating Information Assignment
    uuidField = 'f74baaf58e49393cc89d6616ee197901'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__68dm_datierung_marke': 'f0da3b36d16e16602bb550aff7d36297',  # Date
        'f__68bm_bem_dat_marke': 'fe7870b5a86040d81140bccb01697765',  # Note
    }
    # Both spellings are handled: the single-brace form is what
    # '...{{new_line}}...'.format() produces, the double-brace form is the
    # literal that was previously searched for without .format().
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    # The log directory may be missing; create it up front so the first log
    # write cannot fail after an entity was already saved.
    os.makedirs('./logs', exist_ok=True)
    logPath = f'./logs/{tableName}.csv'
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load sources table
    datingInfosTable = pd.read_sql_table(tableName, con=engine)

    # Create datingInfos
    for index, row in datingInfosTable.iterrows():
        # The log is written in table order, so log row n belongs to table row n.
        if index < len(processedRows) and datingInfosTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed datingInfo {datingInfosTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts
        datingInfoValues = {}
        for key, value in row.items():
            # Skip empty cells; pd.isna also catches NaN/NaT, which would
            # otherwise be imported as the text "nan".
            if pd.isna(value) or value == '':
                continue
            if key == 'id':
                continue
            if key not in fieldIds:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array: turn new-line
            # markers into "&" separators, then explode on "&".
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace('&' + marker, '&').replace(marker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [x.strip() for x in text.split('&')]
            else:
                values = [text]
            datingInfoValues[fieldIds[key]] = values

        # Use the bundleId defined above rather than repeating the literal.
        datingInfo = Entity(api=api, fields=datingInfoValues, bundle_id=bundleId)
        api.save(datingInfo)
        print(f'Created mark dating info {index}: {datingInfo.uri} of {len(datingInfosTable)}')

        # Write log. A row without UUID is logged with an empty UUID instead
        # of raising after the entity has already been created.
        logEntry = {
            'id': row['id'],
            'uuid': datingInfoValues.get(uuidField, [''])[0],
            'uri': datingInfo.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finished importing mark dating info')

View file

@ -1,97 +0,0 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Stand-alone import script: reads table c__6760_markenart and creates one
# WissKI entity (mark information assignment) per row, logging every saved row
# to ./logs/processed-c__6760_markenart.csv so a rerun can skip finished rows.

# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
    print('Database initialization failed.')
    exit()

# Load the environment variables
load_dotenv()

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')

tableName = "c__6760_markenart"
bundleId = 'bc7ce6906f78e760f22ff13226b1332d'  # Mark information assignment
uuidField = 'f3b8aaf7e79229b4da8214d491e375ec'
# Table column -> WissKI field id.
fieldIds = {
    'f__uuid': uuidField,  # UUID
    'f__5064_num__dat_': 'fe6921098808e68cae68f0858411826c',  # Artist Assignment
    'f__6894_anbr_ort': 'f694ed57271ab7be57249e0ee5c41ba4',  # Location
    'f__6700_mar_dok_nr_': 'fdd3380d4a11654f32687429796cabc3',  # Mark Document Number
    'f__6760_markenart': 'fd381aa9c3ebdf417e6cbccd60ede279',  # Mark Type
    'f__684c_bedeutung_bz': 'f4947de52885f517baef0cdf3cb53b61',  # Meaning Inspection Mark
    'f__684a_bedeutung_mz': 'f542c4c945725c6fdc5ab6409a877f02',  # Meaning Master Mark
    'f__6770_rosenb_nr_': 'f0ff7020a9c25ea2706875837fe61b04',  # Rosenberg Number
}

# The log directory may be missing; create it up front so the first log write
# cannot fail after an entity was already saved.
os.makedirs('./logs', exist_ok=True)
logPath = f'./logs/processed-{tableName}.csv'
try:
    processedRows = pd.read_csv(logPath)
except FileNotFoundError:
    processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])

# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)

# Create entities
for index, row in sqlTable.iterrows():
    # The log is written in table order, so log row n belongs to table row n
    # (first column of both frames is the row id).
    if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
        print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
        continue

    # Create Entity property dicts
    entityValues = {}
    for key, value in row.items():
        # Skip empty cells; pd.isna also catches NaN/NaT, which would
        # otherwise be imported as the text "nan".
        if pd.isna(value) or value == '':
            continue
        if key == 'id':
            continue
        if key not in fieldIds:
            print(f'{key} is not a valid field, skipping.')
            continue
        # Properties of an entity have to be an array, so explode
        # "&"-separated values or wrap the single value.
        if '&' in str(value):
            value = [x.strip() for x in str(value).split('&')]
        else:
            value = [value]
        entityValues[fieldIds[key]] = value

    entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
    api.save(entity)
    print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')

    # Write log. The UUID is taken from this row only (never carried over from
    # a previous row). No column ever populated docId, so it stays empty; the
    # column is kept to preserve the existing log layout.
    logEntry = {
        'id': row['id'],
        'docId': '',
        'uuid': entityValues.get(uuidField, [''])[0],
        'uri': entity.uri,
    }
    processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
    processedRows.to_csv(logPath, index=False)

print('finish')

View file

@ -5,94 +5,80 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importBirth(api, engine):
    """Import every row of ``c__3270_geb_datum`` as a birth entity.

    Each row is turned into one ``Entity`` of bundle
    ``b54049ec931bffb62359b4bdb11435fc`` and saved through ``api``. After every
    save the row is appended to ``./logs/c__3270_geb_datum.csv`` so that an
    interrupted run can skip the rows it already imported.

    Args:
        api: WissKI API client; passed to ``Entity`` and used for ``api.save``.
        engine: Database connectable handed to ``pd.read_sql_table``.

    Raises:
        SystemExit: After the first imported row when the local ``test`` flag
            is switched on.
    """
    print('Importing birth...')

    test = False
    tableName = "c__3270_geb_datum"
    bundleId = 'b54049ec931bffb62359b4bdb11435fc'
    uuidField = 'ff2a4da76944f5aba7d625c169d9ff66'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__3290_geb_ort': 'fe71d86a78289c0b54242f5a3b67f81f',  # Birth place
        'f__3270_geb_datum': 'ff3a9f042976963ac356db02d764b002',  # Date
        'f__32ls_lit__stelle': 'fa03638df8a53e9aae38471fe10f409a',  # Literature Reference
        'f__32lt_lit__kurztitel': 'f1af25f1770bd0db1982780697600cf4',  # Literature short title
        'f__32bm_bem_geburt': 'f572f5e0f02f1c9b7c3ece5ffcf86c43',  # Note
        'f__32qs_quelle_stelle': 'f1ebceaa76bac9ebf266733f64caa37c',  # Source reference
        'f__32qt_quelle_kurztitel': 'f1a3597a874b3df9c1d87c5a32b487b0',  # Source short title
    }
    # Both spellings are handled: the single-brace form is what
    # '...{{new_line}}...'.format() produces, the double-brace form is the
    # literal that was previously searched for without .format().
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    # The log directory may be missing; create it up front so the first log
    # write cannot fail after an entity was already saved.
    os.makedirs('./logs', exist_ok=True)
    logPath = f'./logs/{tableName}.csv'
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load sources table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # The log is written in table order, so log row n belongs to table row n.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts
        entityValues = {}
        for key, value in row.items():
            # Skip empty cells; pd.isna also catches NaN/NaT, which would
            # otherwise be imported as the text "nan".
            if pd.isna(value) or value == '':
                continue
            # The row id is only needed for the log, not as an entity field.
            if key == 'id':
                continue
            if key not in fieldIds:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array: turn new-line
            # markers into "&" separators, then explode on "&".
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace('&' + marker, '&').replace(marker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [x.strip() for x in text.split('&')]
            else:
                values = [text]
            entityValues[fieldIds[key]] = values

        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        print(f'Created birth {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is read from this row's values so a row without
        # UUID can neither reuse the previous row's UUID nor hit an unbound name.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [''])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

        if test:
            # Same effect as the interactive exit() helper, without relying
            # on the site module having installed it.
            raise SystemExit

    print('finished importing birth')

View file

@ -5,98 +5,84 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importDeath(api, engine):
    """Import every row of ``c__3330_todes_dat_`` as a death entity.

    Each row is turned into one ``Entity`` of bundle
    ``b487c08016f572b9ecf3f9173339fec3`` and saved through ``api``. After every
    save the row is appended to ``./logs/c__3330_todes_dat_.csv`` so that an
    interrupted run can skip the rows it already imported. When the local
    ``test`` flag is switched on, the loop stops after the first imported row.

    Args:
        api: WissKI API client; passed to ``Entity`` and used for ``api.save``.
        engine: Database connectable handed to ``pd.read_sql_table``.
    """
    print('Importing death...')

    test = False
    tableName = "c__3330_todes_dat_"
    bundleId = 'b487c08016f572b9ecf3f9173339fec3'
    uuidField = 'f8beb0d372a5cf6f1668c47acf7e53cd'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__3330_todes_dat_': 'f385a8c323f0a2f49d8eb175e1535b1b',  # Death date
        'f__33ls_lit__stelle': 'fb4f168aa6a73169ef0350408a6260cc',  # Literature Reference
        'f__33lt_lit__kurztitel': 'fd4ed8828d72a575f8609ba2c442b4b2',  # Literature short title
        'f__33bm_bem_tod': 'f3028661430081ae44aa950abe0afbac',  # Note
        'f__3350_tod_ort': 'fd80c2c8ba4c64c01e9c46ac7ae00d93',  # Place
        'f__33qs_quelle_stelle': 'fd98cf7fbc0de4529e2a2d5e0b0c28bf',  # Source reference
        'f__33qt_quelle_kurztitel': 'f973818e6c3d36ddd44ba3a713e308e6',  # Source short title
        'f__710t_art_ereignis': 'fc039c43502b3525a92a8330d91f7944',  # Event type
    }
    # Both spellings are handled: the single-brace form is what
    # '...{{new_line}}...'.format() produces, the double-brace form is the
    # literal that was previously searched for without .format().
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    # The log directory may be missing; create it up front so the first log
    # write cannot fail after an entity was already saved.
    os.makedirs('./logs', exist_ok=True)
    logPath = f'./logs/{tableName}.csv'
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load sources table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # The log is written in table order, so log row n belongs to table row n.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts
        entityValues = {}
        for key, value in row.items():
            # Skip empty cells; pd.isna also catches NaN/NaT, which would
            # otherwise be imported as the text "nan".
            if pd.isna(value) or value == '':
                continue
            # The row id is only needed for the log, not as an entity field.
            if key == 'id':
                continue
            if key not in fieldIds:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array: turn new-line
            # markers into "&" separators, then explode on "&".
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace('&' + marker, '&').replace(marker, '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [x.strip() for x in text.split('&')]
            else:
                values = [text]
            entityValues[fieldIds[key]] = values

        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        print(f'Created death {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is read from this row's values so a row without
        # UUID can neither reuse the previous row's UUID nor hit an unbound name.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [''])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

        if test:
            break

    print('finish')

View file

@ -5,82 +5,67 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importDating(api, engine):
print('Importing dating...')
# Load the environment variables
load_dotenv()
tableName = "c__8100_datum"
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "c__8100_datum"
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
uuid = value[0]
case 'f__8100_datum':
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
case 'f__81bm_bem__datierung':
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
fUuid = value[0]
case 'f__8100_datum':
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
case 'f__81bm_bem__datierung':
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created dating {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finish')

View file

@ -5,89 +5,75 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importGoldsmithRelation(api, engine):
print('Importing goldsmith relation...')
# Load the environment variables
load_dotenv()
test = False
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
tableName = "c__3007_bezieh__zu_gs"
bundleId = 'bef43e8a958e6a9bee04534b3841f6a0'
test = False
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
tableName = "c__3007_bezieh__zu_gs"
bundleId = 'bef43e8a958e6a9bee04534b3841f6a0'
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f588ff2629e3758ae18ec28c02270d27'] = value # UUID
fUuid = value[0]
case 'f__3011_verw__art':
entityValues['f2de276528d6b020306b8c7784008e5c'] = value # Actor relation type
case 'f__3010_name_gs':
entityValues['fc16719402aff4a1afec3387bf2bbc34'] = value # Goldsmith
case 'f__30bm_bem_beziehung':
entityValues['f7de6b267146070fa38ea5dc45150fa4'] = value # Note
case 'f__3007_bezieh__zu_gs':
entityValues['f8a46491ebad0ba670384a049402d697'] = value # Relation
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f588ff2629e3758ae18ec28c02270d27'] = value # UUID
fUuid = value[0]
case 'f__3011_verw__art':
entityValues['f2de276528d6b020306b8c7784008e5c'] = value # Actor relation type
case 'f__3010_name_gs':
entityValues['fc16719402aff4a1afec3387bf2bbc34'] = value # Goldsmith
case 'f__30bm_bem_beziehung':
entityValues['f7de6b267146070fa38ea5dc45150fa4'] = value # Note
case 'f__3007_bezieh__zu_gs':
entityValues['f8a46491ebad0ba670384a049402d697'] = value # Relation
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created goldsmith relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finish')
print('finished importing goldsmith relation')

View file

@ -5,86 +5,72 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
# Load the environment variables
load_dotenv()
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
def importClient(api, engine):
print('Importing client...')
tableName = "c__410a_auftraggeber"
bundleId = 'b85d9987d762fb4e8ce89a69b0b8de31'
tableName = "c__410a_auftraggeber"
bundleId = 'b85d9987d762fb4e8ce89a69b0b8de31'
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['fe0c458dfe9c0657fd02f312c2154d62'] = value # UUID
fUuid = value[0]
case 'f__410a_auftraggeber':
entityValues['f5ab8fb89d793bd5d27740c2b26bf672'] = value # Client
case 'f__41bm_bem__auftragg_':
entityValues['f0f33e0d5b40933d83260da3876a6cd3'] = value # Note
case 'f__41aa_anlass_auftrag':
entityValues['f88f0dbbcaff35acc80f1e6be571bd9e'] = value # Reason
case 'f__41as_stand_auftragg_':
entityValues['f9d4601e72d705c12fd7f09560e90d37'] = value # Status
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['fe0c458dfe9c0657fd02f312c2154d62'] = value # UUID
fUuid = value[0]
case 'f__410a_auftraggeber':
entityValues['f5ab8fb89d793bd5d27740c2b26bf672'] = value # Client
case 'f__41bm_bem__auftragg_':
entityValues['f0f33e0d5b40933d83260da3876a6cd3'] = value # Note
case 'f__41aa_anlass_auftrag':
entityValues['f88f0dbbcaff35acc80f1e6be571bd9e'] = value # Reason
case 'f__41as_stand_auftragg_':
entityValues['f9d4601e72d705c12fd7f09560e90d37'] = value # Status
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
print(f'Created client {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finish')

View file

@ -5,100 +5,86 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMentioned(api, engine):
print('Importing mentioned...')
# Load the environment variables
load_dotenv()
test = False
tableName = "c__7060_erwaehnt__datum_"
bundleId = 'b04b1756b09ba3260de278824332ad6c'
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
tableName = "c__7060_erwaehnt__datum_"
bundleId = 'b04b1756b09ba3260de278824332ad6c'
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['fac8bbc9701f5da711a6a49beca1b3e4'] = value # UUID
fUuid = value[0]
case 'f__410a_auftraggeber':
entityValues['f6b456466f45f72952a953bf169a47cc'] = value # Client
case 'f__7060_erwaehnt__datum_':
entityValues['ffdae7d7aeb84467faebf5468fb8b94f'] = value # Date
case 'f__7100_art_ereignis':
entityValues['fb462fbc544045fc244da8d490ed1cfc'] = value # Event type
case 'f__70ls_lit__stelle':
entityValues['f11f8bc3fdbedc686430ef57edfcf620'] = value # Literature Reference
case 'f__70lt_lit__kurztitel':
entityValues['f4ed2a340720f643bcc49ac9581b1181'] = value # Literature short title
case 'f__34ms_bei_meister_':
entityValues['f9d8ac79df3eb667db8fb8b23e52a816'] = value # Master
case 'f__70bm_bem_ereignis':
entityValues['f37dbed94d03576c91fff9c3c9026da5'] = value # Note
case 'f__70qs_quelle_stelle':
entityValues['ffc72e8058fd9efd4bb92270520942bd'] = value # Source reference
case 'f__70qt_quelle_kurztitel':
entityValues['f433afdf58621b6962dea8821cf21bb9'] = value # Source short title
case 'f__3420_taet_ort':
entityValues['f53e436b293c82f07fb17dd40c01f868'] = value # Workplace
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['fac8bbc9701f5da711a6a49beca1b3e4'] = value # UUID
fUuid = value[0]
case 'f__410a_auftraggeber':
entityValues['f6b456466f45f72952a953bf169a47cc'] = value # Client
case 'f__7060_erwaehnt__datum_':
entityValues['ffdae7d7aeb84467faebf5468fb8b94f'] = value # Date
case 'f__7100_art_ereignis':
entityValues['fb462fbc544045fc244da8d490ed1cfc'] = value # Event type
case 'f__70ls_lit__stelle':
entityValues['f11f8bc3fdbedc686430ef57edfcf620'] = value # Literature Reference
case 'f__70lt_lit__kurztitel':
entityValues['f4ed2a340720f643bcc49ac9581b1181'] = value # Literature short title
case 'f__34ms_bei_meister_':
entityValues['f9d8ac79df3eb667db8fb8b23e52a816'] = value # Master
case 'f__70bm_bem_ereignis':
entityValues['f37dbed94d03576c91fff9c3c9026da5'] = value # Note
case 'f__70qs_quelle_stelle':
entityValues['ffc72e8058fd9efd4bb92270520942bd'] = value # Source reference
case 'f__70qt_quelle_kurztitel':
entityValues['f433afdf58621b6962dea8821cf21bb9'] = value # Source short title
case 'f__3420_taet_ort':
entityValues['f53e436b293c82f07fb17dd40c01f868'] = value # Workplace
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
print(f'Created mentioned {index}: {entity.uri} of {len(tableName)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finish')
print('finish')

View file

@ -5,82 +5,67 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importNumDating(api, engine):
print('Importing num dating...')
# Load the environment variables
load_dotenv()
tableName = "c__5064_num__dat_"
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64' # Dating Information Assignment
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "c__5064_num__dat_"
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64' # Dating Information Assignment
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.iloc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
uuid = value[0]
case 'f__5064_num__dat_':
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
case 'f__50bm_bem__datierung':
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
fUuid = value[0]
case 'f__5064_num__dat_':
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
case 'f__50bm_bem__datierung':
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
print(f'Created num dating {index}: {entity.uri} of {len(tableName)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finish')

View file

@ -5,85 +5,70 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importOriginAssignment(api, engine):
print('Importing origin assignment...')
test = False
tableName = "c__3204_herkunft"
bundleId = 'b1d5be81f8b3dfbf9d6d90379cc0a14f'
# Load the environment variables
load_dotenv()
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
test = False
tableName = "c__3204_herkunft"
bundleId = 'b1d5be81f8b3dfbf9d6d90379cc0a14f'
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f0d656adf9a5a9501e2f837af2e71dd6'] = value # UUID
fUuid = value[0]
case 'f__3hbm_bem_herkunft':
entityValues['f3755949b812523c5d2005ea831c122f'] = value # Note
case 'f__3204_herkunft':
entityValues['fecbc849373f6a48c23be62619da3b09'] = value # Place
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f0d656adf9a5a9501e2f837af2e71dd6'] = value # UUID
fUuid = value[0]
case 'f__3hbm_bem_herkunft':
entityValues['f3755949b812523c5d2005ea831c122f'] = value # Note
case 'f__3204_herkunft':
entityValues['fecbc849373f6a48c23be62619da3b09'] = value # Place
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created origin assignment {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
if test:
exit()
print('finish')
print('finished importing origin assignments')

View file

@ -5,82 +5,71 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def importWorkshops(api, engine):
    """Import the rows of ``c__nfws_forts_werkst_`` as WissKI workshop entities.

    Every row becomes one entity of the workshop bundle.  After each save the
    row is appended to ``./logs/c__nfws_forts_werkst_.csv`` so that a later
    run can skip the rows that were already imported.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing workshops...')

    tableName = "c__nfws_forts_werkst_"
    bundleId = 'beb03bccbdffdd31567df370303c1e2d'
    uuidField = 'fa7c19f4d03d7d15acf588460654bbf2'

    # Table column -> WissKI field id.
    fieldByColumn = {
        'f__uuid': uuidField,  # UUID
        'f__nfws_forts_werkst_': 'ff1aaeb118005d8506af6f56f7e424a4',  # Continued by
        'f__nfbm_bem_forts_': 'f71d24e2922d3151603ce144c0972f40',  # Note
        'f__nfzr_zeitraumforts_': 'f865ade60ba332a0a3ab4b77c39af7f4',  # Time-Span
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Debug switch: set to True to stop the program after the first import.
    test = False

    # Load workshops table
    workshopsTable = pd.read_sql_table(tableName, con=engine)

    # Create workshops
    for index, row in workshopsTable.iterrows():
        # Resume support: a row counts as done when the log holds the same id
        # at the same position.
        if index < len(processedRows) and workshopsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {workshopsTable.loc[index, "id"]}')
            continue

        workshopValues = {}
        for key, value in row.items():
            # Skip cells without a value. NULLs in numeric or date columns
            # arrive as NaN/NaT, which would otherwise be imported as the
            # literal strings 'nan' / 'NaT'.
            if value is None or pd.isna(value) or value == '':
                continue

            # Properties of an entity have to be an array, so normalise the
            # line-break markers to "&" and explode on it.
            # NOTE(review): after .format() the first marker has single
            # braces while the second keeps double braces - confirm which
            # spelling the source data really uses.
            text = str(value).replace('&###{{new_line}}###'.format(), '&')
            text = text.replace('###{{new_line}}###', '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [part.strip() for part in text.split('&')]
            else:
                values = [text]

            # Map columns to fields; the row id is only used for the log.
            if key == 'id':
                continue
            field = fieldByColumn.get(key)
            if field is None:
                print(f'{key} is not a valid field, skipping.')
            else:
                workshopValues[field] = values

        # Create workshop
        workshop = Entity(api=api, fields=workshopValues, bundle_id=bundleId)
        api.save(workshop)
        print(f'Created workshop {index}: {workshop.uri} of {len(workshopsTable)}')

        # Write log. A row without UUID is logged with an empty UUID instead
        # of aborting the import with a KeyError after the entity was saved.
        record = {
            'id': row['id'],
            'uuid': workshopValues.get(uuidField, [None])[0],
            'uri': workshop.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([record])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

        if test:
            raise SystemExit

    print('finished importing workshops')

207
21_importArtifacts.py Normal file
View file

@ -0,0 +1,207 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def importArtifacts(api, engine):
    """Import the rows of the ``c__obj`` table as WissKI artifact entities.

    For every row a Production Place Assignment, one Dimension per filled
    measurement column, one Image per reproduction number, an Image
    Assignment (only when images exist) and a Digitisation Process are saved
    first.  The Artifact is saved last and refers to them by their UUIDs.
    Each saved artifact is appended to ``./logs/c__obj.csv`` so that a later
    run can skip the rows that were already imported.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing artifacts...')

    tableName = "c__obj"
    bundleId = 'bd30c2c64a3caa8bb1628c780c3f24bb'

    # UUID fields of the entities that are linked to each other.
    artifactUuidField = 'feb48c9a7efc444449b4b8defcd6d8bd'
    dimensionUuidField = 'f802fd7bf45be523a9b188411a591420'
    imageUuidField = 'f11beac4b638016479e6f3fbc7e55d1a'
    digitisationUuidField = 'f32274ec0032b8778ba69d20108590cc'
    imageAssignmentUuidField = 'f067784f5b1ff850672124a2b05360de'
    placeAssignmentUuidField = 'f40cc95db3ccaa1dbbf27294338d9f07'

    # Table column -> field id of the Artifact bundle.
    artifactFields = {
        'f__uuid': artifactUuidField,  # UUID
        'f__5000_obj_dok_nr_': 'f7e2a8a273ab3d577bf5854902550c09',  # Document Identifier
        'f__500n_ngk_nr_': 'f6e041bd0b16b21596849732c01cb168',  # NGK Number
        'f__5200_obj_titel': 'fd06dcc49a29b1a63fa4a789ec17e5c6',  # Title
        'f__5210_status': 'f35c9c9b0991729c36acb41645fe81d1',  # Status
        'f__5220_gattung': 'f2fd7f8a81d5eb1a20371b9acfd1ab59',  # Genre
        'f__5223_form__attribut': 'f05bbd6e29a7d303e4370b04c12b3f75',  # Formattribute
        'f__5226_art': 'f593fa773a6ea458101ba2325a18abbe',  # Artifact type
        'f__523f_funktion': 'f476ba24127d4dff1018acebf45a05f6',  # Function
        'f__5240_formtyp': 'fa7cfd9dbb3d2517c1898b3051d8dbed',  # Shape
        'f__524g_gestalt': 'f8309a21fa79bc6bd2506060b419d2df',  # Figure
        'f__55ng_darst__schlagw_': 'f6abbd4f39a6f79de5de2b14b98e51ff',  # Keywords
        'f__5bes_beschreibung': 'f26ad2bc1f084478cd7011f7b8451526',  # Description
        'f__5ges_geschichte': 'f40120d7c13ef02b486c69245f6c2306',  # History
        'f__68an_abdruck_nr_': 'fd3740649cc06f45677eb0546908cdac',  # Print Number
        'f__stwv_statwerkverz': 'fee0db94d62fae6370a89ff4757ff539',  # Catalogue_of_Works
        'f__9990_kommentar': 'fefe289aa0c9563a153be6da7d37e3ff',  # Comment
        'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
    }
    # Table column -> type label of the Dimension entity created for it.
    dimensionTypes = {
        'f__5362_hoehe': 'height',
        'f__5364_breite': 'width',
        'f__5366_tiefe': 'depth',
        'f__5368_laenge': 'length',
        'f__5370_durchmesser': 'diameter',
        'f__5380_gewicht': 'weight',
        'f__538h_hist__gewicht': 'historical_weight',
    }
    # Table column -> field id of the Digitisation Process bundle.
    digitisationFields = {
        'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
        'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
        'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
    }
    # Directory prefixes inside reproduction numbers, rewritten in this order.
    pathReplacements = (
        ('Objekte/', 'objects/'),
        ('Objekte\\', 'objects/'),
        ('Objekte3\\', 'objects/'),
        ('Objekte4\\', 'objects/'),
        ('objekte4\\', 'objects/'),
        ('Objekte5\\', 'objects/'),
        ('objekte5\\', 'objects/'),
        ('Marken\\', 'marks/'),
        ('Marken/', 'marks/'),
    )

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load artifacts table
    artifactsTable = pd.read_sql_table(tableName, con=engine)

    # Create artifacts
    for index, row in artifactsTable.iterrows():
        # Resume support: a row counts as done when the log holds the same id
        # at the same position.
        if index < len(processedRows) and artifactsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed artifact {artifactsTable.loc[index, "id"]}')
            continue

        # Entity property dicts; linked entities get a fresh UUID up front.
        artifactValues = {}
        digitisationProcessValues = {digitisationUuidField: [str(uuid.uuid4())]}
        imageValues = {}
        imageAssignmentValues = {imageAssignmentUuidField: [str(uuid.uuid4())]}
        productionPlaceAssignmentValues = {placeAssignmentUuidField: [str(uuid.uuid4())]}
        dimensionValues = {}

        for key, value in row.items():
            # Skip cells without a value. NULLs in numeric or date columns
            # arrive as NaN/NaT, which would otherwise be imported as the
            # literal strings 'nan' / 'NaT'.
            if value is None or pd.isna(value) or value == '':
                continue
            # The row id is only used for the log.
            if key == 'id':
                continue

            # Properties of an entity have to be an array, so normalise the
            # line-break markers to "&" and explode on it.
            # NOTE(review): after .format() the first marker has single
            # braces while the second keeps double braces - confirm which
            # spelling the source data really uses.
            text = str(value).replace('&###{{new_line}}###'.format(), '&')
            text = text.replace('###{{new_line}}###', '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [part.strip() for part in text.split('&')]
            else:
                values = [text]

            # Map columns to fields. We use assignments for reification.
            if key in artifactFields:
                artifactValues[artifactFields[key]] = values
            elif key in digitisationFields:
                digitisationProcessValues[digitisationFields[key]] = values
            elif key in dimensionTypes:
                # Every measurement becomes its own Dimension entity.
                dimensionValues[key] = {
                    'f31e9c7e2de5549daea1790a74615288': [dimensionTypes[key]],  # Type
                    'f3f805d270890837a6493e7e60a96487': values,  # Dimension
                    dimensionUuidField: [str(uuid.uuid4())],  # UUID
                }
            elif key == 'f__5130_entst_ort':
                # The production place lives on the Production Place Assignment.
                productionPlaceAssignmentValues['f43f9589eef324fb12c26226dfe94246'] = values  # Production Place
            elif key == 'f__8540_repro_nr_':
                # Every reproduction number becomes its own Image entity.
                for item in values:
                    for old, new in pathReplacements:
                        item = item.replace(old, new)
                    imageValues[item] = {
                        'feb10344eaa7a5f414d1e8392853eba9': [item],  # Reproduction Number (Image)
                        'fc8d57e55f203c75c2f8a1ae79378ac7': ['public://artifact_images/' + item + '.jpg'],  # File
                        imageUuidField: [str(uuid.uuid4())],  # UUID
                    }
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Production Place Assignment. Entities could be saved in one
        # batch, but they are saved one by one here.
        productionPlaceAssignment = Entity(api=api, fields=productionPlaceAssignmentValues, bundle_id='b13bc6dc04d4bbdafb9536987eb43244')
        api.save(productionPlaceAssignment)

        # Create Dimension entities and collect their UUIDs, because Artifact
        # and Dimension are linked over the UUID.
        dimension = []
        for dimensionFields in dimensionValues.values():
            api.save(Entity(api=api, fields=dimensionFields, bundle_id='b73258adf62f35bd1be3fa2863fab558'))
            dimension.append(dimensionFields[dimensionUuidField][0])

        # Create Image entities and collect their UUIDs, because Image
        # Assignment and Image are linked over the UUID.
        imageList = []
        for imageFields in imageValues.values():
            api.save(Entity(api=api, fields=imageFields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3'))
            imageList.append(imageFields[imageUuidField][0])

        # Create the Image Assignment only when the row has images.
        if imageList:
            imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList  # List of Image UUIDs
            imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
            api.save(imageAssignment)

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Add the field values for reference.
        # NOTE(review): is the Production Place Assignment value correct?
        # NOTE(review): the Image Assignment UUID is referenced even when no
        # Image Assignment was saved for this row - confirm this is intended.
        artifactValues['f2676a0fb8db6ab62235328ae7c7a4b3'] = [productionPlaceAssignmentValues[placeAssignmentUuidField][0]]  # Production Place Assignment
        artifactValues['fc700eb3f24f4f2a6c165128aa7117f1'] = dimension  # Dimension
        artifactValues['f7af1cd9c77448281dd7ecf29ba57e3e'] = [imageAssignmentValues[imageAssignmentUuidField][0]]  # Image Assignment
        artifactValues['f5a3f90d920da3db4cfdbaa6264b0e89'] = [digitisationProcessValues[digitisationUuidField][0]]  # Digitisation Process

        # Create Artifact
        artifact = Entity(api=api, fields=artifactValues, bundle_id=bundleId)
        api.save(artifact)
        print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')

        # Write log. A row without UUID is logged with an empty UUID instead
        # of aborting the import with a KeyError after the entity was saved.
        record = {
            'id': row['id'],
            'uuid': artifactValues.get(artifactUuidField, [None])[0],
            'uri': artifact.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([record])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finished importing artifacts')

View file

@ -1,213 +0,0 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Stand-alone import script: reads the ``c__obj`` table and saves one WissKI
# Artifact per row, together with its Production Place Assignment, Dimension,
# Image, Image Assignment and Digitisation Process entities. Every saved
# artifact is appended to ./logs/processedArtifacts.csv.

# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
    print('Database initialization failed.')
    # exit() is only injected by the site module; SystemExit always works and
    # the non-zero status reports the failure to the caller.
    raise SystemExit(1)

# Load the environment variables
load_dotenv()

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')

# UUID and identifier fields that are read back further down.
artifactUuidField = 'feb48c9a7efc444449b4b8defcd6d8bd'
documentIdField = 'f7e2a8a273ab3d577bf5854902550c09'
dimensionUuidField = 'f802fd7bf45be523a9b188411a591420'
imageUuidField = 'f11beac4b638016479e6f3fbc7e55d1a'
digitisationUuidField = 'f32274ec0032b8778ba69d20108590cc'
imageAssignmentUuidField = 'f067784f5b1ff850672124a2b05360de'
placeAssignmentUuidField = 'f40cc95db3ccaa1dbbf27294338d9f07'

# Table column -> field id of the Artifact bundle.
artifactFields = {
    'f__uuid': artifactUuidField,  # UUID
    'f__5000_obj_dok_nr_': documentIdField,  # Document Identifier
    'f__500n_ngk_nr_': 'f6e041bd0b16b21596849732c01cb168',  # NGK Number
    'f__5200_obj_titel': 'fd06dcc49a29b1a63fa4a789ec17e5c6',  # Title
    'f__5210_status': 'f35c9c9b0991729c36acb41645fe81d1',  # Status
    'f__5220_gattung': 'f2fd7f8a81d5eb1a20371b9acfd1ab59',  # Genre
    'f__5223_form__attribut': 'f05bbd6e29a7d303e4370b04c12b3f75',  # Formattribute
    'f__5226_art': 'f593fa773a6ea458101ba2325a18abbe',  # Artifact type
    'f__523f_funktion': 'f476ba24127d4dff1018acebf45a05f6',  # Function
    'f__5240_formtyp': 'fa7cfd9dbb3d2517c1898b3051d8dbed',  # Shape
    'f__524g_gestalt': 'f8309a21fa79bc6bd2506060b419d2df',  # Figure
    'f__55ng_darst__schlagw_': 'f6abbd4f39a6f79de5de2b14b98e51ff',  # Keywords
    'f__5bes_beschreibung': 'f26ad2bc1f084478cd7011f7b8451526',  # Description
    'f__5ges_geschichte': 'f40120d7c13ef02b486c69245f6c2306',  # History
    'f__68an_abdruck_nr_': 'fd3740649cc06f45677eb0546908cdac',  # Print Number
    'f__stwv_statwerkverz': 'fee0db94d62fae6370a89ff4757ff539',  # Catalogue_of_Works
    'f__9990_kommentar': 'fefe289aa0c9563a153be6da7d37e3ff',  # Comment
    'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
}
# Table column -> type label of the Dimension entity created for it.
dimensionTypes = {
    'f__5362_hoehe': 'height',
    'f__5364_breite': 'width',
    'f__5366_tiefe': 'depth',
    'f__5368_laenge': 'length',
    'f__5370_durchmesser': 'diameter',
    'f__5380_gewicht': 'weight',
    'f__538h_hist__gewicht': 'historical_weight',
}
# Table column -> field id of the Digitisation Process bundle.
digitisationFields = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}
# Directory prefixes inside reproduction numbers, rewritten in this order.
# 'objekte4\\' was missing although its capitalised twin and both spellings
# of 'Objekte5' were handled.
pathReplacements = (
    ('Objekte/', 'objects/'),
    ('Objekte\\', 'objects/'),
    ('Objekte3\\', 'objects/'),
    ('Objekte4\\', 'objects/'),
    ('objekte4\\', 'objects/'),
    ('Objekte5\\', 'objects/'),
    ('objekte5\\', 'objects/'),
    ('Marken\\', 'marks/'),
    ('Marken/', 'marks/'),
)

try:
    processedRows = pd.read_csv('./logs/processedArtifacts.csv')
except FileNotFoundError:
    processedRows = pd.DataFrame(columns=['artifactId', 'uuid', 'uri'])

# Load artifacts table
artifactsTable = pd.read_sql_table('c__obj', con=engine)

# Create artifacts
for index, row in artifactsTable.iterrows():
    # Resume support: compares the first table column with the first log
    # column at the same position.
    # NOTE(review): the log's first column holds the document identifier -
    # confirm the table's first column holds the same value, otherwise no
    # row is ever skipped.
    if index < len(processedRows) and artifactsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
        print(f'Skipping already processed artifact {artifactsTable.iloc[index, 0]}')
        continue

    # Entity property dicts; linked entities get a fresh UUID up front.
    artifactValues = {}
    digitisationProcessValues = {digitisationUuidField: [str(uuid.uuid4())]}
    imageValues = {}
    imageAssignmentValues = {imageAssignmentUuidField: [str(uuid.uuid4())]}
    productionPlaceAssignmentValues = {placeAssignmentUuidField: [str(uuid.uuid4())]}
    dimensionValues = {}

    for key, value in row.items():
        # Skip cells without a value. NULLs in numeric or date columns arrive
        # as NaN/NaT and must not be imported as values.
        if value is None or pd.isna(value) or value == '':
            continue
        # The row id is not imported.
        if key == 'id':
            continue

        # Properties of an entity have to be an array, so explode
        # "&"-separated values to array items or wrap the single value.
        if '&' in str(value):
            values = [part.strip() for part in str(value).split('&')]
        else:
            values = [value]

        # Map columns to fields. We use assignments for reification.
        if key in artifactFields:
            artifactValues[artifactFields[key]] = values
        elif key in digitisationFields:
            digitisationProcessValues[digitisationFields[key]] = values
        elif key in dimensionTypes:
            # Every measurement becomes its own Dimension entity.
            dimensionValues[key] = {
                'f31e9c7e2de5549daea1790a74615288': [dimensionTypes[key]],  # Type
                'f3f805d270890837a6493e7e60a96487': values,  # Dimension
                dimensionUuidField: [str(uuid.uuid4())],  # UUID
            }
        elif key == 'f__5130_entst_ort':
            # The production place lives on the Production Place Assignment.
            productionPlaceAssignmentValues['f43f9589eef324fb12c26226dfe94246'] = values  # Production Place
        elif key == 'f__8540_repro_nr_':
            # Every reproduction number becomes its own Image entity.
            for item in values:
                if item is None:
                    continue
                for old, new in pathReplacements:
                    item = item.replace(old, new)
                imageValues[item] = {
                    'feb10344eaa7a5f414d1e8392853eba9': [item],  # Reproduction Number (Image)
                    'fc8d57e55f203c75c2f8a1ae79378ac7': ['public://artifact_images/' + item + '.jpg'],  # File
                    imageUuidField: [str(uuid.uuid4())],  # UUID
                }
        else:
            print(f'{key} is not a valid field, skipping.')

    # Create Production Place Assignment. Entities could be saved in one
    # batch, but they are saved one by one here.
    productionPlaceAssignment = Entity(api=api, fields=productionPlaceAssignmentValues, bundle_id='b13bc6dc04d4bbdafb9536987eb43244')
    api.save(productionPlaceAssignment)

    # Create Dimension entities and collect their UUIDs, because Artifact and
    # Dimension are linked over the UUID.
    dimension = []
    for dimensionFields in dimensionValues.values():
        api.save(Entity(api=api, fields=dimensionFields, bundle_id='b73258adf62f35bd1be3fa2863fab558'))
        dimension.append(dimensionFields[dimensionUuidField][0])

    # Create Image entities and collect their UUIDs, because Image Assignment
    # and Image are linked over the UUID.
    imageList = []
    for imageFields in imageValues.values():
        api.save(Entity(api=api, fields=imageFields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3'))
        imageList.append(imageFields[imageUuidField][0])

    # Create the Image Assignment only when the row has images.
    if imageList:
        imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList  # List of Image UUIDs
        imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
        api.save(imageAssignment)

    # Create Digitisation Process
    digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
    api.save(digitisationProcess)

    # Add the field values for reference.
    # NOTE(review): is the Production Place Assignment value correct?
    artifactValues['f2676a0fb8db6ab62235328ae7c7a4b3'] = [productionPlaceAssignmentValues[placeAssignmentUuidField][0]]  # Production Place Assignment
    artifactValues['fc700eb3f24f4f2a6c165128aa7117f1'] = dimension  # Dimension
    artifactValues['f7af1cd9c77448281dd7ecf29ba57e3e'] = [imageAssignmentValues[imageAssignmentUuidField][0]]  # Image Assignment
    artifactValues['f5a3f90d920da3db4cfdbaa6264b0e89'] = [digitisationProcessValues[digitisationUuidField][0]]  # Digitisation Process

    # Create Artifact
    artifact = Entity(api=api, fields=artifactValues, bundle_id='bd30c2c64a3caa8bb1628c780c3f24bb')
    api.save(artifact)
    print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')

    # Write log. Missing identifiers are logged as empty instead of aborting
    # the import with a KeyError after the entity was saved.
    record = {
        'artifactId': artifactValues.get(documentIdField, [None])[0],
        'uuid': artifactValues.get(artifactUuidField, [None])[0],
        'uri': artifact.uri,
    }
    processedRows = pd.concat([processedRows, pd.DataFrame([record])], ignore_index=True)
    processedRows.to_csv('./logs/processedArtifacts.csv', index=False)

print('finish')

View file

@ -5,84 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def importArtifactRelation(api, engine):
    """Import the rows of ``c__5007_beziehung`` as artifact relation assignments.

    Every row becomes one entity of the artifact relation assignment bundle.
    After each save the row is appended to ``./logs/c__5007_beziehung.csv``
    so that a later run can skip the rows that were already imported.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing artifact relation...')

    tableName = "c__5007_beziehung"
    bundleId = 'bf4a13ee46de57819f88834caaddc301'  # Artifact relation assignment
    uuidField = 'ff7ebd530eb53efc489e80d9bbef293e'

    # Table column -> WissKI field id.
    fieldByColumn = {
        'f__uuid': uuidField,  # UUID
        'f__5008_bez_obj_nr_': 'f39d0e5207a375070d84b958017a62e8',  # Artifact Document Identifier
        'f__bebm_bem_beziehung': 'f9cc743b648716684ccc3a7b9710d0ed',  # Note
        'f__5007_beziehung': 'f4d3047b3b54285aa5a86183aedb1680',  # Relation
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load relations table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # Resume support: a row counts as done when the log holds the same id
        # at the same position.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed artifact relation {sqlTable.loc[index, "id"]}')
            continue

        entityValues = {}
        for key, value in row.items():
            # Skip cells without a value. NULLs in numeric or date columns
            # arrive as NaN/NaT, which would otherwise be imported as the
            # literal strings 'nan' / 'NaT'.
            if value is None or pd.isna(value) or value == '':
                continue

            # Properties of an entity have to be an array, so normalise the
            # line-break markers to "&" and explode on it.
            # NOTE(review): after .format() the first marker has single
            # braces while the second keeps double braces - confirm which
            # spelling the source data really uses.
            text = str(value).replace('&###{{new_line}}###'.format(), '&')
            text = text.replace('###{{new_line}}###', '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [part.strip() for part in text.split('&')]
            else:
                values = [text]

            # Map columns to fields; the row id is only used for the log.
            if key == 'id':
                continue
            field = fieldByColumn.get(key)
            if field is None:
                print(f'{key} is not a valid field, skipping.')
            else:
                entityValues[field] = values

        # Create artifact relation assignment
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        print(f'Created artifact relation {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is read from this row's values, so a row without
        # UUID neither raises a NameError nor inherits the previous row's UUID.
        record = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [None])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([record])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finished importing artifact relation')

View file

@ -5,79 +5,67 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def importArtistAssignment(api, engine):
    """Import the rows of ``c__ob30_bez_kuenstler`` as artist assignments.

    Every row becomes one entity of the artist assignment bundle.  After each
    save the row is appended to ``./logs/c__ob30_bez_kuenstler.csv`` so that
    a later run can skip the rows that were already imported.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connection handed to ``pandas.read_sql_table``.
    """
    print('Importing artist assignment...')

    tableName = "c__ob30_bez_kuenstler"
    bundleId = 'bc8826cc7d9c9373ce71cfc0251c2a4f'
    uuidField = 'fc150259d31fea8a3f992e7beb901fa4'

    # Table column -> WissKI field id.
    fieldByColumn = {
        'f__uuid': uuidField,  # UUID
        'f__3100_name': 'ff5bf58133f9351d03e2ee92b6f8bb7e',  # Artist Name
        'f__3475_ber__funkt_': 'fc0c7d8c6b736489210bc42ef0f1406a',  # Occupation
        'f__ob30_bez_kuenstler': 'f575d4f2c8ea5d37618cea708c2a7c5e',  # Relation
    }

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load artist relations table
    artistRelationsTable = pd.read_sql_table(tableName, con=engine)

    # Create artist assignments
    for index, row in artistRelationsTable.iterrows():
        # Resume support: a row counts as done when the log holds the same id
        # at the same position.
        if index < len(processedRows) and artistRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed artistAssignment {artistRelationsTable.loc[index, "id"]}')
            continue

        artistRelationValues = {}
        for key, value in row.items():
            # Skip cells without a value. NULLs in numeric or date columns
            # arrive as NaN/NaT, which would otherwise be imported as the
            # literal strings 'nan' / 'NaT'.
            if value is None or pd.isna(value) or value == '':
                continue

            # Properties of an entity have to be an array, so normalise the
            # line-break markers to "&" and explode on it.
            # NOTE(review): after .format() the first marker has single
            # braces while the second keeps double braces - confirm which
            # spelling the source data really uses.
            text = str(value).replace('&###{{new_line}}###'.format(), '&')
            text = text.replace('###{{new_line}}###', '&')
            text = text.replace(' & ', '&')
            if '&' in text:
                values = [part.strip() for part in text.split('&')]
            else:
                values = [text]

            # Map columns to fields; the row id is only used for the log.
            if key == 'id':
                continue
            field = fieldByColumn.get(key)
            if field is None:
                print(f'{key} is not a valid field, skipping.')
            else:
                artistRelationValues[field] = values

        # Create artist assignment
        artistRelation = Entity(api=api, fields=artistRelationValues, bundle_id=bundleId)
        api.save(artistRelation)
        print(f'Created artist assignment {index}: {artistRelation.uri} of {len(artistRelationsTable)}')

        # Write log. A row without UUID is logged with an empty UUID instead
        # of aborting the import with a KeyError after the entity was saved.
        record = {
            'id': row['id'],
            'uuid': artistRelationValues.get(uuidField, [None])[0],
            'uri': artistRelation.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([record])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finished importing artist assignment')

View file

@ -0,0 +1,82 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def importMarkInformation(api, engine):
print('Importing mark information...')
tableName = "c__6760_markenart"
bundleId = 'bc7ce6906f78e760f22ff13226b1332d' # Mark information assignment
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
continue
case 'f__uuid':
entityValues['f3b8aaf7e79229b4da8214d491e375ec'] = value # UUID
fUuid = value[0]
case 'f__5064_num__dat_':
entityValues['fe6921098808e68cae68f0858411826c'] = value # Artist Assignment
case 'f__6894_anbr_ort':
entityValues['f694ed57271ab7be57249e0ee5c41ba4'] = value # Location
case 'f__6700_mar_dok_nr_':
entityValues['fdd3380d4a11654f32687429796cabc3'] = value # Mark Document Number
case 'f__6760_markenart':
entityValues['fd381aa9c3ebdf417e6cbccd60ede279'] = value # Mark Type
case 'f__684c_bedeutung_bz':
entityValues['f4947de52885f517baef0cdf3cb53b61'] = value # Meaning Inspection Mark
case 'f__684a_bedeutung_mz':
entityValues['f542c4c945725c6fdc5ab6409a877f02'] = value # Meaning Master Mark
case 'f__6770_rosenb_nr_':
entityValues['f0ff7020a9c25ea2706875837fe61b04'] = value # Rosenberg Number
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created mark information {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'id': row['id'], 'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')

View file

@ -1,90 +0,0 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
# Load the environment variables
load_dotenv()
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')
tableName = "c__8490_fotograf"
bundleId = 'b821fb6c518948b7f40d17803b6ce293' # Photographer assignment
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f6c3c3e35af2f2073fd517aabf88fa7c'] = value # UUID
docUuid = value[0]
case 'f__8490_fotograf':
entityValues['fe8f8b235f896862b74caa0fa8f3682d'] = value # Photographer
case 'f__8494_aufn_datum':
entityValues['f12c7538643314f0f46ba76a5140a87d'] = value # Recording Date
case 'f__8470_aufnahmenr_':
entityValues['ff6ec986fb4cc5a2f34deb7144f2f817'] = value # Recording number
case 'f__849r_repro_datei': # Image Assignment
entityValues['f24a609593559a904a0a0f2e215db584'] = value # Reproduction Number
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': docUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
print('finish')

75
26_importPhotographer.py Normal file
View file

@ -0,0 +1,75 @@
import uuid # For UUID creation
from initDb import initDb # For database initialization
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
def importPhotographer(api, engine):
print('Importing photographer...')
tableName = "c__8490_fotograf"
bundleId = 'b821fb6c518948b7f40d17803b6ce293' # Photographer assignment
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__uuid':
entityValues['f6c3c3e35af2f2073fd517aabf88fa7c'] = value # UUID
docUuid = value[0]
case 'f__8490_fotograf':
entityValues['fe8f8b235f896862b74caa0fa8f3682d'] = value # Photographer
case 'f__8494_aufn_datum':
entityValues['f12c7538643314f0f46ba76a5140a87d'] = value # Recording Date
case 'f__8470_aufnahmenr_':
entityValues['ff6ec986fb4cc5a2f34deb7144f2f817'] = value # Recording number
case 'f__849r_repro_datei': # Image Assignment
entityValues['f24a609593559a904a0a0f2e215db584'] = value # Reproduction Number
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created Photographer {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': docUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finished importing photographer')

View file

@ -5,78 +5,64 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToArtistRelationRelation(api, engine):
print('importing artifact to artist relation relation')
tableName = "r__obj__ob30_bez_kuenstler"
bundleId = 'b8b4e3b3fb7e3b83cec037aea51814bf' # Artifact to artist relation relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load the environment variables
load_dotenv()
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
entityValues = {}
tableName = "r__obj__ob30_bez_kuenstler"
bundleId = 'b8b4e3b3fb7e3b83cec037aea51814bf' # Artifact to artist relation relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f92631e8a40aae0aa8adbe84ab5dc97f'] = value # Artifact UUID
fUuid = value[0]
case 'f__ob30_bez_kuenstler__uuid':
entityValues['f07e9587430d70bc46926488129ba4a8'] = value # Artist Relation UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f92631e8a40aae0aa8adbe84ab5dc97f'] = value # Artifact UUID
fUuid = value[0]
case 'f__ob30_bez_kuenstler__uuid':
entityValues['f07e9587430d70bc46926488129ba4a8'] = value # Artist Relation UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created artifact to artist relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to artist relation relation')

View file

@ -5,80 +5,67 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToClientAssignmentRelation(api, engine):
print('importing artifact to client assignment relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__410a_auftraggeber"
bundleId = 'b20d53dcc2bad79457251a581611b43f' # Artifact to client assignment relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__410a_auftraggeber"
bundleId = 'b20d53dcc2bad79457251a581611b43f' # Artifact to client assignment relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fc369de9f2f7ac73585f7c967f415703'] = value # Artifact UUID
fUuid = value[0]
case 'f__410a_auftraggeber__uuid':
entityValues['fe65c6437d49877bad3de9ce31e19772'] = value # Client UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fc369de9f2f7ac73585f7c967f415703'] = value # Artifact UUID
fUuid = value[0]
case 'f__410a_auftraggeber__uuid':
entityValues['fe65c6437d49877bad3de9ce31e19772'] = value # Client UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Client Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to client assignment relation')

View file

@ -5,79 +5,65 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToInspectionMarkLocationRelation(api, engine):
print('importing artifact to inspection mark location relation')
tableName = "r__obj__67b0_bz_dok_nr"
bundleId = 'b7fe64e0326c107a1a4a705be08392fa' # Artifact to inspection mark location relation
# Load the environment variables
load_dotenv()
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
tableName = "r__obj__67b0_bz_dok_nr"
bundleId = 'b7fe64e0326c107a1a4a705be08392fa' # Artifact to inspection mark location relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f7ed714f705f51f4893427c7ba14dae8'] = value # Artifact UUID
fUuid = value[0]
case 'f__67b0_bz_dok_nr__uuid':
entityValues['f7a330c34474ecf06737a334dd754e8b'] = value # Inspection Mark location assignment
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f7ed714f705f51f4893427c7ba14dae8'] = value # Artifact UUID
fUuid = value[0]
case 'f__67b0_bz_dok_nr__uuid':
entityValues['f7a330c34474ecf06737a334dd754e8b'] = value # Inspection Mark location assignment
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Inspection Mark Location Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to inspection mark location relation')

View file

@ -5,79 +5,65 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToLiteratureReferenceAssignmentRelation(api, engine):
print('importing artifact to literature reference assignment relation')
tableName = "r__obj__8330_lit_kurzt_"
bundleId = 'b6a7b7aad942ecff4b3beadf907d51c8' # Artifact to literature relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load the environment variables
load_dotenv()
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
tableName = "r__obj__8330_lit_kurzt_"
bundleId = 'b6a7b7aad942ecff4b3beadf907d51c8' # Artifact to literature relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f6c41b894b0a00c2c28860f513c5bb77'] = value # Artifact UUID
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['f5284765cef8e6974676adcb59791960'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f6c41b894b0a00c2c28860f513c5bb77'] = value # Artifact UUID
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['f5284765cef8e6974676adcb59791960'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Literature Reference Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to literature reference assignment relation')

View file

@ -5,79 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToMarkInformationAssignmentRelation(api, engine):
print('importing artifact to mark information assignment relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__6760_markenart"
bundleId = 'b7112c2a7ea92a1d263d42d5572a05fc' # Artifact to mark information assignment relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__6760_markenart"
bundleId = 'b7112c2a7ea92a1d263d42d5572a05fc' # Artifact to mark information assignment relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fcf4cbb8b01e4a02ffd041ba4040f890'] = value # Artifact UUID
fUuid = value[0]
case 'f__6760_markenart__uuid':
entityValues['fb6de3d2433630fc205fe1ef7f24639f'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fcf4cbb8b01e4a02ffd041ba4040f890'] = value # Artifact UUID
fUuid = value[0]
case 'f__6760_markenart__uuid':
entityValues['fb6de3d2433630fc205fe1ef7f24639f'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Mark Information Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to mark information assignment relation')

View file

@ -5,79 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToMaterialRelation(api, engine):
print('importing artifact to material relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__5280_material"
bundleId = 'b825aff7df3d48bd875e2a081c796305' # Artifact to material relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__5280_material"
bundleId = 'b825aff7df3d48bd875e2a081c796305' # Artifact to material relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f9f07bf63ccafd4eb2c0de24c73e1664'] = value # Artifact UUID
fUuid = value[0]
case 'f__5280_material__uuid':
entityValues['f820534abde4c2a2d19e0d19f7793cf0'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f9f07bf63ccafd4eb2c0de24c73e1664'] = value # Artifact UUID
fUuid = value[0]
case 'f__5280_material__uuid':
entityValues['f820534abde4c2a2d19e0d19f7793cf0'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Material Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to material relation')

View file

@ -5,80 +5,67 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToNumericeDateRelation(api, engine):
print('importing artifact to numeric date relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__5064_num__dat_"
bundleId = 'b795fcfa6c684fa707c236c4b0882ad7' # Artifact to numeric date relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__5064_num__dat_"
bundleId = 'b795fcfa6c684fa707c236c4b0882ad7' # Artifact to numeric date relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fc69105d5a6931fc1d2b53cee7ef8b22'] = value # Artifact UUID
fUuid = value[0]
case 'f__5064_num__dat___uuid':
entityValues['fff143b7bfc1308cac53789304a1aff2'] = value # Numeric Date UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fc69105d5a6931fc1d2b53cee7ef8b22'] = value # Artifact UUID
fUuid = value[0]
case 'f__5064_num__dat___uuid':
entityValues['fff143b7bfc1308cac53789304a1aff2'] = value # Numeric Date UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Numeric Date Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to numeric date relation')

View file

@ -5,79 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToPhotographRelation(api, engine):
print('importing artifact to photograph relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__8490_fotograf"
bundleId = 'b63cd713e60b6e5bc3b2235dffc0dba9' # Artifact to photograph relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__8490_fotograf"
bundleId = 'b63cd713e60b6e5bc3b2235dffc0dba9' # Artifact to photograph relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f88af5d8b4e289c0cde4df32f76a2804'] = value # Artifact UUID
fUuid = value[0]
case 'f__8490_fotograf__uuid':
entityValues['fe2f0af4ba38024fb0f796d4a98af511'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f88af5d8b4e289c0cde4df32f76a2804'] = value # Artifact UUID
fUuid = value[0]
case 'f__8490_fotograf__uuid':
entityValues['fe2f0af4ba38024fb0f796d4a98af511'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Photograph Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to photograph relation')

View file

@ -5,80 +5,67 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToRelationRelation(api, engine):
print('importing artifact to relation relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__5007_beziehung"
bundleId = 'bb878dd9c44c83a70fbd151f1dc06b4d' # Artifact to relation relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__5007_beziehung"
bundleId = 'bb878dd9c44c83a70fbd151f1dc06b4d' # Artifact to relation relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fe537502d55fd4a4482449a0174a3d98'] = value # Artifact UUID
fUuid = value[0]
case 'f__5007_beziehung__uuid':
entityValues['f82f33fa9640d894170c5221d02f583a'] = value # Relation UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fe537502d55fd4a4482449a0174a3d98'] = value # Artifact UUID
fUuid = value[0]
case 'f__5007_beziehung__uuid':
entityValues['f82f33fa9640d894170c5221d02f583a'] = value # Relation UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Relation Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finish')
print('finished importing artifact to relation relation')

View file

@ -5,78 +5,65 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToSourceRelation(api, engine):
print('importing artifact to source relation')
# Load the environment variables
load_dotenv()
tableName = "r__obj__8130_que_kurzt_"
bundleId = 'bcf720dc0b796043915d6da536414451' # Artifact to source relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__obj__8130_que_kurzt_"
bundleId = 'bcf720dc0b796043915d6da536414451' # Artifact to source relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fc8eb74a6ba0c51a82972ff19fec53e8'] = value # Artifact UUID
fUuid = value[0]
case 'f__8130_que_kurzt___uuid':
entityValues['fbfbf828330ed4ec85797ea274f73bb8'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['fc8eb74a6ba0c51a82972ff19fec53e8'] = value # Artifact UUID
fUuid = value[0]
case 'f__8130_que_kurzt___uuid':
entityValues['fbfbf828330ed4ec85797ea274f73bb8'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Source Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
print('finished importing artifact to source relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtifactToStatusAdministratorRelation(api, engine):
print('importing artifact to status administrator relation')
test = False
tableName = "r__obj__ob28_status_verwalt_"
bundleId = 'bd4922f100ab534fc1213f767770ed6d' # Artifact to status adminstrator relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load the environment variables
load_dotenv()
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
test = False
tableName = "r__obj__ob28_status_verwalt_"
bundleId = 'bd4922f100ab534fc1213f767770ed6d' # Artifact to status adminstrator relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f355304194b190e2fee22a99d54ebc92'] = value # Artifact UUID
fUuid = value[0]
case 'f__ob28_status_verwalt___uuid':
entityValues['fcc8a9758ce7a2659bfe96242ec4a15e'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__obj__uuid':
entityValues['f355304194b190e2fee22a99d54ebc92'] = value # Artifact UUID
fUuid = value[0]
case 'f__ob28_status_verwalt___uuid':
entityValues['fcc8a9758ce7a2659bfe96242ec4a15e'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artifact to Status Administrator Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
if test:
exit()
print('finish')
print('finished importing artifact to status administrator relation')

View file

@ -5,83 +5,70 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToBirthRelation(api, engine):
print('importing artist to birth relation')
# Load the environment variables
load_dotenv()
test = False
tableName = "r__kue__3270_geb_datum"
bundleId = 'b82e4404cdf641db57c03d7e3b23947c' # Artist to birth relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
test = False
tableName = "r__kue__3270_geb_datum"
bundleId = 'b82e4404cdf641db57c03d7e3b23947c' # Artist to birth relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f07d608ae6abf891e54c0f57b5f78507'] = value # Date
fUuid = value[0]
case 'f__3270_geb_datum__uuid':
entityValues['f70978f842342d920db490d420339dae'] = value # Dating
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f07d608ae6abf891e54c0f57b5f78507'] = value # Date
fUuid = value[0]
case 'f__3270_geb_datum__uuid':
entityValues['f70978f842342d920db490d420339dae'] = value # Dating
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Birth Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finish')
print('finished importing artist to birth relation')

View file

@ -5,81 +5,68 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToDeathRelation(api, engine):
print('importing artist to death relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__kue__3330_todes_dat_"
bundleId = 'b91ed11c8063a363063582f001a3f5a2' # Artist to death relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
test = False
tableName = "r__kue__3330_todes_dat_"
bundleId = 'b91ed11c8063a363063582f001a3f5a2' # Artist to death relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f2b9ebb823502c1bba835d2f57102815'] = value # Artist UUID
fUuid = value[0]
case 'f__3330_todes_dat___uuid':
entityValues['f6286ce1789410919bd6fc3f1a7f2e05'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f2b9ebb823502c1bba835d2f57102815'] = value # Artist UUID
fUuid = value[0]
case 'f__3330_todes_dat___uuid':
entityValues['f6286ce1789410919bd6fc3f1a7f2e05'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Death Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing artist to death relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToGoldsmithRelation(api, engine):
print('importing artist to goldsmith relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__kue__3007_bezieh__zu_gs"
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__kue__3007_bezieh__zu_gs"
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f972dfd248e362846f4cb5cc946eefc2'] = value # Date
fUuid = value[0]
case 'f__3007_bezieh__zu_gs__uuid':
entityValues['f37c88dc7451b8d1b82f702ef64f8b05'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f972dfd248e362846f4cb5cc946eefc2'] = value # Date
fUuid = value[0]
case 'f__3007_bezieh__zu_gs__uuid':
entityValues['f37c88dc7451b8d1b82f702ef64f8b05'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Goldsmith Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing artist to goldsmith relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToLiteratureReferenceRelation(api, engine):
print('importing artist to literature reference relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__kue__8330_lit_kurzt_"
bundleId = 'b7a87e3f3d5f671c1f163101bff30eb6' # Artist to literature relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__kue__8330_lit_kurzt_"
bundleId = 'b7a87e3f3d5f671c1f163101bff30eb6' # Artist to literature relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f0b9b134818c592f93083d444817dffb'] = value # Date
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['f70fb4157e3ef66e4d1ed78880f092b2'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f0b9b134818c592f93083d444817dffb'] = value # Date
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['f70fb4157e3ef66e4d1ed78880f092b2'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Literature Reference Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing artist to literature reference relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToMentionedRelation(api, engine):
print('importing artist to mentioned relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__kue__7060_erwaehnt__datum_"
bundleId = 'bc2b0ddca583320a56a67b304dc0a045' # Artist to mentioned relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__kue__7060_erwaehnt__datum_"
bundleId = 'bc2b0ddca583320a56a67b304dc0a045' # Artist to mentioned relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f47b1ffe8394f389497b9e23407ad72f'] = value # Date
fUuid = value[0]
case 'f__7060_erwaehnt__datum___uuid':
entityValues['fabb90d487512fc5bf8d7379ff2d8bdb'] = value # Mentioned UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f47b1ffe8394f389497b9e23407ad72f'] = value # Date
fUuid = value[0]
case 'f__7060_erwaehnt__datum___uuid':
entityValues['fabb90d487512fc5bf8d7379ff2d8bdb'] = value # Mentioned UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Mentioned Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing artist to mentioned relation')

View file

@ -5,81 +5,68 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToOriginRelation(api, engine):
print('importing artist to origin relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__kue__3204_herkunft"
bundleId = 'b5cf6b3e6fd2e4b5575da4347999d6ea' # Artist to origin relation
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
test = False
tableName = "r__kue__3204_herkunft"
bundleId = 'b5cf6b3e6fd2e4b5575da4347999d6ea' # Artist to origin relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f40e702ecb7fe968c77c9f2ed0f1280c'] = value # Artist UUID
fUuid = value[0]
case 'f__3204_herkunft__uuid':
entityValues['f53bcd587a769e93ea54a34e6de4867d'] = value # Origin UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f40e702ecb7fe968c77c9f2ed0f1280c'] = value # Artist UUID
fUuid = value[0]
case 'f__3204_herkunft__uuid':
entityValues['f53bcd587a769e93ea54a34e6de4867d'] = value # Origin UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Origin Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing artist to origin relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importArtistToWorkshopRelation(api, engine):
print('importing artist to workshop relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__kue__nfws_forts_werkst_"
bundleId = 'becb95326a733bdbd0c2dd3d36e3399d' # Artist to workshop relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__kue__nfws_forts_werkst_"
bundleId = 'becb95326a733bdbd0c2dd3d36e3399d' # Artist to workshop relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f1f107b495d9cf3f349932f2c6535505'] = value # Date
fUuid = value[0]
case 'f__nfws_forts_werkst___uuid':
entityValues['fc53912a0acb388e04eb6684eda209f1'] = value # Workshop UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__kue__uuid':
entityValues['f1f107b495d9cf3f349932f2c6535505'] = value # Date
fUuid = value[0]
case 'f__nfws_forts_werkst___uuid':
entityValues['fc53912a0acb388e04eb6684eda209f1'] = value # Workshop UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Artist to Workshop Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing artist to workshop relation')

View file

@ -5,82 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importInspectionMarkDatingInformationAssignmentRelation(api, engine):
print('importing inspection mark dating information assignment relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__bez__68dm_datierung_marke"
bundleId = 'b1fee832598b2d42ed17a927dad43b90' # Inspection Mark to dating information assignment relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__bez__68dm_datierung_marke"
bundleId = 'b1fee832598b2d42ed17a927dad43b90' # Inspection Mark to dating information assignment relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__bez__uuid':
entityValues['fac07ebf9c19d09995cc13ae1ba6f362'] = value # Date
fUuid = value[0]
case 'f__68dm_datierung_marke__uuid':
entityValues['ffd43be34e81e0dbfc1b8cccc5f32056'] = value # Dating
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__bez__uuid':
entityValues['fac07ebf9c19d09995cc13ae1ba6f362'] = value # Date
fUuid = value[0]
case 'f__68dm_datierung_marke__uuid':
entityValues['ffd43be34e81e0dbfc1b8cccc5f32056'] = value # Dating
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Inspection Mark to Dating Information Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing inspection mark dating information assignment relation')

View file

@ -5,83 +5,70 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importInspectionMarkRelationRelation(api, engine):
print('importing inspection mark relation relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__bez__67b7_beziehung"
bundleId = 'bc8dcd233a9b539db407bad219715988' # Inspection Mark Relation Relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__bez__67b7_beziehung"
bundleId = 'bc8dcd233a9b539db407bad219715988' # Inspection Mark Relation Relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__bez__uuid':
entityValues['fb9cc78d2351179c5f2f49b3b01be40b'] = value # Date
fUuid = value[0]
case 'f__67b7_beziehung__uuid':
entityValues['f468e7d8e91f04b902c6bc79fe365074'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
case 'f__bez__uuid':
entityValues['fb9cc78d2351179c5f2f49b3b01be40b'] = value # Date
fUuid = value[0]
case 'f__67b7_beziehung__uuid':
entityValues['f468e7d8e91f04b902c6bc79fe365074'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Inspection Mark to Relation Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing inspection mark relation relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importInspectionMarkToLiteratureReferenceRelation(api, engine):
print('importing inspection mark to literature reference relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__bez__8330_lit_kurzt_"
bundleId = 'b32fc778865a1ffd5b165515425f38c6' # Inspection Mark to Dating Assignment
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__bez__8330_lit_kurzt_"
bundleId = 'b32fc778865a1ffd5b165515425f38c6' # Inspection Mark to Dating Assignment
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__bez__uuid':
entityValues['f8670edfe030f375ca0b8b275a394511'] = value # Date
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['fa52476d733d0d106406864245d613b8'] = value # Literature Reference Assignment
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__bez__uuid':
entityValues['f8670edfe030f375ca0b8b275a394511'] = value # Date
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['fa52476d733d0d106406864245d613b8'] = value # Literature Reference Assignment
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Inspection Mark to Literature Reference Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing inspection mark to literature reference relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importLiteratureToJournalRelation(api, engine):
print('importing literature to journal relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__lit__8310_zeitschrift"
bundleId = 'b6c2ce0add1e7999f48d66b7ef1a4a26' # Literature to journal relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__lit__8310_zeitschrift"
bundleId = 'b6c2ce0add1e7999f48d66b7ef1a4a26' # Literature to journal relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__lit__uuid':
entityValues['fc751b683ba51648f4e7557e37e18228'] = value # Literature UUID
fUuid = value[0]
case 'f__8310_zeitschrift__uuid':
entityValues['fae46e3ca92e3a84b36df823fe0323bb'] = value # Journal UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__lit__uuid':
entityValues['fc751b683ba51648f4e7557e37e18228'] = value # Literature UUID
fUuid = value[0]
case 'f__8310_zeitschrift__uuid':
entityValues['fae46e3ca92e3a84b36df823fe0323bb'] = value # Journal UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Literature to Journal Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing literature to journal relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importLiteratureToParentPublicationRelation(api, engine):
print('importing literature to parent publication relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__lit__8292_uebergeordn_publ_"
bundleId = 'b2adaaa15714d83ea83cd3333af437df' # Literature to parent publication relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__lit__8292_uebergeordn_publ_"
bundleId = 'b2adaaa15714d83ea83cd3333af437df' # Literature to parent publication relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__lit__uuid':
entityValues['f1ecd1cf9be1081507f9c8f3758bafe9'] = value # Date
fUuid = value[0]
case 'f__8292_uebergeordn_publ___uuid':
entityValues['f9997e4bbacb1c26a945825cfe5b6de2'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__lit__uuid':
entityValues['f1ecd1cf9be1081507f9c8f3758bafe9'] = value # Date
fUuid = value[0]
case 'f__8292_uebergeordn_publ___uuid':
entityValues['f9997e4bbacb1c26a945825cfe5b6de2'] = value #
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Literature to Parent Publication Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing literature to parent publication relation')

View file

@ -5,82 +5,66 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMarkToDatingRelation(api, engine):
print('importing mark to dating relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__mar__68dm_datierung_marke"
bundleId = 'b105b749b25de3aa55329b82fe18c18d' # Mark to dating relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__mar__68dm_datierung_marke"
bundleId = 'b105b749b25de3aa55329b82fe18c18d' # Mark to dating relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['f11c6eedcfc833dabffd356f57be7e15'] = value # Date
fUuid = value[0]
case 'f__68dm_datierung_marke__uuid':
entityValues['f2b469f3a10721ab891e01b1d9817612'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['f11c6eedcfc833dabffd356f57be7e15'] = value # Date
fUuid = value[0]
case 'f__68dm_datierung_marke__uuid':
entityValues['f2b469f3a10721ab891e01b1d9817612'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Mark to Dating Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing mark to dating relation')

View file

@ -5,83 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMarkToLiteratureRelation(api, engine):
print('importing mark to literature relation')
test = False
test = True
tableName = "r__mar__8330_lit_kurzt_"
bundleId = 'bd58cc7d59ce9f3e593e758a28dfcf4a' # Mark to literature relation
# Load the environment variables
load_dotenv()
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__mar__8330_lit_kurzt_"
bundleId = 'bd58cc7d59ce9f3e593e758a28dfcf4a' # Mark to literature relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['f4fccc9bad7fc559c153095bdcb32eeb'] = value # Mark UUID
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['f19ffb27810f7d14694afb54dd359451'] = value # Literature UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['f4fccc9bad7fc559c153095bdcb32eeb'] = value # Mark UUID
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['f19ffb27810f7d14694afb54dd359451'] = value # Literature UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Mark to Literature Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing mark to literature relation')

View file

@ -5,81 +5,68 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMarkToMarkInformationRelation(api, engine):
print('importing mark to mark information relation')
test = False
tableName = "r__mar__6760_markenart"
bundleId = 'b241e8063b9259428967fa4ff134a8bd' # Mark to mark information relation
# Load the environment variables
load_dotenv()
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
test = False
tableName = "r__mar__6760_markenart"
bundleId = 'b241e8063b9259428967fa4ff134a8bd' # Mark to mark information relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['fa64f8812c3c784b2d91454bc9a88279'] = value # Mark UUID
fUuid = value[0]
case 'f__6760_markenart__uuid':
entityValues['f9d5d6723ea78253330dd8e4b346cac6'] = value # Mark information assignment uuidNote
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['fa64f8812c3c784b2d91454bc9a88279'] = value # Mark UUID
fUuid = value[0]
case 'f__6760_markenart__uuid':
entityValues['f9d5d6723ea78253330dd8e4b346cac6'] = value # Mark information assignment uuidNote
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Mark to Mark Information Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing mark to mark information relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importMarkToSourceRelation(api, engine):
print('importing mark to source relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__mar__8130_que_kurzt_"
bundleId = 'b0edbf644e07765a5ae319802ec0289b' # Mark to source relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__mar__8130_que_kurzt_"
bundleId = 'b0edbf644e07765a5ae319802ec0289b' # Mark to source relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['ffe35cef0c5d28bbebe195436706fc7c'] = value # Date
fUuid = value[0]
case 'f__8130_que_kurzt___uuid':
entityValues['f86e4b7f52add5640b824a601c66a2f6'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__mar__uuid':
entityValues['ffe35cef0c5d28bbebe195436706fc7c'] = value # Date
fUuid = value[0]
case 'f__8130_que_kurzt___uuid':
entityValues['f86e4b7f52add5640b824a601c66a2f6'] = value # Note
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Mark to Source Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing mark to source relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importSourceToDateRelation(api, engine):
print('importing source to date relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__que__8100_datum"
bundleId = 'b4b8ba242075bf2c778894911c7f3264' # Source to date relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__que__8100_datum"
bundleId = 'b4b8ba242075bf2c778894911c7f3264' # Source to date relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__que__uuid':
entityValues['f2e8d1b76c8b196c8deb9e0abe90a5b3'] = value # Source UUID
fUuid = value[0]
case 'f__8100_datum__uuid':
entityValues['ff5ac62e6327599566d4474e18423265'] = value # Date UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__que__uuid':
entityValues['f2e8d1b76c8b196c8deb9e0abe90a5b3'] = value # Source UUID
fUuid = value[0]
case 'f__8100_datum__uuid':
entityValues['ff5ac62e6327599566d4474e18423265'] = value # Date UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Source to Date Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing source to date relation')

View file

@ -5,82 +5,69 @@ import os # For environment variable loading
from dotenv import load_dotenv # For environment variable loading
import pandas as pd # For dataframe handling
# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
print('Database initialization failed.')
exit()
def importSourceToLiteratureReferenceAssignmentRelation(api, engine):
print('importing source to literature reference assignment relation')
test = False
# Load the environment variables
load_dotenv()
tableName = "r__que__8330_lit_kurzt_"
bundleId = 'bed2f320214a0344287c6c4db40e9331' # Source to literature reference assignemnt relation
# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations')
try:
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
test = False
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
tableName = "r__que__8330_lit_kurzt_"
bundleId = 'bed2f320214a0344287c6c4db40e9331' # Source to literature reference assignemnt relation
try:
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
# Load sources table
sqlTable = pd.read_sql_table(tableName, con=engine)
entityValues = {}
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
# Create entities
for index, row in sqlTable.iterrows():
# For every row in table...
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
# skip if already processed
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
continue
# Properties of an entity have to be an array, so...
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__que__uuid':
entityValues['faeb9c96c23eadd1a58df9ecd2154b68'] = value # Source UUID
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['fc15a069f1a7694c13107a348d3b7a39'] = value # Literature reference assignment UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Entity property dicts
entityValues = {}
for key, value in row.items():
# For every column in row...
if (value is None) or (value == ''):
# skip if cell has no value
continue
# Properties of an entity have to be an array, so...
value = str(value).replace('&###{{new_line}}###'.format(), '&')
value = str(value).replace('###{{new_line}}###', '&')
value = str(value).replace(' & ', '&')
if '&' in str(value):
# ...Explode "&"-separated values to array items
value = [x.strip() for x in str(value).split('&')]
else:
# ...Or parse to array
value = [value]
# Map columns to fields. We use assignments for reification.
docId = ''
match key:
case 'id':
docId = value[0]
case 'f__que__uuid':
entityValues['faeb9c96c23eadd1a58df9ecd2154b68'] = value # Source UUID
fUuid = value[0]
case 'f__8330_lit_kurzt___uuid':
entityValues['fc15a069f1a7694c13107a348d3b7a39'] = value # Literature reference assignment UUID
case _:
print(f'{key} is not a valid field, skipping.')
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
# Create Material
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
api.save(entity)
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
print(f'Created Source to Literature Reference Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
# Write log
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
exit()
print('finish')
# Write log
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
if test:
exit()
print('finished importing source to literature reference assignment relation')

View file

@ -19,7 +19,7 @@ def initDb(_production, schemaDir):
return (False, False)
if _production:
dbName = 'ngk'
dbName = 'ngk_data_alt'
else:
dbName = 'testngk'

View file

@ -20,7 +20,7 @@ def createClass(name, columns):
tableName = name.lower().replace('-', '_').replace('.', '_').replace(' ', '_').replace('(', '_').replace(')', '_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss').replace('?', '_')
# Transform columns and add prefix
attrs = {'__tablename__': tableName}
attrs = {'__tablename__': tableName, '__table_args__': {'extend_existing': True}}
attrs.update({prop.lower().replace('-', '_').replace('.', '_').replace(' ', '_').replace('(', '_').replace(')','_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss').replace('?', '_'): (Column(String(36), primary_key=True) if prop.lower() == 'uuid' else Column(Text)) for prop in columns})
# If 'uuid' is not in columns, add 'id' as primary key
@ -30,9 +30,6 @@ def createClass(name, columns):
# Create SQLAlchemy class
cls = type(className, (Base,), attrs)
# Define the table with extend_existing=True
Table(tableName, Base.metadata, extend_existing=True)
return cls
def initClassesFromSchemas(schemaDir):

View file

@ -3,4 +3,4 @@ pandas
pymysql
sqlalchemy
tqdm
wisski_py