new commit
This commit is contained in:
parent
da296f8a64
commit
e46a9fd4ec
69 changed files with 4199 additions and 4805 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -11,3 +11,5 @@ wisski_py
|
|||
__pycache__
|
||||
logs/*
|
||||
.venv
|
||||
.env
|
||||
.vscode
|
||||
|
|
|
|||
15
.vscode/launch.json
vendored
Normal file
15
.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
160
00_start.py
Normal file
160
00_start.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
from importlib import import_module
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
from time import sleep
|
||||
|
||||
# Importer modules, listed in the order they have to run. Their file names
# start with a digit, so a plain ``import`` statement cannot load them and
# importlib is used instead. Every module exposes exactly one entry function
# named like the file without its numeric prefix, e.g.
# "03_importSource" -> importSource, "98__r__importXRelation" -> importXRelation.
ENTITY_MODULES = (
    "01_importMaterialsAndTechnique",
    "02_importAdministrator",
    "03_importAdministratorStatus",
    "03_importSource",
    "04_importArtistSourceReferenceAssignment",
    "04_importMarks",
    "04_importSourceReferenceAssignment",
    "05_importArtist",
    "06_importLiterature",
    "07_importInspectionMark",
    "07_importJournalAssignment",
    "07_importLiteratureReferenceAssignment",
    "07_importParentLiteratureAssignment",
    "08_importInspectionMarkLocation",
    "09_importInspectionMarkRelation",
    "10_importMarkDatingInfo",
    "12_importBirth",
    "13_importDeath",
    "14_importDating",
    "15_importGoldsmithRelation",
    "16_importClient",
    "17_importMentioned",
    "18_importNumDating",
    "19_importOriginAssignment",
    "20_importWorkshops",
    "21_importArtifacts",
    "22_importArtifactRelation",
    "24_importArtistAssignment",
    "25_importMarkInformation",
    "26_importPhotographer",
)

# Relation importers; they run after all entities, with the 'relations'
# pathbuilder selected on the API object.
RELATION_MODULES = (
    "98__r__importArtifactToArtistRelationRelation",
    "98__r__importArtifactToClientAssignmentRelation",
    "98__r__importArtifactToInspectionMarkLocationRelation",
    "98__r__importArtifactToLiteratureReferenceAssignmentRelation",
    "98__r__importArtifactToMarkInformationAssignmentRelation",
    "98__r__importArtifactToMaterialRelation",
    "98__r__importArtifactToNumericeDateRelation",
    "98__r__importArtifactToPhotographRelation",
    "98__r__importArtifactToRelationRelation",
    "98__r__importArtifactToSourceRelation",
    "98__r__importArtifactToStatusAdministratorRelation",
    "98__r__importArtistToBirthRelation",
    "98__r__importArtistToDeathRelation",
    "98__r__importArtistToGoldsmithRelation",
    "98__r__importArtistToLiteratureReferenceRelation",
    "98__r__importArtistToMentionedRelation",
    "98__r__importArtistToOriginRelation",
    "98__r__importArtistToWorkshopRelation",
    "98__r__importInspectionMarkDatingInformationAssignmentRelation",
    "98__r__importInspectionMarkRelationRelation",
    "98__r__importInspectionMarkToLiteratureReferenceRelation",
    "98__r__importLiteratureToJournalRelation",
    "98__r__importLiteratureToParentPublicationRelation",
    "98__r__importMarkToDatingRelation",
    "98__r__importMarkToLiteratureRelation",
    "98__r__importMarkToMarkInformationRelation",
    "98__r__importMarkToSourceRelation",
    "98__r__importSourceToDateRelation",
    "98__r__importSourceToLiteratureReferenceAssignmentRelation",
)

MAX_TRIALS = 3
RETRY_DELAY_SECONDS = 10


def _load_importer(module_name):
    """Import *module_name* and return its ``import...`` entry function.

    The function name is the part of the module name that starts at the
    first occurrence of "import" (i.e. the name without its numeric prefix).
    Raises ImportError / AttributeError if the module or function is missing.
    """
    module = import_module(module_name)
    return getattr(module, module_name[module_name.index("import"):])


# Resolve every importer up front so that a missing module or function stops
# the run before anything has been written to WissKI.
entity_importers = [_load_importer(name) for name in ENTITY_MODULES]
relation_importers = [_load_importer(name) for name in RELATION_MODULES]

# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
# The check is kept exactly as before: a False engine is treated as failure.
if engine == False:
    print('Database initialization failed.')
    # Non-zero status so a calling shell or scheduler can see the failure.
    raise SystemExit(1)

# Load the environment variables
load_dotenv()

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilders = ['default']

# Run the whole import, retrying the complete sequence on any error.
for trial in range(1, MAX_TRIALS + 1):
    try:
        # Reset on every trial: a previous trial may have failed after the
        # switch to the 'relations' pathbuilder.
        api.pathbuilders = ['default']
        for run_import in entity_importers:
            run_import(api, engine)

        api.pathbuilders = ['relations']
        for run_import in relation_importers:
            run_import(api, engine)
    except Exception as e:
        print(f'Error: {e}')
        print(f'Trial {trial} of {MAX_TRIALS} failed.')
        if trial < MAX_TRIALS:
            # Only announce and wait when another trial actually follows.
            print(f'Retrying in {RETRY_DELAY_SECONDS} seconds...')
            sleep(RETRY_DELAY_SECONDS)
    else:
        # Everything ran through: stop instead of importing again.
        break
else:
    # All trials failed.
    raise SystemExit(1)
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilders = ['default']
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedMaterials.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load materials table
|
||||
materialsTable = pd.read_sql_table('c__5280_material', con=engine)
|
||||
|
||||
# Create materials
|
||||
for index, row in materialsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and materialsTable.loc[index, 'id'] == processedRows.iloc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed material {materialsTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
materialValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
materialValues['fedfe553c2332bd4902c887813f29ed8'] = value # UUID
|
||||
case 'f__5280_material':
|
||||
materialValues['f5f4251312f54c0d104ea87761b94bde'] = value # Material
|
||||
case 'f__5300_technik':
|
||||
materialValues['f231e08850022f091ebd5055d8aad30f'] = value # Technique
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
material = Entity(api=api, fields=materialValues, bundle_id='b45978f2b073ff3c73b3c7220ebb3b89')
|
||||
api.save(material)
|
||||
|
||||
print(f'Created material {index}: {material.uri}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': materialValues['fedfe553c2332bd4902c887813f29ed8'][0], 'uri': material.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedMaterials.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
79
01_importMaterialsAndTechnique.py
Normal file
79
01_importMaterialsAndTechnique.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
def _splitCellValue(value):
    """Turn one table cell into the list of strings used as a field value.

    The cell is converted to ``str``, line-break placeholders are treated as
    "&" separators, and the text is split on "&" and then on ",". Every item
    is stripped of surrounding whitespace; empty items are dropped, so the
    result may be an empty list.
    """
    text = str(value)
    # The previous code matched the placeholder with single braces in one
    # replace call and with double braces in the next; accept both spellings
    # everywhere. NOTE(review): confirm which spelling the export really uses.
    for placeholder in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(placeholder, '&')
    items = []
    for part in text.split('&'):
        items.extend(piece.strip() for piece in part.split(','))
    return [item for item in items if item]


def importMaterialsAndTechnique(api, engine):
    """Create one WissKI material entity per row of table ``c__5280_material``.

    Args:
        api: WissKI API client; used for ``Entity(api=...)`` and ``api.save``.
        engine: database connection passed to ``pandas.read_sql_table``.

    Every created entity is appended to ``./logs/c__5280_material.csv``
    (columns id, uuid, uri) right after it was saved. On a later run a row is
    skipped when the log holds the same id at the same position.
    """
    print('Importing materials and technique...')

    tableName = 'c__5280_material'
    bundleId = 'b45978f2b073ff3c73b3c7220ebb3b89'
    logPath = f'./logs/{tableName}.csv'
    uuidField = 'fedfe553c2332bd4902c887813f29ed8'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__5280_material': 'f5f4251312f54c0d104ea87761b94bde',  # Material
        'f__5300_technik': 'f231e08850022f091ebd5055d8aad30f',  # Technique
    }

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
    # Ids logged by earlier runs, in log order. Rows appended during this run
    # never take part in the skip check, so a snapshot is sufficient.
    processedIds = processedRows['id'].tolist()

    # Make sure the log can be written before the first entity is created.
    os.makedirs('./logs', exist_ok=True)

    # Load materials table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create materials
    for index, row in sqlTable.iterrows():
        rowId = sqlTable.loc[index, 'id']
        if index < len(processedIds) and rowId == processedIds[index]:
            # skip if already processed
            print(f'Skipping already processed material {rowId}')
            continue

        # Field id -> list of values for the entity built from this row.
        materialValues = {}
        for key, value in row.items():
            # pd.isna covers None as well as NaN/NaT, which would otherwise
            # be imported as the literal text 'nan'.
            if pd.isna(value) or value == '':
                continue
            if key == 'id':
                # The row id is only used for the log, not as a field.
                continue
            fieldId = fieldIds.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue
            items = _splitCellValue(value)
            if items:
                materialValues[fieldId] = items

        # Create Material
        material = Entity(api=api, fields=materialValues, bundle_id=bundleId)
        api.save(material)

        print(f'Created material {index}: {material.uri} of {len(sqlTable)}')

        # Write log after every row so an interrupted run can be resumed.
        # The UUID is looked up with a default: a row without one must not
        # fail between saving the entity and logging it.
        logEntry = pd.DataFrame([{
            'id': row['id'],
            'uuid': materialValues.get(uuidField, [None])[0],
            'uri': material.uri,
        }])
        # pd.concat is the public replacement for the private DataFrame._append.
        if processedRows.empty:
            processedRows = logEntry
        else:
            processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')
|
||||
|
|
@ -5,105 +5,92 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importAdministrator(api, engine):
|
||||
print('Importing administrators...')
|
||||
tableName = 'c__vwr'
|
||||
bundleId = 'b4e5a6a31ff575ab09b07b5f27d322ab'
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilders = ['default']
|
||||
# Load sources table
|
||||
administratorsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedAdministrators.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['administratorId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
administratorsTable = pd.read_sql_table('c__vwr', con=engine)
|
||||
|
||||
administratorValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
|
||||
# Create administrators
|
||||
for index, row in administratorsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and administratorsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed administrator {administratorsTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
administratorValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create administrators
|
||||
for index, row in administratorsTable.iterrows():
|
||||
administratorValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and administratorsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed administrator {administratorsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{new_line', '')
|
||||
value = str(value).replace('}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
administratorValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
administratorValues['f707e595ce7301d61c064e8e44c9c4f4'] = value # UUID
|
||||
case 'f__vwra_vwr_adresse':
|
||||
administratorValues['f303bbabf3d97536777b0f552d20bc7a'] = value # Address
|
||||
case 'f__vwrn_vwr_dok_nr_':
|
||||
administratorValues['f37e82c36b4fc6b275a1a86a389481e1'] = value # Administrator document number
|
||||
case 'f__vwrb_verw_publ_bez':
|
||||
administratorValues['ffc50ffbcc3f411ed63e3c6dfc6b4d80'] = value # Appellation in publication
|
||||
case 'f__9990_kommentar':
|
||||
administratorValues['fcf9600af8c3eff355eb42466e9aac39'] = value # Comment
|
||||
case 'f__2900_verw_langbez_':
|
||||
administratorValues['f78d3c9e6800adbb8a9af0867cbdf3c7'] = value # Long Appellation
|
||||
case 'f__2864_ort':
|
||||
administratorValues['fecf6c9d7cbae513923e411178516378'] = value # Place
|
||||
case 'f__290a_verw_kurzbez_':
|
||||
administratorValues['fddaae99f4c6a835d9f9f195523c85f7'] = value # Short appellation
|
||||
# Digitisation Process
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
administratorValues['f707e595ce7301d61c064e8e44c9c4f4'] = value # UUID
|
||||
case 'f__vwra_vwr_adresse':
|
||||
administratorValues['f303bbabf3d97536777b0f552d20bc7a'] = value # Address
|
||||
case 'f__vwrn_vwr_dok_nr_':
|
||||
administratorValues['f37e82c36b4fc6b275a1a86a389481e1'] = value # Administrator document number
|
||||
case 'f__vwrb_verw_publ_bez':
|
||||
administratorValues['ffc50ffbcc3f411ed63e3c6dfc6b4d80'] = value # Appellation in publication
|
||||
case 'f__9990_kommentar':
|
||||
administratorValues['fcf9600af8c3eff355eb42466e9aac39'] = value # Comment
|
||||
case 'f__2900_verw_langbez_':
|
||||
administratorValues['f78d3c9e6800adbb8a9af0867cbdf3c7'] = value # Long Appellation
|
||||
case 'f__2864_ort':
|
||||
administratorValues['fecf6c9d7cbae513923e411178516378'] = value # Place
|
||||
case 'f__290a_verw_kurzbez_':
|
||||
administratorValues['fddaae99f4c6a835d9f9f195523c85f7'] = value # Short appellation
|
||||
# Digitisation Process
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
|
||||
# Set Digitisation Process
|
||||
administratorValues['f3ec4640a87bd4534763af0fca050193'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
# Set Digitisation Process
|
||||
administratorValues['f3ec4640a87bd4534763af0fca050193'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Material
|
||||
administrator = Entity(api=api, fields=administratorValues, bundle_id='b4e5a6a31ff575ab09b07b5f27d322ab') # Administrator
|
||||
api.save(administrator)
|
||||
# Create Administrator
|
||||
administrator = Entity(api=api, fields=administratorValues, bundle_id=bundleId) # Administrator
|
||||
api.save(administrator)
|
||||
|
||||
print(f'Created administrator {index}: {administrator.uri}')
|
||||
print(f'Created administrator {index}: {administrator.uri} of {len(administratorsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'administratorId': administratorValues['f37e82c36b4fc6b275a1a86a389481e1'][0], 'uuid': administratorValues['f707e595ce7301d61c064e8e44c9c4f4'][0], 'uri': administrator.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedAdministrators.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorValues['f707e595ce7301d61c064e8e44c9c4f4'][0], 'uri': administrator.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing administrators')
|
||||
|
|
|
|||
|
|
@ -5,84 +5,74 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importAdministratorStatus(api, engine):
|
||||
print('Importing administrator statuses...')
|
||||
tableName = 'c__ob28_status_verwalt_'
|
||||
bundleId = 'b45447146729190da3a1d3e19165a6f8'
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
# Load sources table
|
||||
administratorStatusTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedAdministratorStatus.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
# Create administratorStatuss
|
||||
for index, row in administratorStatusTable.iterrows():
|
||||
administratorStatusValues = {}
|
||||
|
||||
# Load sources table
|
||||
administratorStatusTable = pd.read_sql_table('c__ob28_status_verwalt_', con=engine)
|
||||
|
||||
administratorStatusValues = {}
|
||||
|
||||
# Create administratorStatuss
|
||||
for index, row in administratorStatusTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and administratorStatusTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed administratorStatus {administratorStatusTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
administratorStatusValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and administratorStatusTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed administratorStatus {administratorStatusTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
administratorStatusValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'] = value # UUID
|
||||
case 'f__290a_verw_kurzbez_':
|
||||
administratorStatusValues['f08562a866d00cd5245c380c20e4e7f9'] = value # Admistrator short appellation
|
||||
case 'f__2950_invent_nr_':
|
||||
administratorStatusValues['f92ac041f6098335bf4075942a771ee3'] = value # Inventary
|
||||
case 'f__2952_alte_i_nr_':
|
||||
administratorStatusValues['fdc070143457df491f18347ac97b0f24'] = value # Old Identifier
|
||||
case 'f__2864_ort':
|
||||
administratorStatusValues['f9bc3796ceff9a3581bd8047545628b9'] = value # Place
|
||||
case 'f__ob28_status_verwalt_':
|
||||
administratorStatusValues['ff0265deb26c28f139345b89577b2539'] = value # Status
|
||||
case 'f__2996_gelt_dauer':
|
||||
administratorStatusValues['f3363962b4eaa4d38358bc1d2bda1a7f'] = value # Time-Span
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'] = value # UUID
|
||||
case 'f__290a_verw_kurzbez_':
|
||||
administratorStatusValues['f08562a866d00cd5245c380c20e4e7f9'] = value # Admistrator short appellation
|
||||
case 'f__2950_invent_nr_':
|
||||
administratorStatusValues['f92ac041f6098335bf4075942a771ee3'] = value # Inventary
|
||||
case 'f__2952_alte_i_nr_':
|
||||
administratorStatusValues['fdc070143457df491f18347ac97b0f24'] = value # Old Identifier
|
||||
case 'f__2864_ort':
|
||||
administratorStatusValues['f9bc3796ceff9a3581bd8047545628b9'] = value # Place
|
||||
case 'f__ob28_status_verwalt_':
|
||||
administratorStatusValues['ff0265deb26c28f139345b89577b2539'] = value # Status
|
||||
case 'f__2996_gelt_dauer':
|
||||
administratorStatusValues['f3363962b4eaa4d38358bc1d2bda1a7f'] = value # Time-Span
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
administratorStatus = Entity(api=api, fields=administratorStatusValues, bundle_id='b45447146729190da3a1d3e19165a6f8')
|
||||
api.save(administratorStatus)
|
||||
# Create Material
|
||||
administratorStatus = Entity(api=api, fields=administratorStatusValues, bundle_id='b45447146729190da3a1d3e19165a6f8')
|
||||
api.save(administratorStatus)
|
||||
|
||||
print(f'Created administratorStatus {index}: {administratorStatus.uri} of {len(administratorStatusTable)}')
|
||||
print(f'Created administratorStatus {index}: {administratorStatus.uri} of {len(administratorStatusTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'][0], 'uri': administratorStatus.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedAdministratorStatus.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'][0], 'uri': administratorStatus.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing administrator statuses')
|
||||
|
|
|
|||
|
|
@ -5,124 +5,112 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importSource(api, engine):
    """Import every row of the ``c__que`` table as a WissKI source entity.

    For each row whose ``id`` is not yet recorded in ``./logs/c__que.csv`` a
    digitisation-process entity and a source entity are created and saved
    through ``api``; the row is then appended to that log file so that an
    interrupted run can be resumed without re-importing rows.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing sources...')
    tableName = 'c__que'
    bundleId = 'b7dc57a93e008a58514b0d4ca26147b1'
    digitisationBundleId = 'b22e6c47ccb3ab8a974b37279e1bc33b'

    uuidField = 'f9f02815a5631a85948d4d258a455f49'
    sourceIdField = 'f50ad6021b42c094f7e551faec831802'
    digitisationUuidField = 'f32274ec0032b8778ba69d20108590cc'
    digitisationLinkField = 'ffdf27e75013fa55d31f728ff5166f06'

    # Table column -> field id of the source bundle.
    sourceFields = {
        'f__uuid': uuidField,                                         # UUID
        'f__9990_kommentar': 'f89a563b07f965ca2dcb0b1bd178e863',      # Comment
        'f__8080_verfasser': 'f2d2934a6c72b5552f01042338ff5d67',      # Creator
        'f__80bs_que__beschr_': 'fd2122de6bcd62c61fcb7a9223baa20f',   # Description
        'f__80bw_que__bewertung': 'f70a7818de6e31eacea22148c92737ac', # Evaluation
        'f__8182_transkr__extern': 'f409a3ea352d6bc55c27f6a93d239191',  # External transcript
        'f__2950_invent_nr_': 'f71605f258ceb37ee5fcf2cd7871de2c',     # Inventory number
        'f__2900_verw_langbez_': 'f19d275cd6f48ef64d104997ca99291d',  # Long appellation administrator
        'f__8540_repro_nr_': 'f881dd5566725dc26a8b25cfba181792',      # Reproduction number
        'f__290a_verw_kurzbez_': 'f343d954f8d95f1da98201a7f29ac81f',  # Short appellation administrator
        'f__8130_que_kurzt_': 'f3faea3691516939fc4b0c2149ee2e5b',     # Short title
        'f__8000_que_dok_nr_': sourceIdField,                         # Source document identifier
        'f__8092_untertitel': 'fb734bd50628353b7b5c0bfc88f2cbdc',     # Subtitle
        'f__80fp_vorhanden_als': 'fd7b99a3db6191382401d69710ac192f',  # There as
        'f__8090_titel': 'f399332f583d268f07200efd1e3bb3c5',          # Title
        'f__8180_transkript_': 'f6585008a698902f45dc2a79b9a3a9de',    # Transcript
        'f__8060_art': 'f38c664e4f9b2effc83ebc50e1244442',            # Type
        'f__2990_verbleib': 'fae3bc551d146652898782f712f95749',       # Whereabouts
    }
    # Table column -> field id of the digitisation-process bundle.
    digitisationFields = {
        'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
        'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
        'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',    # Recording note
    }

    def toFieldValues(cell):
        """Split one table cell into the list of values of an entity field."""
        text = str(cell)
        # The new-line marker separates values just like "&". Both brace
        # spellings are handled because the previous code targeted one form
        # in its first replacement and the other in its second.
        # NOTE(review): confirm which spelling the export actually contains.
        for marker in ('###{{new_line}}###', '###{new_line}###'):
            text = text.replace(marker, '&')
        # Dropping empty parts keeps "&&" (a marker following "&") from
        # producing empty-string field values.
        return [part.strip() for part in text.split('&') if part.strip()]

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'sourceId', 'uuid', 'uri'])
    # Resume by id rather than by row position, so a log that is not aligned
    # with the table order still prevents duplicate imports.
    processedIds = set(processedRows['id'])

    # Load sources table
    sourcesTable = pd.read_sql_table(tableName, con=engine)
    totalRows = len(sourcesTable)

    for index, row in sourcesTable.iterrows():
        if row['id'] in processedIds:
            print(f"Skipping already processed source {row['id']}")
            continue

        # Entity property dicts; every property value has to be a list.
        sourceValues = {}
        digitisationProcessValues = {digitisationUuidField: [str(uuid.uuid4())]}

        for key, value in row.items():
            if key == 'id':
                continue
            # Skip cells without a value. Missing cells may arrive as
            # NaN/NaT, which would otherwise be imported as the text "nan".
            if value is None or value == '' or pd.isna(value):
                continue
            values = toFieldValues(value)
            if not values:
                continue
            # Map columns to fields. We use assignments for reification.
            if key in sourceFields:
                sourceValues[sourceFields[key]] = values
            elif key in digitisationFields:
                digitisationProcessValues[digitisationFields[key]] = values
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create the digitisation process first; the source refers to its UUID.
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id=digitisationBundleId)
        api.save(digitisationProcess)
        sourceValues[digitisationLinkField] = [digitisationProcessValues[digitisationUuidField][0]]

        # Create the source
        source = Entity(api=api, fields=sourceValues, bundle_id=bundleId)
        api.save(source)

        print(f'Created source {index}: {source.uri} of {totalRows}')

        # Write log after every row so a crash loses at most the current row.
        # .get() keeps a row without document id or UUID from raising after
        # its entity has already been saved.
        logEntry = {
            'id': row['id'],
            'sourceId': sourceValues.get(sourceIdField, [None])[0],
            'uuid': sourceValues.get(uuidField, [None])[0],
            'uri': source.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finish')
|
||||
|
|
|
|||
|
|
@ -5,83 +5,70 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistSourceReferenceAssignment(api, engine):
    """Import the ``c__81kr_que_kt_kue`` table as WissKI entities.

    Every row whose ``id`` is not yet recorded in
    ``./logs/c__81kr_que_kt_kue.csv`` is turned into one artist source
    reference assignment entity and saved through ``api``; the row is then
    appended to that log file so that an interrupted run can be resumed.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing artist source reference assignments...')

    tableName = "c__81kr_que_kt_kue"
    bundleId = 'bf71940d0b18c20511e2141159afb9de'  # Artist source reference assignment

    uuidField = 'fe3139ac03bd854ac9196fc240e7c68b'
    # Table column -> field id of the bundle.
    entityFields = {
        'f__uuid': uuidField,                                      # UUID
        'f__8134_stelle': 'f58c13c5502baef24ede2a8a977ae6c6',      # Source reference
        'f__81kr_que_kt_kue': 'f14d2d19f879d7398a384bdc132921a3',  # Source short title
    }

    def toFieldValues(cell):
        """Split one table cell into the list of values of an entity field."""
        text = str(cell)
        # The new-line marker separates values just like "&". Both brace
        # spellings are handled because the previous code targeted one form
        # in its first replacement and the other in its second.
        # NOTE(review): confirm which spelling the export actually contains.
        for marker in ('###{{new_line}}###', '###{new_line}###'):
            text = text.replace(marker, '&')
        # Dropping empty parts keeps "&&" (a marker following "&") from
        # producing empty-string field values.
        return [part.strip() for part in text.split('&') if part.strip()]

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
    # Resume by id rather than by row position, so a log that is not aligned
    # with the table order still prevents duplicate imports.
    processedIds = set(processedRows['id'])

    # Load the assignment table
    sqlTable = pd.read_sql_table(tableName, con=engine)
    totalRows = len(sqlTable)

    for index, row in sqlTable.iterrows():
        if row['id'] in processedIds:
            print(f'Skipping already processed entity {row["id"]}')
            continue

        # Entity property dict; every property value has to be a list.
        entityValues = {}
        for key, value in row.items():
            if key == 'id':
                continue
            # Skip cells without a value. Missing cells may arrive as
            # NaN/NaT, which would otherwise be imported as the text "nan".
            if value is None or value == '' or pd.isna(value):
                continue
            values = toFieldValues(value)
            if not values:
                continue
            # Map columns to fields. We use assignments for reification.
            if key in entityFields:
                entityValues[entityFields[key]] = values
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create the entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        # Report progress against the number of rows (the old message used
        # the length of the table *name*).
        print(f'Created entity {index}: {entity.uri} of {totalRows}')

        # Write log after every row so a crash loses at most the current row.
        # The UUID is read from this row's values, so a row without one logs
        # an empty cell instead of the previous row's UUID.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [None])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finish')
|
||||
|
|
|
|||
|
|
@ -5,268 +5,255 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarks(api, engine):
|
||||
print('Importing marks...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = 'c__mar'
|
||||
bundleId = 'b2c4e1c984d7758d7c7ec719110f7125'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'markId', 'uuid', 'uri'])
|
||||
|
||||
# Simple log
|
||||
# Load mark table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
print(f'Processing {len(sqlTable)} marks...')
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedMarks.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'markId', 'uuid', 'uri'])
|
||||
|
||||
# Load mark table
|
||||
markTable = pd.read_sql_table('c__mar', con=engine)
|
||||
print(f'Processing {len(markTable)} marks...')
|
||||
|
||||
# Create mark
|
||||
for index, row in markTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < processedRows['id'].max():
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed mark {row['id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
markValues = {}
|
||||
creationValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
dimensionValues = {}
|
||||
featureValues = {}
|
||||
featureDimensionValues = {}
|
||||
imageValues = {}
|
||||
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
|
||||
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create mark
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed mark {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification
|
||||
# for nested semantics, because we need to be efficient.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
markValues = {}
|
||||
creationValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
dimensionValues = {}
|
||||
featureValues = {}
|
||||
featureDimensionValues = {}
|
||||
imageValues = {}
|
||||
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
|
||||
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
markValues['fb40b199b4032e55acc152f994e93b45'] = value # UUID
|
||||
case 'f__3002_pub_kue_nr_':
|
||||
markValues['f6f0572ebec9c98e164d0e9aa0650c2e'] = value # Artist Number
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
markValues['fe577970c02f173170ff3848a36b3b79'] = value # Mark Document Number
|
||||
case 'f__6770_rosenb_nr_':
|
||||
markValues['f6fc4b5726c97bad8b03ede860491649'] = value # Rosenberg Number
|
||||
case 'f__9990_kommentar':
|
||||
markValues['f01e527e707ff36bf966baa01c163378'] = value # Comment
|
||||
case 'f__68an_abdruck_nr_':
|
||||
markValues['f8324ea3c9ee378f1e19035e092aadb9'] = value # Print Number
|
||||
case 'f__68nk_besonderheiten':
|
||||
markValues['fa21e323a8a7a99ce3489e1f7753ac5f'] = value # Special Features
|
||||
case 'f__8470_aufnahmenr_':
|
||||
markValues['f67031e2a2b81ad9f318dc5b11d5a6af'] = value # Recording number
|
||||
case 'f__684b_breite_marke':
|
||||
# We map dimensions to Dimension entity.
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification
|
||||
# for nested semantics, because we need to be efficient.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
markValues['fb40b199b4032e55acc152f994e93b45'] = value # UUID
|
||||
case 'f__3002_pub_kue_nr_':
|
||||
markValues['f6f0572ebec9c98e164d0e9aa0650c2e'] = value # Artist Number
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
markValues['fe577970c02f173170ff3848a36b3b79'] = value # Mark Document Number
|
||||
case 'f__6770_rosenb_nr_':
|
||||
markValues['f6fc4b5726c97bad8b03ede860491649'] = value # Rosenberg Number
|
||||
case 'f__9990_kommentar':
|
||||
markValues['f01e527e707ff36bf966baa01c163378'] = value # Comment
|
||||
case 'f__68an_abdruck_nr_':
|
||||
markValues['f8324ea3c9ee378f1e19035e092aadb9'] = value # Print Number
|
||||
case 'f__68nk_besonderheiten':
|
||||
markValues['fa21e323a8a7a99ce3489e1f7753ac5f'] = value # Special Features
|
||||
case 'f__8470_aufnahmenr_':
|
||||
markValues['f67031e2a2b81ad9f318dc5b11d5a6af'] = value # Recording number
|
||||
case 'f__684b_breite_marke':
|
||||
# We map dimensions to Dimension entity.
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
case 'f__684h_hoehe_marke':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['hight'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68na_bz_breite_hoehe':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width_x_hight'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__6840_rahmenform':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['frame_shape'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__684d_darst__marke':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['design'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__684l_text_marke':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['text'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nb_randanschluss':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['edge_connection'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nc_form_haste':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['haste_mould'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nd_form_schraegstr_':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['slash_form_shape'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ne_haste_schraegstr_':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nf_n_knick':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash_kink'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ng_ueberg__serifen':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = [
|
||||
'transition_serif_haste'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nh_dicke_ser__max_':
|
||||
# We map (features) dimensions to Dimension entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['maximum_thickness'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ni_dicke_ser__min':
|
||||
# We map features to Feature entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
|
||||
'minimum_thickness'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nj_breite_serife':
|
||||
# We map features to Feature entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
|
||||
'width'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
item = item.replace('MArken\\', 'marks/')
|
||||
item = item.replace('Goldschmiede/', 'goldsmiths/')
|
||||
item = item.replace('Goldschmiede\\', 'goldsmiths/')
|
||||
item = item.replace('Epitaphien/', 'epitaphies/')
|
||||
item = item.replace('Epitaphien\\', 'epitaphies/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case 'f__ptxt_plug_in_text':
|
||||
markValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
case 'f__684h_hoehe_marke':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['hight'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68na_bz_breite_hoehe':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width_x_hight'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__6840_rahmenform':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['frame_shape'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__684d_darst__marke':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['design'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__684l_text_marke':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['text'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nb_randanschluss':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['edge_connection'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nc_form_haste':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['haste_mould'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nd_form_schraegstr_':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['slash_form_shape'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ne_haste_schraegstr_':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nf_n_knick':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash_kink'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ng_ueberg__serifen':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = [
|
||||
'transition_serif_haste'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nh_dicke_ser__max_':
|
||||
# We map (features) dimensions to Dimension entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['maximum_thickness'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ni_dicke_ser__min':
|
||||
# We map (features) dimensions to Dimension entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
|
||||
'minimum_thickness'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nj_breite_serife':
|
||||
# We map (features) dimensions to Dimension entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = [
|
||||
'width'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
item = item.replace('MArken\\', 'marks/')
|
||||
item = item.replace('Goldschmiede/', 'goldsmiths/')
|
||||
item = item.replace('Goldschmiede\\', 'goldsmiths/')
|
||||
item = item.replace('Epitaphien/', 'epitaphies/')
|
||||
item = item.replace('Epitaphien\\', 'epitaphies/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case 'f__ptxt_plug_in_text':
|
||||
markValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Dimension entities and add their UUIDs to a list
|
||||
# because we link Mark and Dimension over the UUID
|
||||
dimension = []
|
||||
for key, value in dimensionValues.items():
|
||||
if value:
|
||||
dimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558')
|
||||
api.save(dimensionItem)
|
||||
dimension.append(value['f802fd7bf45be523a9b188411a591420'][0])
|
||||
# Create Dimension entities and add their UUIDs to a list
|
||||
# because we link Mark and Dimension over the UUID
|
||||
dimension = []
|
||||
for key, value in dimensionValues.items():
|
||||
if value:
|
||||
dimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558')
|
||||
api.save(dimensionItem)
|
||||
dimension.append(value['f802fd7bf45be523a9b188411a591420'][0])
|
||||
|
||||
# Create (feature) Dimension entities and add their UUIDs to a list
|
||||
# because we link Feature and its Dimension over the UUID
|
||||
featureDimension = []
|
||||
for key, value in featureDimensionValues.items():
|
||||
if value:
|
||||
featureDimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558') # Dimension Bundle
|
||||
api.save(featureDimensionItem)
|
||||
featureDimension.append(value['f802fd7bf45be523a9b188411a591420'][0]) # Dimension UUID
|
||||
# Create (feature) Dimension entities and add their UUIDs to a list
|
||||
# because we link Feature and its Dimension over the UUID
|
||||
featureDimension = []
|
||||
for key, value in featureDimensionValues.items():
|
||||
if value:
|
||||
featureDimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558') # Dimension Bundle
|
||||
api.save(featureDimensionItem)
|
||||
featureDimension.append(value['f802fd7bf45be523a9b188411a591420'][0]) # Dimension UUID
|
||||
|
||||
# Add the serif feature to the feature list
|
||||
if featureDimension:
|
||||
featureValues.setdefault('serif', {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['serif'] # Feature Type
|
||||
featureValues['serif']['f0f825f5d3a6f0e2d67eee311b94cd6f'] = featureDimension # Dimension UUIDs
|
||||
featureValues['serif']['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
# Add the serif feature to the feature list
|
||||
if featureDimension:
|
||||
featureValues.setdefault('serif', {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['serif'] # Feature Type
|
||||
featureValues['serif']['f0f825f5d3a6f0e2d67eee311b94cd6f'] = featureDimension # Dimension UUIDs
|
||||
featureValues['serif']['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
# Create Feature entities and add their UUIDs to a list
|
||||
# because we link Mark and Feature over the UUID
|
||||
feature = []
|
||||
for key, value in featureValues.items():
|
||||
if value:
|
||||
featureItem = Entity(api=api, fields=value, bundle_id='b393e1c3db202fbb7a8b54e65eb38227') # Feature Bundle
|
||||
api.save(featureItem)
|
||||
feature.append(value['f299e2a145b508e376f2bf2e44cbe219'][0]) # Feature UUID
|
||||
# Create Feature entities and add their UUIDs to a list
|
||||
# because we link Mark and Feature over the UUID
|
||||
feature = []
|
||||
for key, value in featureValues.items():
|
||||
if value:
|
||||
featureItem = Entity(api=api, fields=value, bundle_id='b393e1c3db202fbb7a8b54e65eb38227') # Feature Bundle
|
||||
api.save(featureItem)
|
||||
feature.append(value['f299e2a145b508e376f2bf2e44cbe219'][0]) # Feature UUID
|
||||
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
|
||||
# Create the Image Assignment entity that holds the list of Image UUIDs
|
||||
# because we link Mark and Image Assignment over the UUID
|
||||
if imageList:
|
||||
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
|
||||
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
|
||||
api.save(imageAssignment)
|
||||
# Create the Image Assignment entity that holds the list of Image UUIDs
|
||||
# because we link Mark and Image Assignment over the UUID
|
||||
if imageList:
|
||||
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
|
||||
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
|
||||
api.save(imageAssignment)
|
||||
|
||||
# Create Digitisation Process
|
||||
if digitisationProcessValues:
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
# Create Digitisation Process
|
||||
if digitisationProcessValues:
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
|
||||
# Add the field values for reference
|
||||
if dimension:
|
||||
markValues['f05807c9d81cd39b814f83de0175d66a'] = dimension # Dimension
|
||||
if feature:
|
||||
markValues['f3ce49288bc03e9d799f20ea277429db'] = feature # Feature
|
||||
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
|
||||
markValues['f73e27498813a922032b18b3f3ab8d10'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
markValues['f3baf98f752fc9638de175985183119a'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
# Add the field values for reference
|
||||
if dimension:
|
||||
markValues['f05807c9d81cd39b814f83de0175d66a'] = dimension # Dimension
|
||||
if feature:
|
||||
markValues['f3ce49288bc03e9d799f20ea277429db'] = feature # Feature
|
||||
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
|
||||
markValues['f73e27498813a922032b18b3f3ab8d10'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
markValues['f3baf98f752fc9638de175985183119a'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Mark
|
||||
mark = Entity(api=api, fields=markValues, bundle_id='b2c4e1c984d7758d7c7ec719110f7125')
|
||||
api.save(mark)
|
||||
# Create Mark
|
||||
mark = Entity(api=api, fields=markValues, bundle_id=bundleId)
|
||||
api.save(mark)
|
||||
|
||||
print(f'Created mark number {index}: {mark.uri} of {len(markTable)}')
|
||||
print(f'Created mark number {index}: {mark.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'markId': markValues['fe577970c02f173170ff3848a36b3b79'][0], 'uuid': markValues['fb40b199b4032e55acc152f994e93b45'][0], 'uri': mark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedMarks.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'markId': markValues['fe577970c02f173170ff3848a36b3b79'][0], 'uuid': markValues['fb40b199b4032e55acc152f994e93b45'][0], 'uri': mark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing marks')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importSourceReferenceAssignment(api, engine):
|
||||
print('Importing source reference assignments...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "c__8130_que_kurzt_"
|
||||
bundleId = 'b3c4232e84c2f39795bd602f152ed6f0' # Source reference assignment
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load source reference assignments table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
tableName = "c__8130_que_kurzt_"
|
||||
bundleId = 'b3c4232e84c2f39795bd602f152ed6f0' # Source reference assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load source reference assignments table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
entityValues['fbe74fcb0ab0ce5a0181467b9b07e12e'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8134_stelle':
|
||||
entityValues['f769795b4fd628d01692dd4516322db4'] = value # Source reference
|
||||
case 'f__8130_que_kurzt_':
|
||||
entityValues['f3e841bf3b4e91716d1ff5b83bf293d9'] = value # Source short title
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
entityValues['fbe74fcb0ab0ce5a0181467b9b07e12e'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8134_stelle':
|
||||
entityValues['f769795b4fd628d01692dd4516322db4'] = value # Source reference
|
||||
case 'f__8130_que_kurzt_':
|
||||
entityValues['f3e841bf3b4e91716d1ff5b83bf293d9'] = value # Source short title
|
||||
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Source Reference Assignment
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Source Reference Assignment
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created source reference assignment {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
print('finished importing source reference assignments')
|
||||
|
|
|
|||
|
|
@ -5,165 +5,154 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtist(api, engine):
|
||||
print('Importing artists...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = 'c__kue'
|
||||
bundleId = 'bc322be33491dacc600dd43fdee09a5c'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
test = False
|
||||
|
||||
test = True
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedArtists.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['artistId', 'uuid', 'uri'])
|
||||
# Load artists table
|
||||
artistsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Load artists table
|
||||
artistsTable = pd.read_sql_table('c__kue', con=engine)
|
||||
# Create artists
|
||||
for index, row in artistsTable.iterrows():
|
||||
|
||||
artistValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
imageValues = {}
|
||||
reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}
|
||||
|
||||
# Create artists
|
||||
for index, row in artistsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistsTable.loc[index, 'f__3000_kue_dok_nr_'] == processedRows.loc[index, 'artistId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artist {artistsTable.loc[index, "f__3000_kue_dok_nr_"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
artistValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artist {artistsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
artistValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
imageValues = {}
|
||||
reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
artistValues['fff2eb2283e4cd8df3783602a1bc96ab'] = value # UUID
|
||||
case 'f__3170_and__taetigkeit':
|
||||
artistValues['f01f51e385e5f206653e029ff5c845c4'] = value # Alternate occupation
|
||||
case 'f__3000_kue_dok_nr_':
|
||||
artistValues['f61deac361ac5e0731edbf214761d15c'] = value # Artist Document Number
|
||||
case 'f__3002_pub_kue_nr_':
|
||||
artistValues['f46b2ec14ce05d2618427c526198d64e'] = value # Artist published number
|
||||
case 'f__9990_kommentar':
|
||||
artistValues['fedc08e4225ac800e5d9f16bf345d181'] = value # Comment
|
||||
case 'f__3360_letzte_erw_':
|
||||
artistValues['f1419788b918f4c4a13393fd09ff37b3'] = value # Last Mentioned
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
artistValues['f3d63eec34c00556cbadf635f78d815a'] = value # Mark Assignment
|
||||
case 'f__33gs_meister_als':
|
||||
artistValues['f30b60be791fb13f919c31510ca4de50'] = value # Master Education
|
||||
case 'f__33mj_meisterjahr':
|
||||
artistValues['fd2d07bb9ea1eadacdf28e41cacb92c1'] = value # Master Year
|
||||
case 'f__3100_name':
|
||||
artistValues['f71c047dad23083850a13d489386bf31'] = value # Name
|
||||
case 'f__3105_abw_schreibw_':
|
||||
artistValues['fbe84024bf9fad8f6a545b3af75d8b1b'] = value # Name Variants
|
||||
case 'f__3166_fakt__taetig_als':
|
||||
artistValues['fb0373e9fd949984cf9c09ec1ea0746c'] = value # Occupation
|
||||
case 'f__336p_1__posth__erw_':
|
||||
artistValues['fe079424bb6196d4a9721f84c43361f8'] = value # Posthumous Mentioned
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
item = item.replace('MArken\\', 'marks/')
|
||||
item = item.replace('Goldschmiede/', 'goldsmiths/')
|
||||
item = item.replace('Goldschmiede\\', 'goldsmiths/')
|
||||
item = item.replace('Epitaphien/', 'epitaphies/')
|
||||
item = item.replace('Epitaphien\\', 'epitaphies/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artist_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__6770_rosenb_nr_':
|
||||
artistValues['f82ed1dc96df9230e28e04fef0ff2305'] = value # Rosenberg number
|
||||
# Digitisation Process
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
artistValues['fff2eb2283e4cd8df3783602a1bc96ab'] = value # UUID
|
||||
case 'f__3170_and__taetigkeit':
|
||||
artistValues['f01f51e385e5f206653e029ff5c845c4'] = value # Alternate occupation
|
||||
case 'f__3000_kue_dok_nr_':
|
||||
artistValues['f61deac361ac5e0731edbf214761d15c'] = value # Artist Document Number
|
||||
case 'f__3002_pub_kue_nr_':
|
||||
artistValues['f46b2ec14ce05d2618427c526198d64e'] = value # Artist published number
|
||||
case 'f__9990_kommentar':
|
||||
artistValues['fedc08e4225ac800e5d9f16bf345d181'] = value # Comment
|
||||
case 'f__3360_letzte_erw_':
|
||||
artistValues['f1419788b918f4c4a13393fd09ff37b3'] = value # Last Mentioned
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
artistValues['f3d63eec34c00556cbadf635f78d815a'] = value # Mark Assignment
|
||||
case 'f__33gs_meister_als':
|
||||
artistValues['f30b60be791fb13f919c31510ca4de50'] = value # Master Education
|
||||
case 'f__33mj_meisterjahr':
|
||||
artistValues['fd2d07bb9ea1eadacdf28e41cacb92c1'] = value # Master Year
|
||||
case 'f__3100_name':
|
||||
artistValues['f71c047dad23083850a13d489386bf31'] = value # Name
|
||||
case 'f__3105_abw_schreibw_':
|
||||
artistValues['fbe84024bf9fad8f6a545b3af75d8b1b'] = value # Name Variants
|
||||
case 'f__3166_fakt__taetig_als':
|
||||
artistValues['fb0373e9fd949984cf9c09ec1ea0746c'] = value # Occupation
|
||||
case 'f__336p_1__posth__erw_':
|
||||
artistValues['fe079424bb6196d4a9721f84c43361f8'] = value # Posthumous Mentioned
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
item = item.replace('MArken\\', 'marks/')
|
||||
item = item.replace('Goldschmiede/', 'goldsmiths/')
|
||||
item = item.replace('Goldschmiede\\', 'goldsmiths/')
|
||||
item = item.replace('Epitaphien/', 'epitaphies/')
|
||||
item = item.replace('Epitaphien\\', 'epitaphies/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artist_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__6770_rosenb_nr_':
|
||||
artistValues['f82ed1dc96df9230e28e04fef0ff2305'] = value # Rosenberg number
|
||||
# Digitisation Process
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0]) # add UUID to list
|
||||
|
||||
# Create Image Assignment entities and add their UUIDs to a list
|
||||
# because we link Artist and Image Assignment over the UUID
|
||||
if imageList:
|
||||
reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList # List of Image UUIDs
|
||||
reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
|
||||
api.save(reproNumberAssignment)
|
||||
# Create Image Assignment entities and add their UUIDs to a list
|
||||
# because we link Artist and Image Assignment over the UUID
|
||||
if imageList:
|
||||
reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList # List of Image UUIDs
|
||||
reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
|
||||
api.save(reproNumberAssignment)
|
||||
else:
|
||||
reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = []
|
||||
|
||||
|
||||
if reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'][0]:
|
||||
artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
if reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44']:
|
||||
artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
|
||||
# Create Artist
|
||||
artist = Entity(api=api, fields=artistValues, bundle_id='bc322be33491dacc600dd43fdee09a5c')
|
||||
api.save(artist)
|
||||
# Create Artist
|
||||
artist = Entity(api=api, fields=artistValues, bundle_id=bundleId)
|
||||
api.save(artist)
|
||||
|
||||
print(f'Created artist {index}: {artist.uri} of {len(artistsTable)}')
|
||||
print(f'Created artist {index}: {artist.uri} of {len(artistsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'artistId': artistValues['f61deac361ac5e0731edbf214761d15c'][0], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedArtists.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
|
||||
if test:
|
||||
print('Testing mode activated. Exiting.')
|
||||
exit()
|
||||
if test:
|
||||
print('Testing mode activated. Exiting.')
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing artists')
|
||||
|
|
|
|||
|
|
@ -5,122 +5,108 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importLiterature(api, engine):
|
||||
print('Importing literature...')
|
||||
tableName = 'c__lit'
|
||||
bundleId = 'bafe9c3d3b640d4d1a16b104f367ac91'
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
# Load sources table
|
||||
literaturesTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedLiteratures.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'literatureId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
literaturesTable = pd.read_sql_table('c__lit', con=engine)
|
||||
|
||||
literatureValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
|
||||
# Create literatures
|
||||
for index, row in literaturesTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and literaturesTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed literature {literaturesTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
literatureValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create literatures
|
||||
for index, row in literaturesTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and literaturesTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed literature {literaturesTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
literatureValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
literatureValues['fd58e0884f7cf63f8436c2789fcd2745'] = value # UUID
|
||||
case 'f__9990_kommentar':
|
||||
literatureValues['f3208633f7767cc9f5e44e768818df20'] = value # Comment
|
||||
case 'f__8270_verfasser':
|
||||
literatureValues['f60a88060c75068b4bf2eefd5221793f'] = value # Creator
|
||||
case 'f__8324_ersch_jahr':
|
||||
literatureValues['fdae7bd743ae58bf623feca3a26bcf6c'] = value # Date
|
||||
case 'f__8280_hrsg':
|
||||
literatureValues['fd0bc706876adee304892f8f9e34567f'] = value # Editor
|
||||
case 'f__8346_signatur':
|
||||
literatureValues['fb434c214be21f7e82a851d6524c2850'] = value # Identifier
|
||||
case 'f__9970_schlagwort':
|
||||
literatureValues['f1a55055944adf5d4e866a1768633a7f'] = value # Keyword
|
||||
case 'f__8200_lit_dok_nr_':
|
||||
literatureValues['f3bdd54b9ea5808a571200e9c60e103e'] = value # Literature Document Identifier
|
||||
case 'f__9971_sw_goldschmied':
|
||||
literatureValues['f21a286fec5d48ea238c10877ee2b0db'] = value # Mentioned Actor
|
||||
case 'f__8308_bibl_zusatz':
|
||||
literatureValues['f1674a743a13a3d74b0c6ebb2cf0043f'] = value # Note
|
||||
case 'f__8319_seitenangabe':
|
||||
literatureValues['f0d1716a40498f52abd4a6522aa5f3ef'] = value # Pages
|
||||
case 'f__8320_ersch_ort':
|
||||
literatureValues['fc3cafc0f542cef2a0e1189873ff58a3'] = value # Publication Place
|
||||
case 'f__8300_serientitel':
|
||||
literatureValues['f660f34eb7091c1b0f4b492e49a0e71b'] = value # Series Title
|
||||
case 'f__8330_lit_kurzt_':
|
||||
literatureValues['f84416d4380cdd30e8b9fcea57f58957'] = value # Shorttitle
|
||||
case 'f__8307_titelzusatz':
|
||||
literatureValues['f8521679ac8f6441ddb086f1c5ed7528'] = value # Subtitle
|
||||
case 'f__8290_titel':
|
||||
literatureValues['fa1ae40cc9940569d5a1e3ea13e33488'] = value # Title
|
||||
case 'f__8260_art':
|
||||
literatureValues['f92c6453d265a952a56252e7d93cedea'] = value # Type
|
||||
# Digitisation Process
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
literatureValues['fd58e0884f7cf63f8436c2789fcd2745'] = value # UUID
|
||||
case 'f__9990_kommentar':
|
||||
literatureValues['f3208633f7767cc9f5e44e768818df20'] = value # Comment
|
||||
case 'f__8270_verfasser':
|
||||
literatureValues['f60a88060c75068b4bf2eefd5221793f'] = value # Creator
|
||||
case 'f__8324_ersch_jahr':
|
||||
literatureValues['fdae7bd743ae58bf623feca3a26bcf6c'] = value # Date
|
||||
case 'f__8280_hrsg':
|
||||
literatureValues['fd0bc706876adee304892f8f9e34567f'] = value # Editor
|
||||
case 'f__8346_signatur':
|
||||
literatureValues['fb434c214be21f7e82a851d6524c2850'] = value # Identifier
|
||||
case 'f__9970_schlagwort':
|
||||
literatureValues['f1a55055944adf5d4e866a1768633a7f'] = value # Keyword
|
||||
case 'f__8200_lit_dok_nr_':
|
||||
literatureValues['f3bdd54b9ea5808a571200e9c60e103e'] = value # Literature Document Identifier
|
||||
case 'f__9971_sw_goldschmied':
|
||||
literatureValues['f21a286fec5d48ea238c10877ee2b0db'] = value # Mentioned Actor
|
||||
case 'f__8308_bibl_zusatz':
|
||||
literatureValues['f1674a743a13a3d74b0c6ebb2cf0043f'] = value # Note
|
||||
case 'f__8319_seitenangabe':
|
||||
literatureValues['f0d1716a40498f52abd4a6522aa5f3ef'] = value # Pages
|
||||
case 'f__8320_ersch_ort':
|
||||
literatureValues['fc3cafc0f542cef2a0e1189873ff58a3'] = value # Publication Place
|
||||
case 'f__8300_serientitel':
|
||||
literatureValues['f660f34eb7091c1b0f4b492e49a0e71b'] = value # Series Title
|
||||
case 'f__8330_lit_kurzt_':
|
||||
literatureValues['f84416d4380cdd30e8b9fcea57f58957'] = value # Shorttitle
|
||||
case 'f__8307_titelzusatz':
|
||||
literatureValues['f8521679ac8f6441ddb086f1c5ed7528'] = value # Subtitle
|
||||
case 'f__8290_titel':
|
||||
literatureValues['fa1ae40cc9940569d5a1e3ea13e33488'] = value # Title
|
||||
case 'f__8260_art':
|
||||
literatureValues['f92c6453d265a952a56252e7d93cedea'] = value # Type
|
||||
# Digitisation Process
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
|
||||
# Set Digitisation Process
|
||||
literatureValues['f59a2ad5cce3e51f172215ea88afac41'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
# Set Digitisation Process
|
||||
literatureValues['f59a2ad5cce3e51f172215ea88afac41'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Material
|
||||
literature = Entity(api=api, fields=literatureValues, bundle_id='bafe9c3d3b640d4d1a16b104f367ac91')
|
||||
api.save(literature)
|
||||
# Create Material
|
||||
literature = Entity(api=api, fields=literatureValues, bundle_id='bafe9c3d3b640d4d1a16b104f367ac91')
|
||||
api.save(literature)
|
||||
|
||||
print(f'Created literature {index}: {literature.uri} of {len(literaturesTable)}')
|
||||
print(f'Created literature {index}: {literature.uri} of {len(literaturesTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'literatureId': literatureValues['f3bdd54b9ea5808a571200e9c60e103e'][0], 'uuid': literatureValues['fd58e0884f7cf63f8436c2789fcd2745'][0], 'uri': literature.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedLiteratures.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'docId': literatureValues['f3bdd54b9ea5808a571200e9c60e103e'][0], 'uuid': literatureValues['fd58e0884f7cf63f8436c2789fcd2745'][0], 'uri': literature.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,197 +5,182 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMark(api, engine):
|
||||
print('Importing inspection marks...')
|
||||
tableName = 'c__bez'
|
||||
bundleId = 'baad021dfda9b89d5ba407dd0fca0d03'
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
# Load inspectionMark table
|
||||
inspectionMarkTable = pd.read_sql_table('c__bez', con=engine)
|
||||
|
||||
# Simple log
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedInspectionMarks.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'inspectionMarkId', 'uuid', 'uri'])
|
||||
|
||||
# Load inspectionMark table
|
||||
inspectionMarkTable = pd.read_sql_table('c__bez', con=engine)
|
||||
|
||||
# Create inspectionMark
|
||||
for index, row in inspectionMarkTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and inspectionMarkTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed inspectionMark {inspectionMarkTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
inspectionMarkValues = {}
|
||||
creationValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
dimensionValues = {}
|
||||
featureValues = {}
|
||||
featureDimensionValues = {}
|
||||
imageValues = {}
|
||||
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
|
||||
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create inspectionMark
|
||||
for index, row in inspectionMarkTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and inspectionMarkTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed inspectionMark {inspectionMarkTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{new_line', '')
|
||||
value = str(value).replace('}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification
|
||||
# for nested semantics, because we need to be efficient.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
inspectionMarkValues = {}
|
||||
creationValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
dimensionValues = {}
|
||||
featureValues = {}
|
||||
featureDimensionValues = {}
|
||||
imageValues = {}
|
||||
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
|
||||
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
inspectionMarkValues['fb125fa322fe7c3c98446e382b1f22b9'] = value # UUID
|
||||
case 'f__9990_kommentar':
|
||||
inspectionMarkValues['f31eb01562daaeaa27d6c02012fccf02'] = value # Comment
|
||||
case 'f__67bn_bz_kat_nr':
|
||||
inspectionMarkValues['f275b0537ab47b15c24f31ad8a8aa226'] = value # Inspection Mark Cataloque Identifer
|
||||
case 'f__67b0_bz_dok_nr':
|
||||
inspectionMarkValues['f1cfc4053651e47d629bd5fc9fd707c1'] = value # Inspection Mark Identifier
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
inspectionMarkValues['fcdb19d95832ac030d353b5ba92796b7'] = value # Mark Document Identifier
|
||||
case 'f__8470_aufnahmenr_':
|
||||
inspectionMarkValues['f58febbb759a07a75edf9978771c1013'] = value # Recording Number
|
||||
case 'f__68an_abdruck_nr_':
|
||||
inspectionMarkValues['f7c155684a82af5caa3191f2646b51da'] = value # Reproduction Number
|
||||
case 'f__68nk_besonderheiten':
|
||||
inspectionMarkValues['fd980fca65d9ffd2f95859c4c5b9d284'] = value # Special Feature
|
||||
case 'f__68ne_haste_schraegstr_':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nf_n_knick':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash_kink'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ng_ueberg__serifen':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = [
|
||||
'transition_serif_haste'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nh_dicke_ser__max_':
|
||||
# We map (features) dimensions to Dimension entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['maximum_thickness'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('MArken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case 'f__ptxt_plug_in_text':
|
||||
inspectionMarkValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification
|
||||
# for nested semantics, because we need to be efficient.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
inspectionMarkValues['fb125fa322fe7c3c98446e382b1f22b9'] = value # UUID
|
||||
case 'f__9990_kommentar':
|
||||
inspectionMarkValues['f31eb01562daaeaa27d6c02012fccf02'] = value # Comment
|
||||
case 'f__67bn_bz_kat_nr':
|
||||
inspectionMarkValues['f275b0537ab47b15c24f31ad8a8aa226'] = value # Inspection Mark Cataloque Identifer
|
||||
case 'f__67b0_bz_dok_nr':
|
||||
inspectionMarkValues['f1cfc4053651e47d629bd5fc9fd707c1'] = value # Inspection Mark Identifier
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
inspectionMarkValues['fcdb19d95832ac030d353b5ba92796b7'] = value # Mark Document Identifier
|
||||
case 'f__8470_aufnahmenr_':
|
||||
inspectionMarkValues['f58febbb759a07a75edf9978771c1013'] = value # Recording Number
|
||||
case 'f__68an_abdruck_nr_':
|
||||
inspectionMarkValues['f7c155684a82af5caa3191f2646b51da'] = value # Reproduction Number
|
||||
case 'f__68nk_besonderheiten':
|
||||
inspectionMarkValues['fd980fca65d9ffd2f95859c4c5b9d284'] = value # Special Feature
|
||||
case 'f__68ne_haste_schraegstr_':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nf_n_knick':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['transition_haste_slash_kink'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68ng_ueberg__serifen':
|
||||
# We map features to Feature entity.
|
||||
featureValues.setdefault(key, {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = [
|
||||
'transition_serif_haste'] # Type
|
||||
featureValues[key]['fbccee184fa531d58b3b46eb8ac4626f'] = value # Feature
|
||||
featureValues[key]['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__68nh_dicke_ser__max_':
|
||||
# We map (features) dimensions to Dimension entity.
|
||||
featureDimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['maximum_thickness'] # Type
|
||||
featureDimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
featureDimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte/', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('MArken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case 'f__ptxt_plug_in_text':
|
||||
inspectionMarkValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create (feature) Dimension entities and add their UUIDs to a list
|
||||
# because we link Feature and its Dimension over the UUID
|
||||
featureDimension = []
|
||||
for key, value in featureDimensionValues.items():
|
||||
if value:
|
||||
featureDimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558') # Dimension Bundle
|
||||
api.save(featureDimensionItem)
|
||||
featureDimension.append(value['f802fd7bf45be523a9b188411a591420'][0]) # Dimension UUID
|
||||
# Create (feature) Dimension entities and add their UUIDs to a list
|
||||
# because we link Feature and its Dimension over the UUID
|
||||
featureDimension = []
|
||||
for key, value in featureDimensionValues.items():
|
||||
if value:
|
||||
featureDimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558') # Dimension Bundle
|
||||
api.save(featureDimensionItem)
|
||||
featureDimension.append(value['f802fd7bf45be523a9b188411a591420'][0]) # Dimension UUID
|
||||
|
||||
# Add the serif feature t the feature list
|
||||
if featureDimension:
|
||||
featureValues.setdefault('serif', {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['serif'] # Feature Type
|
||||
featureValues['serif']['f0f825f5d3a6f0e2d67eee311b94cd6f'] = featureDimension # Dimension UUIDs
|
||||
featureValues['serif']['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
# Add the serif feature t the feature list
|
||||
if featureDimension:
|
||||
featureValues.setdefault('serif', {})['fdfb3c4f670aa1260924cecd09ca4bbb'] = ['serif'] # Feature Type
|
||||
featureValues['serif']['f0f825f5d3a6f0e2d67eee311b94cd6f'] = featureDimension # Dimension UUIDs
|
||||
featureValues['serif']['f299e2a145b508e376f2bf2e44cbe219'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
# Create Dimension entities and add their UUIDs to a list
|
||||
# because we link Mark and Dimension over the UUID
|
||||
feature = []
|
||||
for key, value in featureValues.items():
|
||||
if value:
|
||||
featureItem = Entity(api=api, fields=value, bundle_id='b393e1c3db202fbb7a8b54e65eb38227') # Feature Bundle
|
||||
api.save(featureItem)
|
||||
feature.append(value['f299e2a145b508e376f2bf2e44cbe219'][0]) # Feature UUID
|
||||
# Create Dimension entities and add their UUIDs to a list
|
||||
# because we link Mark and Dimension over the UUID
|
||||
feature = []
|
||||
for key, value in featureValues.items():
|
||||
if value:
|
||||
featureItem = Entity(api=api, fields=value, bundle_id='b393e1c3db202fbb7a8b54e65eb38227') # Feature Bundle
|
||||
api.save(featureItem)
|
||||
feature.append(value['f299e2a145b508e376f2bf2e44cbe219'][0]) # Feature UUID
|
||||
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
|
||||
# Create Image Assignment entities and add their UUIDs to a list
|
||||
# because we link Artifact and Image Assignment over the UUID
|
||||
if imageList:
|
||||
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
|
||||
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
|
||||
api.save(imageAssignment)
|
||||
# Create Image Assignment entities and add their UUIDs to a list
|
||||
# because we link Artifact and Image Assignment over the UUID
|
||||
if imageList:
|
||||
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
|
||||
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
|
||||
api.save(imageAssignment)
|
||||
|
||||
# Create Digitisation Process
|
||||
if digitisationProcessValues:
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
# Create Digitisation Process
|
||||
if digitisationProcessValues:
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
|
||||
# Add the field values for reference
|
||||
if feature:
|
||||
inspectionMarkValues['f7eba97158ff1b9afc5fa0a5823145b4'] = feature # Feature UUID
|
||||
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
|
||||
inspectionMarkValues['fc697a5ad97f3277f20f67e18085b544'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
inspectionMarkValues['f998036ccd7daaf2d9938934c93938f3'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
# Add the field values for reference
|
||||
if feature:
|
||||
inspectionMarkValues['f7eba97158ff1b9afc5fa0a5823145b4'] = feature # Feature UUID
|
||||
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
|
||||
inspectionMarkValues['fc697a5ad97f3277f20f67e18085b544'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
inspectionMarkValues['f998036ccd7daaf2d9938934c93938f3'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Mark
|
||||
inspectionMark = Entity(api=api, fields=inspectionMarkValues, bundle_id='baad021dfda9b89d5ba407dd0fca0d03')
|
||||
api.save(inspectionMark)
|
||||
# Create Mark
|
||||
inspectionMark = Entity(api=api, fields=inspectionMarkValues, bundle_id=bundleId)
|
||||
api.save(inspectionMark)
|
||||
|
||||
print(f'Created inspectionMark number {index}: {inspectionMark.uri} of {len(inspectionMarkTable)}')
|
||||
print(f'Created inspectionMark number {index}: {inspectionMark.uri} of {len(inspectionMarkTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'inspectionMarkId': inspectionMarkValues['fcdb19d95832ac030d353b5ba92796b7'][0], 'uuid': inspectionMarkValues['fb125fa322fe7c3c98446e382b1f22b9'][0], 'uri': inspectionMark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedInspectionMarks.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'docId': inspectionMarkValues['fcdb19d95832ac030d353b5ba92796b7'][0], 'uuid': inspectionMarkValues['fb125fa322fe7c3c98446e382b1f22b9'][0], 'uri': inspectionMark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing inspection marks')
|
||||
|
|
|
|||
|
|
@ -5,84 +5,70 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importJournalAssignment(api, engine):
    """Create one entity per row of the ``c__8310_zeitschrift`` table.

    Every non-empty, mapped column of a row becomes a list-valued field of a
    new ``Entity`` that is saved through ``api``. After each save the row is
    appended to ``./logs/c__8310_zeitschrift.csv``; rows that are already in
    that log (same id at the same position) are skipped, so an interrupted
    run can be restarted.

    Args:
        api: API client; handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    test = False  # when True, stop after the first imported row
    tableName = "c__8310_zeitschrift"
    bundleId = 'b5508ef3bb28f139ebdd9f6d545825c4'
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id in the target bundle.
    fieldMap = {
        'f__uuid': 'fadaaac928ec555c2574b3a9a4f5543d',  # UUID
        # NOTE(review): the two labels below are the ones the original script
        # used; they do not obviously match the column names - confirm.
        'f__8310_zeitschrift': 'fd8fc741f6d4142637c061900b1cdd01',  # "Client"
        'f__8312_zusatzzschr': 'f51edfb30c99d28bee1cf32b81190254',  # "Date"
    }
    uuidField = fieldMap['f__uuid']

    # Resume log: one line per row that has already been imported.
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    sqlTable = pd.read_sql_table(tableName, con=engine)

    for index, row in sqlTable.iterrows():
        # The log is written in table order, so an equal id at the same
        # position means an earlier run already handled this row.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        entityValues = {}
        for key, value in row.items():
            # Skip cells without a value (None, empty string, NaN/NaT).
            if value is None or value == '' or pd.isna(value):
                continue
            if key == 'id':
                # The id is only needed for the log, not as an entity field.
                continue
            fieldId = fieldMap.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Line breaks inside a cell are encoded as a "new_line" marker,
            # optionally preceded by the "&" separator; reduce all of them to
            # a single "&".
            # NOTE(review): both brace spellings are handled because the
            # exact marker in the data is not visible here - confirm.
            text = str(value)
            for marker in ('###{new_line}###', '###{{new_line}}###'):
                text = text.replace('&' + marker, '&').replace(marker, '&')

            # Field values have to be lists: split "&"-separated cells
            # (strip() also removes the blanks around " & "), wrap the rest.
            if '&' in text:
                entityValues[fieldId] = [part.strip() for part in text.split('&')]
            else:
                entityValues[fieldId] = [text]

        # Create and store the entity.
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        print(f'Created journal assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write the log after every row so a crash loses at most one entry.
        # The id is taken from the row itself; an empty UUID is logged as ''.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [''])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)
        if test:
            exit()

    print('finish')
|
||||
|
|
|
|||
|
|
@ -5,84 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importLiteratureReferenceAssignment(api, engine):
    """Create one entity per row of the ``c__8330_lit_kurzt_`` table.

    Every non-empty, mapped column of a row becomes a list-valued field of a
    new ``Entity`` that is saved through ``api``. After each save the row is
    appended to ``./logs/c__8330_lit_kurzt_.csv``; rows that are already in
    that log (same id at the same position) are skipped, so an interrupted
    run can be restarted.

    Args:
        api: API client; handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    print('Importing literature reference assignments...')

    tableName = "c__8330_lit_kurzt_"
    bundleId = 'bdda154adecb26deed2d8b67dab8a0db'  # Literature Reference Assignment
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id in the target bundle.
    fieldMap = {
        'f__uuid': 'facb3fc9d13472b00f59d506acece535',  # UUID
        'f__8334_stelle': 'f099466b679af216600fdbfa722ddcb7',  # Literature Reference
        'f__833r_repro_datei': 'fe145f4fec0a71a954bc3c75cf7b370a',  # Repro File
        'f__8330_lit_kurzt_': 'ff2d656706c2ff11089f196ccab51843',  # Short Title
    }
    uuidField = fieldMap['f__uuid']

    # Resume log: one line per row that has already been imported.
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    sqlTable = pd.read_sql_table(tableName, con=engine)

    for index, row in sqlTable.iterrows():
        # The log is written in table order, so an equal id at the same
        # position means an earlier run already handled this row.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        entityValues = {}
        for key, value in row.items():
            # Skip cells without a value (None, empty string, NaN/NaT).
            if value is None or value == '' or pd.isna(value):
                continue
            if key == 'id':
                # The id is only needed for the log, not as an entity field.
                continue
            fieldId = fieldMap.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Line breaks inside a cell are encoded as a "new_line" marker,
            # optionally preceded by the "&" separator; reduce all of them to
            # a single "&".
            # NOTE(review): both brace spellings are handled because the
            # exact marker in the data is not visible here - confirm.
            text = str(value)
            for marker in ('###{new_line}###', '###{{new_line}}###'):
                text = text.replace('&' + marker, '&').replace(marker, '&')

            # Field values have to be lists: split "&"-separated cells
            # (strip() also removes the blanks around " & "), wrap the rest.
            if '&' in text:
                entityValues[fieldId] = [part.strip() for part in text.split('&')]
            else:
                entityValues[fieldId] = [text]

        # Create and store the entity.
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        print(f'Created literature reference assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write the log after every row so a crash loses at most one entry.
        # A row without a UUID is logged with an empty string instead of
        # reusing the previous row's value.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [''])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')
|
||||
|
|
|
|||
|
|
@ -5,84 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importParentLiteratureAssignment(api, engine):
    """Create one entity per row of the ``c__8292_uebergeordn_publ_`` table.

    Every non-empty, mapped column of a row becomes a list-valued field of a
    new ``Entity`` that is saved through ``api``. After each save the row is
    appended to ``./logs/c__8292_uebergeordn_publ_.csv``; rows that are
    already in that log (same id at the same position) are skipped, so an
    interrupted run can be restarted.

    Args:
        api: API client; handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    test = False  # when True, stop after the first imported row
    tableName = "c__8292_uebergeordn_publ_"
    bundleId = 'bf55dda81ca0ddb4237a0d3ea495579b'  # Parent literature assignment
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id in the target bundle.
    fieldMap = {
        'f__uuid': 'f8cced7d1c2f8d0d3fa9aa36b7e123bd',  # UUID
        'f__8292_uebergeordn_publ_': 'f97ea22d9dd853c8f1cced6bc85c59b2',  # Parent literature
        'f__8294_zusatzsatit': 'faf62c71a8e5844241899c0aa7801a9c',  # Subtitle
    }
    uuidField = fieldMap['f__uuid']

    # Resume log: one line per row that has already been imported.
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    sqlTable = pd.read_sql_table(tableName, con=engine)

    for index, row in sqlTable.iterrows():
        # The log is written in table order, so an equal id at the same
        # position means an earlier run already handled this row.
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        entityValues = {}
        for key, value in row.items():
            # Skip cells without a value (None, empty string, NaN/NaT).
            if value is None or value == '' or pd.isna(value):
                continue
            if key == 'id':
                # The id is only needed for the log, not as an entity field.
                continue
            fieldId = fieldMap.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Line breaks inside a cell are encoded as a "new_line" marker,
            # optionally preceded by the "&" separator; reduce all of them to
            # a single "&".
            # NOTE(review): both brace spellings are handled because the
            # exact marker in the data is not visible here - confirm.
            text = str(value)
            for marker in ('###{new_line}###', '###{{new_line}}###'):
                text = text.replace('&' + marker, '&').replace(marker, '&')

            # Field values have to be lists: split "&"-separated cells
            # (strip() also removes the blanks around " & "), wrap the rest.
            if '&' in text:
                entityValues[fieldId] = [part.strip() for part in text.split('&')]
            else:
                entityValues[fieldId] = [text]

        # Create and store the entity.
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)
        print(f'Created parent literature assignment {index}: {entity.uri} of {len(sqlTable)}')

        # Write the log after every row so a crash loses at most one entry.
        # A row without a UUID is logged with an empty string instead of
        # reusing the previous row's value.
        logEntry = {
            'id': row['id'],
            'uuid': entityValues.get(uuidField, [''])[0],
            'uri': entity.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)
        if test:
            exit()

    print('finish')
|
||||
|
|
|
|||
|
|
@ -5,81 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkLocation(api, engine):
    """Create one entity per row of the ``c__67b0_bz_dok_nr`` table.

    Every non-empty, mapped column of a row becomes a list-valued field of a
    new ``Entity`` that is saved through ``api``. After each save the row is
    appended to ``./logs/c__67b0_bz_dok_nr.csv``; rows that are already in
    that log (same id at the same position) are skipped, so an interrupted
    run can be restarted.

    Args:
        api: API client; handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    print('Importing inspection mark locations...')

    tableName = 'c__67b0_bz_dok_nr'
    bundleId = 'b4158ec3a326d8ab504062296a82f13a'
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id in the target bundle.
    fieldMap = {
        'f__uuid': 'f65178b07306225efb0b556f6e4f54a5',  # UUID
        'f__67b0_bz_dok_nr': 'f2d0b120ed40e17a5ad3f31d594d9b1c',  # Inspection Mark Identifier
        'f__67b4_anbr_ort': 'f8a6343c2a8a5523eb2f0602f2baae04',  # Location
    }
    uuidField = fieldMap['f__uuid']

    # Resume log: one line per row that has already been imported.
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    inspectionMarkLocationsTable = pd.read_sql_table(tableName, con=engine)

    for index, row in inspectionMarkLocationsTable.iterrows():
        # The log is written in table order, so an equal id at the same
        # position means an earlier run already handled this row.
        if index < len(processedRows) and inspectionMarkLocationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed inspectionMarkLocation {inspectionMarkLocationsTable.loc[index, "id"]}')
            continue

        inspectionMarkLocationValues = {}
        for key, value in row.items():
            # Skip cells without a value (None, empty string, NaN/NaT).
            if value is None or value == '' or pd.isna(value):
                continue
            if key == 'id':
                # The id is only needed for the log, not as an entity field.
                continue
            fieldId = fieldMap.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Line breaks inside a cell are encoded as a "new_line" marker,
            # optionally preceded by the "&" separator; reduce all of them to
            # a single "&".
            # NOTE(review): both brace spellings are handled because the
            # exact marker in the data is not visible here - confirm.
            text = str(value)
            for marker in ('###{new_line}###', '###{{new_line}}###'):
                text = text.replace('&' + marker, '&').replace(marker, '&')

            # Field values have to be lists: split "&"-separated cells
            # (strip() also removes the blanks around " & "), wrap the rest.
            if '&' in text:
                inspectionMarkLocationValues[fieldId] = [part.strip() for part in text.split('&')]
            else:
                inspectionMarkLocationValues[fieldId] = [text]

        # Create and store the entity.
        inspectionMarkLocation = Entity(api=api, fields=inspectionMarkLocationValues, bundle_id=bundleId)
        api.save(inspectionMarkLocation)
        print(f'Created inspectionMarkLocation {index}: {inspectionMarkLocation.uri}')

        # Write the log after every row so a crash loses at most one entry.
        # .get() keeps a row without UUID from raising after the entity has
        # already been saved, which would leave it unlogged.
        logEntry = {
            'id': row['id'],
            'uuid': inspectionMarkLocationValues.get(uuidField, [''])[0],
            'uri': inspectionMarkLocation.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finished importing inspection mark locations')
|
||||
|
|
|
|||
|
|
@ -5,80 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkRelation(api, engine):
    """Create one entity per row of the ``c__67b7_beziehung`` table.

    Every non-empty, mapped column of a row becomes a list-valued field of a
    new ``Entity`` that is saved through ``api``. After each save the row is
    appended to ``./logs/c__67b7_beziehung.csv``; rows that are already in
    that log (same id at the same position) are skipped, so an interrupted
    run can be restarted.

    Args:
        api: API client; handed to ``Entity`` and used for ``api.save``.
        engine: Database connection passed to ``pd.read_sql_table``.
    """
    print('Importing inspection mark relations...')

    tableName = 'c__67b7_beziehung'
    bundleId = 'bd9b0ff8dc3a6d9284e1798531389bf1'
    logPath = f'./logs/{tableName}.csv'

    # Table column -> field id in the target bundle.
    fieldMap = {
        'f__uuid': 'ffd502413c286815811ae5546f73935b',  # UUID
        'f__67b8_bez_bz_nr': 'ff3f6dd331ed27515f6721ac8312706c',  # Inspection Mark Identifier
        'f__67b7_beziehung': 'f1cb8db7e1c26a5b5fe0c9d8fca60de2',  # Relation
    }
    uuidField = fieldMap['f__uuid']

    # Resume log: one line per row that has already been imported.
    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    inspectionMarkRelationsTable = pd.read_sql_table(tableName, con=engine)

    for index, row in inspectionMarkRelationsTable.iterrows():
        # The log is written in table order, so an equal id at the same
        # position means an earlier run already handled this row.
        if index < len(processedRows) and inspectionMarkRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed inspectionMarkRelation {inspectionMarkRelationsTable.loc[index, "id"]}')
            continue

        inspectionMarkRelationValues = {}
        for key, value in row.items():
            # Skip cells without a value (None, empty string, NaN/NaT).
            if value is None or value == '' or pd.isna(value):
                continue
            if key == 'id':
                # The id is only needed for the log, not as an entity field.
                continue
            fieldId = fieldMap.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Line breaks inside a cell are encoded as a "new_line" marker,
            # optionally preceded by the "&" separator; reduce all of them to
            # a single "&".
            # NOTE(review): both brace spellings are handled because the
            # exact marker in the data is not visible here - confirm.
            text = str(value)
            for marker in ('###{new_line}###', '###{{new_line}}###'):
                text = text.replace('&' + marker, '&').replace(marker, '&')

            # Field values have to be lists: split "&"-separated cells
            # (strip() also removes the blanks around " & "), wrap the rest.
            if '&' in text:
                inspectionMarkRelationValues[fieldId] = [part.strip() for part in text.split('&')]
            else:
                inspectionMarkRelationValues[fieldId] = [text]

        # Create and store the entity (bundle id comes from the single
        # constant above instead of a repeated literal).
        inspectionMarkRelation = Entity(api=api, fields=inspectionMarkRelationValues, bundle_id=bundleId)
        api.save(inspectionMarkRelation)
        print(f'Created inspection mark relation {index}: {inspectionMarkRelation.uri}')

        # Write the log after every row so a crash loses at most one entry.
        # .get() keeps a row without UUID from raising after the entity has
        # already been saved, which would leave it unlogged.
        logEntry = {
            'id': row['id'],
            'uuid': inspectionMarkRelationValues.get(uuidField, [''])[0],
            'uri': inspectionMarkRelation.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')
|
||||
|
|
|
|||
|
|
@ -5,80 +5,65 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarkDatingInfo(api, engine):
|
||||
print('Importing mark dating info...')
|
||||
tableName = 'c__68dm_datierung_marke'
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
# Load sources table
|
||||
datingInfosTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedDatingInfo.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
datingInfosTable = pd.read_sql_table('c__68dm_datierung_marke', con=engine)
|
||||
|
||||
datingInfoValues = {}
|
||||
|
||||
# Create datingInfos
|
||||
for index, row in datingInfosTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and datingInfosTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed datingInfo {datingInfosTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
datingInfoValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create datingInfos
|
||||
for index, row in datingInfosTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and datingInfosTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed datingInfo {datingInfosTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
# Create Entity property dicts
|
||||
datingInfoValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
case 'f__uuid':
|
||||
datingInfoValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
case 'f__68dm_datierung_marke':
|
||||
datingInfoValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__68bm_bem_dat_marke':
|
||||
datingInfoValues['fe7870b5a86040d81140bccb01697765'] = value # Note
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
datingInfoValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
case 'f__68dm_datierung_marke':
|
||||
datingInfoValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__68bm_bem_dat_marke':
|
||||
datingInfoValues['fe7870b5a86040d81140bccb01697765'] = value # Note
|
||||
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
datingInfo = Entity(api=api, fields=datingInfoValues, bundle_id='b9cfb95e627e1710cf8d736d4ca5db64') #Dating Information Assignment
|
||||
api.save(datingInfo)
|
||||
# Create Material
|
||||
datingInfo = Entity(api=api, fields=datingInfoValues, bundle_id='b9cfb95e627e1710cf8d736d4ca5db64') #Dating Information Assignment
|
||||
api.save(datingInfo)
|
||||
|
||||
print(f'Created datingInfo {index}: {datingInfo.uri} of {len(datingInfosTable)}')
|
||||
print(f'Created mark dating info {index}: {datingInfo.uri} of {len(datingInfosTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'uuid': datingInfoValues['f74baaf58e49393cc89d6616ee197901'][0], 'uri': datingInfo.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedDatingInfo.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': datingInfoValues['f74baaf58e49393cc89d6616ee197901'][0], 'uri': datingInfo.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing mark dating info')
|
||||
|
|
|
|||
|
|
@ -1,97 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
|
||||
tableName = "c__6760_markenart"
|
||||
bundleId = 'bc7ce6906f78e760f22ff13226b1332d' # Mark information assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
entityValues['f3b8aaf7e79229b4da8214d491e375ec'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5064_num__dat_':
|
||||
entityValues['fe6921098808e68cae68f0858411826c'] = value # Artist Assignment
|
||||
case 'f__6894_anbr_ort':
|
||||
entityValues['f694ed57271ab7be57249e0ee5c41ba4'] = value # Location
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
entityValues['fdd3380d4a11654f32687429796cabc3'] = value # Mark Document Number
|
||||
case 'f__6760_markenart':
|
||||
entityValues['fd381aa9c3ebdf417e6cbccd60ede279'] = value # Mark Type
|
||||
case 'f__684c_bedeutung_bz':
|
||||
entityValues['f4947de52885f517baef0cdf3cb53b61'] = value # Meaning Inspection Mark
|
||||
case 'f__684a_bedeutung_mz':
|
||||
entityValues['f542c4c945725c6fdc5ab6409a877f02'] = value # Meaning Master Mark
|
||||
case 'f__6770_rosenb_nr_':
|
||||
entityValues['f0ff7020a9c25ea2706875837fe61b04'] = value # Rosenberg Number
|
||||
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
@ -5,94 +5,80 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importBirth(api, engine):
|
||||
print('Importing birth...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
test = False
|
||||
tableName = "c__3270_geb_datum"
|
||||
bundleId = 'b54049ec931bffb62359b4bdb11435fc'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
tableName = "c__3270_geb_datum"
|
||||
bundleId = 'b54049ec931bffb62359b4bdb11435fc'
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['ff2a4da76944f5aba7d625c169d9ff66'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3290_geb_ort':
|
||||
entityValues['fe71d86a78289c0b54242f5a3b67f81f'] = value # Birth place
|
||||
case 'f__3270_geb_datum':
|
||||
entityValues['ff3a9f042976963ac356db02d764b002'] = value # Date
|
||||
case 'f__32ls_lit__stelle':
|
||||
entityValues['fa03638df8a53e9aae38471fe10f409a'] = value # Literature Reference
|
||||
case 'f__32lt_lit__kurztitel':
|
||||
entityValues['f1af25f1770bd0db1982780697600cf4'] = value # Literature short title
|
||||
case 'f__32bm_bem_geburt':
|
||||
entityValues['f572f5e0f02f1c9b7c3ece5ffcf86c43'] = value # Note
|
||||
case 'f__32qs_quelle_stelle':
|
||||
entityValues['f1ebceaa76bac9ebf266733f64caa37c'] = value # Source reference
|
||||
case 'f__32qt_quelle_kurztitel':
|
||||
entityValues['f1a3597a874b3df9c1d87c5a32b487b0'] = value # Source short title
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['ff2a4da76944f5aba7d625c169d9ff66'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3290_geb_ort':
|
||||
entityValues['fe71d86a78289c0b54242f5a3b67f81f'] = value # Birth place
|
||||
case 'f__3270_geb_datum':
|
||||
entityValues['ff3a9f042976963ac356db02d764b002'] = value # Date
|
||||
case 'f__32ls_lit__stelle':
|
||||
entityValues['fa03638df8a53e9aae38471fe10f409a'] = value # Literature Reference
|
||||
case 'f__32lt_lit__kurztitel':
|
||||
entityValues['f1af25f1770bd0db1982780697600cf4'] = value # Literature short title
|
||||
case 'f__32bm_bem_geburt':
|
||||
entityValues['f572f5e0f02f1c9b7c3ece5ffcf86c43'] = value # Note
|
||||
case 'f__32qs_quelle_stelle':
|
||||
entityValues['f1ebceaa76bac9ebf266733f64caa37c'] = value # Source reference
|
||||
case 'f__32qt_quelle_kurztitel':
|
||||
entityValues['f1a3597a874b3df9c1d87c5a32b487b0'] = value # Source short title
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created birth {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing birth')
|
||||
|
|
|
|||
|
|
@ -5,98 +5,84 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importDeath(api, engine):
|
||||
print('Importing death...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
test = False
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
tableName = "c__3330_todes_dat_"
|
||||
bundleId = 'b487c08016f572b9ecf3f9173339fec3'
|
||||
|
||||
test = True
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
tableName = "c__3330_todes_dat_"
|
||||
bundleId = 'b487c08016f572b9ecf3f9173339fec3'
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f8beb0d372a5cf6f1668c47acf7e53cd'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3330_todes_dat_':
|
||||
entityValues['f385a8c323f0a2f49d8eb175e1535b1b'] = value # Death date
|
||||
case 'f__33ls_lit__stelle':
|
||||
entityValues['fb4f168aa6a73169ef0350408a6260cc'] = value # Literature Reference
|
||||
case 'f__33lt_lit__kurztitel':
|
||||
entityValues['fd4ed8828d72a575f8609ba2c442b4b2'] = value # Literature short title
|
||||
case 'f__33bm_bem_tod':
|
||||
entityValues['f3028661430081ae44aa950abe0afbac'] = value # Note
|
||||
case 'f__3350_tod_ort':
|
||||
entityValues['fd80c2c8ba4c64c01e9c46ac7ae00d93'] = value # Place
|
||||
case 'f__33qs_quelle_stelle':
|
||||
entityValues['fd98cf7fbc0de4529e2a2d5e0b0c28bf'] = value # Source reference
|
||||
case 'f__33qt_quelle_kurztitel':
|
||||
entityValues['f973818e6c3d36ddd44ba3a713e308e6'] = value # Source short title
|
||||
case 'f__710t_art_ereignis':
|
||||
entityValues['fc039c43502b3525a92a8330d91f7944'] = value # Event type
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f8beb0d372a5cf6f1668c47acf7e53cd'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3330_todes_dat_':
|
||||
entityValues['f385a8c323f0a2f49d8eb175e1535b1b'] = value # Death date
|
||||
case 'f__33ls_lit__stelle':
|
||||
entityValues['fb4f168aa6a73169ef0350408a6260cc'] = value # Literature Reference
|
||||
case 'f__33lt_lit__kurztitel':
|
||||
entityValues['fd4ed8828d72a575f8609ba2c442b4b2'] = value # Literature short title
|
||||
case 'f__33bm_bem_tod':
|
||||
entityValues['f3028661430081ae44aa950abe0afbac'] = value # Note
|
||||
case 'f__3350_tod_ort':
|
||||
entityValues['fd80c2c8ba4c64c01e9c46ac7ae00d93'] = value # Place
|
||||
case 'f__33qs_quelle_stelle':
|
||||
entityValues['fd98cf7fbc0de4529e2a2d5e0b0c28bf'] = value # Source reference
|
||||
case 'f__33qt_quelle_kurztitel':
|
||||
entityValues['f973818e6c3d36ddd44ba3a713e308e6'] = value # Source short title
|
||||
case 'f__710t_art_ereignis':
|
||||
entityValues['fc039c43502b3525a92a8330d91f7944'] = value # Event type
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created death {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
break
|
||||
if test:
|
||||
break
|
||||
|
||||
print('finish')
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,67 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importDating(api, engine):
|
||||
print('Importing dating...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "c__8100_datum"
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "c__8100_datum"
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
uuid = value[0]
|
||||
case 'f__8100_datum':
|
||||
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__81bm_bem__datierung':
|
||||
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8100_datum':
|
||||
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__81bm_bem__datierung':
|
||||
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created dating {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,89 +5,75 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importGoldsmithRelation(api, engine):
|
||||
print('Importing goldsmith relation...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
test = False
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
tableName = "c__3007_bezieh__zu_gs"
|
||||
bundleId = 'bef43e8a958e6a9bee04534b3841f6a0'
|
||||
|
||||
test = False
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
tableName = "c__3007_bezieh__zu_gs"
|
||||
bundleId = 'bef43e8a958e6a9bee04534b3841f6a0'
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f588ff2629e3758ae18ec28c02270d27'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3011_verw__art':
|
||||
entityValues['f2de276528d6b020306b8c7784008e5c'] = value # Actor relation type
|
||||
case 'f__3010_name_gs':
|
||||
entityValues['fc16719402aff4a1afec3387bf2bbc34'] = value # Goldsmith
|
||||
case 'f__30bm_bem_beziehung':
|
||||
entityValues['f7de6b267146070fa38ea5dc45150fa4'] = value # Note
|
||||
case 'f__3007_bezieh__zu_gs':
|
||||
entityValues['f8a46491ebad0ba670384a049402d697'] = value # Relation
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f588ff2629e3758ae18ec28c02270d27'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3011_verw__art':
|
||||
entityValues['f2de276528d6b020306b8c7784008e5c'] = value # Actor relation type
|
||||
case 'f__3010_name_gs':
|
||||
entityValues['fc16719402aff4a1afec3387bf2bbc34'] = value # Goldsmith
|
||||
case 'f__30bm_bem_beziehung':
|
||||
entityValues['f7de6b267146070fa38ea5dc45150fa4'] = value # Note
|
||||
case 'f__3007_bezieh__zu_gs':
|
||||
entityValues['f8a46491ebad0ba670384a049402d697'] = value # Relation
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created goldsmith relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing goldsmith relation')
|
||||
|
|
|
|||
|
|
@ -5,86 +5,72 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importClient(api, engine):
|
||||
print('Importing client...')
|
||||
|
||||
|
||||
tableName = "c__410a_auftraggeber"
|
||||
bundleId = 'b85d9987d762fb4e8ce89a69b0b8de31'
|
||||
tableName = "c__410a_auftraggeber"
|
||||
bundleId = 'b85d9987d762fb4e8ce89a69b0b8de31'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['fe0c458dfe9c0657fd02f312c2154d62'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__410a_auftraggeber':
|
||||
entityValues['f5ab8fb89d793bd5d27740c2b26bf672'] = value # Client
|
||||
case 'f__41bm_bem__auftragg_':
|
||||
entityValues['f0f33e0d5b40933d83260da3876a6cd3'] = value # Note
|
||||
case 'f__41aa_anlass_auftrag':
|
||||
entityValues['f88f0dbbcaff35acc80f1e6be571bd9e'] = value # Reason
|
||||
case 'f__41as_stand_auftragg_':
|
||||
entityValues['f9d4601e72d705c12fd7f09560e90d37'] = value # Status
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['fe0c458dfe9c0657fd02f312c2154d62'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__410a_auftraggeber':
|
||||
entityValues['f5ab8fb89d793bd5d27740c2b26bf672'] = value # Client
|
||||
case 'f__41bm_bem__auftragg_':
|
||||
entityValues['f0f33e0d5b40933d83260da3876a6cd3'] = value # Note
|
||||
case 'f__41aa_anlass_auftrag':
|
||||
entityValues['f88f0dbbcaff35acc80f1e6be571bd9e'] = value # Reason
|
||||
case 'f__41as_stand_auftragg_':
|
||||
entityValues['f9d4601e72d705c12fd7f09560e90d37'] = value # Status
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created client {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,100 +5,86 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMentioned(api, engine):
|
||||
print('Importing mentioned...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
test = False
|
||||
tableName = "c__7060_erwaehnt__datum_"
|
||||
bundleId = 'b04b1756b09ba3260de278824332ad6c'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
tableName = "c__7060_erwaehnt__datum_"
|
||||
bundleId = 'b04b1756b09ba3260de278824332ad6c'
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['fac8bbc9701f5da711a6a49beca1b3e4'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__410a_auftraggeber':
|
||||
entityValues['f6b456466f45f72952a953bf169a47cc'] = value # Client
|
||||
case 'f__7060_erwaehnt__datum_':
|
||||
entityValues['ffdae7d7aeb84467faebf5468fb8b94f'] = value # Date
|
||||
case 'f__7100_art_ereignis':
|
||||
entityValues['fb462fbc544045fc244da8d490ed1cfc'] = value # Event type
|
||||
case 'f__70ls_lit__stelle':
|
||||
entityValues['f11f8bc3fdbedc686430ef57edfcf620'] = value # Literature Reference
|
||||
case 'f__70lt_lit__kurztitel':
|
||||
entityValues['f4ed2a340720f643bcc49ac9581b1181'] = value # Literature short title
|
||||
case 'f__34ms_bei_meister_':
|
||||
entityValues['f9d8ac79df3eb667db8fb8b23e52a816'] = value # Master
|
||||
case 'f__70bm_bem_ereignis':
|
||||
entityValues['f37dbed94d03576c91fff9c3c9026da5'] = value # Note
|
||||
case 'f__70qs_quelle_stelle':
|
||||
entityValues['ffc72e8058fd9efd4bb92270520942bd'] = value # Source reference
|
||||
case 'f__70qt_quelle_kurztitel':
|
||||
entityValues['f433afdf58621b6962dea8821cf21bb9'] = value # Source short title
|
||||
case 'f__3420_taet_ort':
|
||||
entityValues['f53e436b293c82f07fb17dd40c01f868'] = value # Workplace
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['fac8bbc9701f5da711a6a49beca1b3e4'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__410a_auftraggeber':
|
||||
entityValues['f6b456466f45f72952a953bf169a47cc'] = value # Client
|
||||
case 'f__7060_erwaehnt__datum_':
|
||||
entityValues['ffdae7d7aeb84467faebf5468fb8b94f'] = value # Date
|
||||
case 'f__7100_art_ereignis':
|
||||
entityValues['fb462fbc544045fc244da8d490ed1cfc'] = value # Event type
|
||||
case 'f__70ls_lit__stelle':
|
||||
entityValues['f11f8bc3fdbedc686430ef57edfcf620'] = value # Literature Reference
|
||||
case 'f__70lt_lit__kurztitel':
|
||||
entityValues['f4ed2a340720f643bcc49ac9581b1181'] = value # Literature short title
|
||||
case 'f__34ms_bei_meister_':
|
||||
entityValues['f9d8ac79df3eb667db8fb8b23e52a816'] = value # Master
|
||||
case 'f__70bm_bem_ereignis':
|
||||
entityValues['f37dbed94d03576c91fff9c3c9026da5'] = value # Note
|
||||
case 'f__70qs_quelle_stelle':
|
||||
entityValues['ffc72e8058fd9efd4bb92270520942bd'] = value # Source reference
|
||||
case 'f__70qt_quelle_kurztitel':
|
||||
entityValues['f433afdf58621b6962dea8821cf21bb9'] = value # Source short title
|
||||
case 'f__3420_taet_ort':
|
||||
entityValues['f53e436b293c82f07fb17dd40c01f868'] = value # Workplace
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created mentioned {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,67 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importNumDating(api, engine):
|
||||
print('Importing num dating...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "c__5064_num__dat_"
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64' # Dating Information Assignment
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "c__5064_num__dat_"
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64' # Dating Information Assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.iloc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
uuid = value[0]
|
||||
case 'f__5064_num__dat_':
|
||||
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__50bm_bem__datierung':
|
||||
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5064_num__dat_':
|
||||
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__50bm_bem__datierung':
|
||||
entityValues['fe7870b5a86040d81140bccb01697765'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created num dating {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,85 +5,70 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importOriginAssignment(api, engine):
|
||||
print('Importing origin assignment...')
|
||||
test = False
|
||||
tableName = "c__3204_herkunft"
|
||||
bundleId = 'b1d5be81f8b3dfbf9d6d90379cc0a14f'
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
test = False
|
||||
tableName = "c__3204_herkunft"
|
||||
bundleId = 'b1d5be81f8b3dfbf9d6d90379cc0a14f'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f0d656adf9a5a9501e2f837af2e71dd6'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3hbm_bem_herkunft':
|
||||
entityValues['f3755949b812523c5d2005ea831c122f'] = value # Note
|
||||
case 'f__3204_herkunft':
|
||||
entityValues['fecbc849373f6a48c23be62619da3b09'] = value # Place
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f0d656adf9a5a9501e2f837af2e71dd6'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3hbm_bem_herkunft':
|
||||
entityValues['f3755949b812523c5d2005ea831c122f'] = value # Note
|
||||
case 'f__3204_herkunft':
|
||||
entityValues['fecbc849373f6a48c23be62619da3b09'] = value # Place
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created origin assignment {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
exit()
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing origin assignments')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,71 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importWorkshops(api, engine):
|
||||
print('Importing workshops...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "c__nfws_forts_werkst_"
|
||||
bundleId = 'beb03bccbdffdd31567df370303c1e2d'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedWorkshops.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'uuid', 'uri'])
|
||||
test = False
|
||||
# Load sources table
|
||||
workshopsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
workshopsTable = pd.read_sql_table('c__nfws_forts_werkst_', con=engine)
|
||||
|
||||
workshopValues = {}
|
||||
|
||||
# Create workshops
|
||||
for index, row in workshopsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and workshopsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed workshop {workshopsTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
workshopValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create workshops
|
||||
for index, row in workshopsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and workshopsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {workshopsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
workshopValues['fa7c19f4d03d7d15acf588460654bbf2'] = value # UUID
|
||||
case 'f__nfws_forts_werkst_':
|
||||
workshopValues['ff1aaeb118005d8506af6f56f7e424a4'] = value # Continued by
|
||||
case 'f__nfbm_bem_forts_':
|
||||
workshopValues['f71d24e2922d3151603ce144c0972f40'] = value # Note
|
||||
case 'f__nfzr_zeitraumforts_':
|
||||
workshopValues['f865ade60ba332a0a3ab4b77c39af7f4'] = value # Time-Span
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
workshopValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
workshopValues['fa7c19f4d03d7d15acf588460654bbf2'] = value # UUID
|
||||
case 'f__nfws_forts_werkst_':
|
||||
workshopValues['ff1aaeb118005d8506af6f56f7e424a4'] = value # Continued by
|
||||
case 'f__nfbm_bem_forts_':
|
||||
workshopValues['f71d24e2922d3151603ce144c0972f40'] = value # Note
|
||||
case 'f__nfzr_zeitraumforts_':
|
||||
workshopValues['f865ade60ba332a0a3ab4b77c39af7f4'] = value # Time-Span
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
workshop = Entity(api=api, fields=workshopValues, bundle_id='beb03bccbdffdd31567df370303c1e2d')
|
||||
api.save(workshop)
|
||||
# Create Material
|
||||
workshop = Entity(api=api, fields=workshopValues, bundle_id=bundleId)
|
||||
api.save(workshop)
|
||||
|
||||
print(f'Created workshop {index}: {workshop.uri} of {len(workshopsTable)}')
|
||||
print(f'Created workshop {index}: {workshop.uri} of {len(workshopsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'uuid': workshopValues['fa7c19f4d03d7d15acf588460654bbf2'][0], 'uri': workshop.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedWorkshops.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': workshopValues['fa7c19f4d03d7d15acf588460654bbf2'][0], 'uri': workshop.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
exit()
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing workshops')
|
||||
|
|
|
|||
207
21_importArtifacts.py
Normal file
207
21_importArtifacts.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Table column -> WissKI field id for values stored directly on the Artifact.
_ARTIFACT_FIELDS = {
    'f__uuid': 'feb48c9a7efc444449b4b8defcd6d8bd',  # UUID
    'f__5000_obj_dok_nr_': 'f7e2a8a273ab3d577bf5854902550c09',  # Document Identifier
    'f__500n_ngk_nr_': 'f6e041bd0b16b21596849732c01cb168',  # NGK Number
    'f__5200_obj_titel': 'fd06dcc49a29b1a63fa4a789ec17e5c6',  # Title
    'f__5210_status': 'f35c9c9b0991729c36acb41645fe81d1',  # Status
    'f__5220_gattung': 'f2fd7f8a81d5eb1a20371b9acfd1ab59',  # Genre
    'f__5223_form__attribut': 'f05bbd6e29a7d303e4370b04c12b3f75',  # Formattribute
    'f__5226_art': 'f593fa773a6ea458101ba2325a18abbe',  # artifact type
    'f__523f_funktion': 'f476ba24127d4dff1018acebf45a05f6',  # Function
    'f__5240_formtyp': 'fa7cfd9dbb3d2517c1898b3051d8dbed',  # Shape
    'f__524g_gestalt': 'f8309a21fa79bc6bd2506060b419d2df',  # Figure
    'f__55ng_darst__schlagw_': 'f6abbd4f39a6f79de5de2b14b98e51ff',  # Keywords
    'f__5bes_beschreibung': 'f26ad2bc1f084478cd7011f7b8451526',  # Description
    'f__5ges_geschichte': 'f40120d7c13ef02b486c69245f6c2306',  # History
    'f__68an_abdruck_nr_': 'fd3740649cc06f45677eb0546908cdac',  # Print Number
    'f__stwv_statwerkverz': 'fee0db94d62fae6370a89ff4757ff539',  # Catalogue_of_Works
    'f__9990_kommentar': 'fefe289aa0c9563a153be6da7d37e3ff',  # Comment
    'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
}

# Table column -> WissKI field id for values stored on the Digitisation Process.
_DIGITISATION_FIELDS = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}

# Table column -> value written to the "Type" field of a Dimension entity.
_DIMENSION_TYPES = {
    'f__5362_hoehe': 'height',
    'f__5364_breite': 'width',
    'f__5366_tiefe': 'depth',
    'f__5368_laenge': 'length',
    'f__5370_durchmesser': 'diameter',
    'f__5380_gewicht': 'weight',
    'f__538h_hist__gewicht': 'historical_weight',
}

# Directory prefixes in reproduction numbers and their replacements,
# applied in this order.
_IMAGE_DIR_REPLACEMENTS = (
    ('Objekte/', 'objects/'),
    ('Objekte\\', 'objects/'),
    ('Objekte3\\', 'objects/'),
    ('Objekte4\\', 'objects/'),
    ('objekte4\\', 'objects/'),
    ('Objekte5\\', 'objects/'),
    ('objekte5\\', 'objects/'),
    ('Marken\\', 'marks/'),
    ('Marken/', 'marks/'),
)

_ARTIFACT_UUID_FIELD = 'feb48c9a7efc444449b4b8defcd6d8bd'


def _isBlankCell(value):
    """Return True when a table cell carries no value (None, '', NaN or NaT)."""
    if value is None:
        return True
    # NULLs in numeric/datetime columns come back as NaN/NaT; without this
    # check they would be imported as the literal strings 'nan' / 'NaT'.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return True
    return value == ''


def _toFieldValues(value):
    """Convert one cell into the list of strings WissKI expects for a field.

    New-line markers are turned into "&", then the text is split on "&" and
    every part is stripped. A cell without "&" becomes a one-element list.
    """
    text = str(value)
    # NOTE(review): the first marker uses single braces and the second double
    # braces (the original wrote the first one as '{{...}}'.format()).
    # Behaviour is kept as-is; confirm which spelling occurs in the data.
    text = text.replace('&###{new_line}###', '&')
    text = text.replace('###{{new_line}}###', '&')
    # No separate ' & ' -> '&' pass is needed: each part is stripped below.
    if '&' in text:
        return [part.strip() for part in text.split('&')]
    return [text]


def _normaliseImageName(name):
    """Rewrite the legacy directory prefixes of a reproduction number."""
    for old, new in _IMAGE_DIR_REPLACEMENTS:
        name = name.replace(old, new)
    return name


def importArtifacts(api, engine):
    """Import all rows of table ``c__obj`` as Artifact entities.

    For every row a Production Place Assignment, the Dimension entities, the
    Image entities, an Image Assignment and a Digitisation Process are saved
    first; the Artifact is saved last and references them by their UUIDs.
    After each artifact the progress log ``./logs/c__obj.csv`` is rewritten so
    an interrupted run can skip rows that were already imported.

    Args:
        api: WissKI API object used to save the entities.
        engine: database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing artifacts...')

    tableName = "c__obj"
    bundleId = 'bd30c2c64a3caa8bb1628c780c3f24bb'
    logPath = f'./logs/{tableName}.csv'

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load artifacts table
    artifactsTable = pd.read_sql_table(tableName, con=engine)

    for index, row in artifactsTable.iterrows():
        # Resume support: row N is skipped when log line N carries the same id.
        if index < len(processedRows) and artifactsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed artifact {artifactsTable.loc[index, "id"]}')
            continue

        # Field dicts of the entities created for this row. The helper
        # entities get a fresh UUID which the artifact uses to link to them.
        artifactValues = {}
        digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
        imageValues = {}
        imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
        productionPlaceAssignmentValues = {'f40cc95db3ccaa1dbbf27294338d9f07': [str(uuid.uuid4())]}
        dimensionValues = {}

        for key, value in row.items():
            if key == 'id' or _isBlankCell(value):
                continue

            value = _toFieldValues(value)

            # Map columns to fields. We use assignments for reification.
            if key in _ARTIFACT_FIELDS:
                artifactValues[_ARTIFACT_FIELDS[key]] = value
            elif key in _DIGITISATION_FIELDS:
                digitisationProcessValues[_DIGITISATION_FIELDS[key]] = value
            elif key in _DIMENSION_TYPES:
                # Each dimension column becomes its own Dimension entity.
                dimensionValues[key] = {
                    'f31e9c7e2de5549daea1790a74615288': [_DIMENSION_TYPES[key]],  # Type
                    'f3f805d270890837a6493e7e60a96487': value,  # Dimension
                    'f802fd7bf45be523a9b188411a591420': [str(uuid.uuid4())],  # UUID
                }
            elif key == 'f__5130_entst_ort':
                # The production place lives on the Production Place Assignment.
                productionPlaceAssignmentValues['f43f9589eef324fb12c26226dfe94246'] = value  # Production Place
            elif key == 'f__8540_repro_nr_':
                # Each reproduction number becomes its own Image entity.
                for item in value:
                    item = _normaliseImageName(item)
                    imageValues[item] = {
                        'feb10344eaa7a5f414d1e8392853eba9': [item],  # Reproduction Number (Image)
                        'fc8d57e55f203c75c2f8a1ae79378ac7': ['public://artifact_images/' + item + '.jpg'],  # File
                        'f11beac4b638016479e6f3fbc7e55d1a': [str(uuid.uuid4())],  # UUID
                    }
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Production Place Assignment (entities are saved immediately
        # rather than collected and saved in one batch).
        productionPlaceAssignment = Entity(api=api, fields=productionPlaceAssignmentValues, bundle_id='b13bc6dc04d4bbdafb9536987eb43244')
        api.save(productionPlaceAssignment)

        # Create Dimension entities and collect their UUIDs,
        # because Artifact and Dimension are linked over the UUID.
        dimension = []
        for fields in dimensionValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b73258adf62f35bd1be3fa2863fab558'))
            dimension.append(fields['f802fd7bf45be523a9b188411a591420'][0])

        # Create Image entities and collect their UUIDs,
        # because Image Assignment and Image are linked over the UUID.
        imageList = []
        for fields in imageValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3'))
            imageList.append(fields['f11beac4b638016479e6f3fbc7e55d1a'][0])

        # Create the Image Assignment that the artifact links to.
        # NOTE(review): indentation was lost in the reviewed view; this assumes
        # only the image list is conditional and the assignment is always
        # saved, since the artifact always references it below - confirm.
        if imageList:
            imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList  # List of Image UUIDs
        imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
        api.save(imageAssignment)

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Link the artifact to the helper entities via their UUIDs.
        # NOTE(review): the original author asked whether the Production Place
        # Assignment value is correct - still to be confirmed.
        artifactValues['f2676a0fb8db6ab62235328ae7c7a4b3'] = [productionPlaceAssignmentValues['f40cc95db3ccaa1dbbf27294338d9f07'][0]]  # Production Place Assignment
        artifactValues['fc700eb3f24f4f2a6c165128aa7117f1'] = dimension  # Dimension
        artifactValues['f7af1cd9c77448281dd7ecf29ba57e3e'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]]  # Image Assignment
        artifactValues['f5a3f90d920da3db4cfdbaa6264b0e89'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

        # Create Artifact
        artifact = Entity(api=api, fields=artifactValues, bundle_id=bundleId)
        api.save(artifact)

        print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')

        # Write log after every artifact so a crash loses at most one row.
        # A row without f__uuid is logged with an empty uuid instead of
        # raising KeyError after the entities have already been saved.
        logEntry = pd.DataFrame([{
            'id': row['id'],
            'uuid': artifactValues.get(_ARTIFACT_UUID_FIELD, [None])[0],
            'uri': artifact.uri,
        }])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finished importing artifacts')
|
||||
|
|
@ -1,213 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Legacy stand-alone script: imports every row of table 'c__obj' as an
# Artifact entity together with its helper entities (Production Place
# Assignment, Dimensions, Images, Image Assignment, Digitisation Process).

# Table column -> WissKI field id for values stored directly on the Artifact.
ARTIFACT_FIELD_BY_COLUMN = {
    'f__uuid': 'feb48c9a7efc444449b4b8defcd6d8bd',  # UUID
    'f__5000_obj_dok_nr_': 'f7e2a8a273ab3d577bf5854902550c09',  # Document Identifier
    'f__500n_ngk_nr_': 'f6e041bd0b16b21596849732c01cb168',  # NGK Number
    'f__5200_obj_titel': 'fd06dcc49a29b1a63fa4a789ec17e5c6',  # Title
    'f__5210_status': 'f35c9c9b0991729c36acb41645fe81d1',  # Status
    'f__5220_gattung': 'f2fd7f8a81d5eb1a20371b9acfd1ab59',  # Genre
    'f__5223_form__attribut': 'f05bbd6e29a7d303e4370b04c12b3f75',  # Formattribute
    'f__5226_art': 'f593fa773a6ea458101ba2325a18abbe',  # artifact type
    'f__523f_funktion': 'f476ba24127d4dff1018acebf45a05f6',  # Function
    'f__5240_formtyp': 'fa7cfd9dbb3d2517c1898b3051d8dbed',  # Shape
    'f__524g_gestalt': 'f8309a21fa79bc6bd2506060b419d2df',  # Figure
    'f__55ng_darst__schlagw_': 'f6abbd4f39a6f79de5de2b14b98e51ff',  # Keywords
    'f__5bes_beschreibung': 'f26ad2bc1f084478cd7011f7b8451526',  # Description
    'f__5ges_geschichte': 'f40120d7c13ef02b486c69245f6c2306',  # History
    'f__68an_abdruck_nr_': 'fd3740649cc06f45677eb0546908cdac',  # Print Number
    'f__stwv_statwerkverz': 'fee0db94d62fae6370a89ff4757ff539',  # Catalogue_of_Works
    'f__9990_kommentar': 'fefe289aa0c9563a153be6da7d37e3ff',  # Comment
    'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
}

# Table column -> WissKI field id for values stored on the Digitisation Process.
DIGITISATION_FIELD_BY_COLUMN = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}

# Table column -> value written to the "Type" field of a Dimension entity.
DIMENSION_TYPE_BY_COLUMN = {
    'f__5362_hoehe': 'height',
    'f__5364_breite': 'width',
    'f__5366_tiefe': 'depth',
    'f__5368_laenge': 'length',
    'f__5370_durchmesser': 'diameter',
    'f__5380_gewicht': 'weight',
    'f__538h_hist__gewicht': 'historical_weight',
}

# Directory prefixes in reproduction numbers and their replacements,
# applied in this order.
IMAGE_PREFIX_REPLACEMENTS = (
    ('Objekte/', 'objects/'),
    ('Objekte\\', 'objects/'),
    ('Objekte3\\', 'objects/'),
    ('Objekte4\\', 'objects/'),
    ('Objekte5\\', 'objects/'),
    ('objekte5\\', 'objects/'),
    ('Marken\\', 'marks/'),
    ('Marken/', 'marks/'),
)

# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if engine == False:
    print('Database initialization failed.')
    # Exit with a non-zero status; the bare exit() helper reported success
    # and is only available when the site module is loaded.
    raise SystemExit(1)

# Load the environment variables
load_dotenv()

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')

try:
    processedRows = pd.read_csv(f'./logs/processedArtifacts.csv')
except FileNotFoundError:
    processedRows = pd.DataFrame(columns=['artifactId', 'uuid', 'uri'])

# Load artifacts table
artifactsTable = pd.read_sql_table('c__obj', con=engine)

# Create artifacts
for index, row in artifactsTable.iterrows():
    # Resume support: row N is skipped when the first column of log line N
    # equals the first column of table row N.
    # NOTE(review): the log stores the Document Identifier as 'artifactId'
    # while column 0 of the table is presumably the row 'id', so this check
    # may never match - confirm against the table schema.
    if index < len(processedRows) and artifactsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
        print(f'Skipping already processed artifact {artifactsTable.iloc[index, 0]}')
        continue

    # Field dicts of the entities created for this row. The helper entities
    # get a fresh UUID which the artifact uses to link to them.
    artifactValues = {}
    digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
    imageValues = {}
    imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
    productionPlaceAssignmentValues = {'f40cc95db3ccaa1dbbf27294338d9f07': [str(uuid.uuid4())]}
    dimensionValues = {}

    for key, value in row.items():
        # Skip cells without a value. NULLs in numeric/datetime columns come
        # back as NaN/NaT and would otherwise be imported as values.
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)) or value == '':
            continue

        # Properties of an entity have to be a list: "&"-separated cells are
        # exploded into stripped items, anything else is wrapped unchanged.
        if '&' in str(value):
            value = [x.strip() for x in str(value).split('&')]
        else:
            value = [value]

        # Map columns to fields. We use assignments for reification.
        if key == 'id':
            continue
        if key in ARTIFACT_FIELD_BY_COLUMN:
            artifactValues[ARTIFACT_FIELD_BY_COLUMN[key]] = value
        elif key in DIGITISATION_FIELD_BY_COLUMN:
            digitisationProcessValues[DIGITISATION_FIELD_BY_COLUMN[key]] = value
        elif key in DIMENSION_TYPE_BY_COLUMN:
            # Each dimension column becomes its own Dimension entity.
            dimensionValues[key] = {
                'f31e9c7e2de5549daea1790a74615288': [DIMENSION_TYPE_BY_COLUMN[key]],  # Type
                'f3f805d270890837a6493e7e60a96487': value,  # Dimension
                'f802fd7bf45be523a9b188411a591420': [str(uuid.uuid4())],  # UUID
            }
        elif key == 'f__5130_entst_ort':
            # The production place lives on the Production Place Assignment.
            productionPlaceAssignmentValues['f43f9589eef324fb12c26226dfe94246'] = value  # Production Place
        elif key == 'f__8540_repro_nr_':
            # Each reproduction number becomes its own Image entity.
            for item in value:
                if item is not None:
                    # Replace dir paths in name
                    for oldPrefix, newPrefix in IMAGE_PREFIX_REPLACEMENTS:
                        item = item.replace(oldPrefix, newPrefix)
                    imageValues[item] = {
                        'feb10344eaa7a5f414d1e8392853eba9': [item],  # Reproduction Number (Image)
                        'fc8d57e55f203c75c2f8a1ae79378ac7': ['public://artifact_images/' + item + '.jpg'],  # File
                        'f11beac4b638016479e6f3fbc7e55d1a': [str(uuid.uuid4())],  # UUID
                    }
        else:
            print(f'{key} is not a valid field, skipping.')

    # Create Production Place Assignment (entities are saved immediately
    # rather than collected and saved in one batch).
    productionPlaceAssignment = Entity(api=api, fields=productionPlaceAssignmentValues, bundle_id='b13bc6dc04d4bbdafb9536987eb43244')
    api.save(productionPlaceAssignment)

    # Create Dimension entities and collect their UUIDs,
    # because Artifact and Dimension are linked over the UUID.
    dimension = []
    for dimensionFields in dimensionValues.values():
        dimensionItem = Entity(api=api, fields=dimensionFields, bundle_id='b73258adf62f35bd1be3fa2863fab558')
        api.save(dimensionItem)
        dimension.append(dimensionFields['f802fd7bf45be523a9b188411a591420'][0])

    # Create Image entities and collect their UUIDs,
    # because Image Assignment and Image are linked over the UUID.
    imageList = []
    for imageFields in imageValues.values():
        imageItem = Entity(api=api, fields=imageFields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
        api.save(imageItem)
        imageList.append(imageFields['f11beac4b638016479e6f3fbc7e55d1a'][0])

    # Create the Image Assignment that the artifact links to.
    # NOTE(review): indentation was lost in the reviewed view; this assumes
    # only the image list is conditional and the assignment is always saved,
    # since the artifact always references it below - confirm.
    if imageList:
        imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList  # List of Image UUIDs
    imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
    api.save(imageAssignment)

    # Create Digitisation Process
    digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
    api.save(digitisationProcess)

    # Link the artifact to the helper entities via their UUIDs.
    # NOTE(review): the original author asked whether the Production Place
    # Assignment value is correct - still to be confirmed.
    artifactValues['f2676a0fb8db6ab62235328ae7c7a4b3'] = [productionPlaceAssignmentValues['f40cc95db3ccaa1dbbf27294338d9f07'][0]]  # Production Place Assignment
    artifactValues['fc700eb3f24f4f2a6c165128aa7117f1'] = dimension  # Dimension
    artifactValues['f7af1cd9c77448281dd7ecf29ba57e3e'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]]  # Image Assignment
    artifactValues['f5a3f90d920da3db4cfdbaa6264b0e89'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

    # Create Artifact
    artifact = Entity(api=api, fields=artifactValues, bundle_id='bd30c2c64a3caa8bb1628c780c3f24bb')
    api.save(artifact)

    print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')

    # Write log after every artifact so a crash loses at most one row.
    # Missing identifier/UUID columns are logged as empty values instead of
    # raising KeyError after the entities have already been saved.
    logEntry = pd.DataFrame([{
        'artifactId': artifactValues.get('f7e2a8a273ab3d577bf5854902550c09', [None])[0],
        'uuid': artifactValues.get('feb48c9a7efc444449b4b8defcd6d8bd', [None])[0],
        'uri': artifact.uri,
    }])
    processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
    processedRows.to_csv(f'./logs/processedArtifacts.csv', index=False)

print('finish')
|
||||
|
|
@ -5,84 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactRelation(api, engine):
|
||||
print('Importing artifact relation...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "c__5007_beziehung"
|
||||
bundleId = 'bf4a13ee46de57819f88834caaddc301' # Artifact relation assignment
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "c__5007_beziehung"
|
||||
bundleId = 'bf4a13ee46de57819f88834caaddc301' # Artifact relation assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.ioc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artifact relation {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['ff7ebd530eb53efc489e80d9bbef293e'] = value # UUID
|
||||
uuid = value[0]
|
||||
case 'f__5008_bez_obj_nr_':
|
||||
entityValues['f39d0e5207a375070d84b958017a62e8'] = value # Artifact Document Identifier
|
||||
case 'f__bebm_bem_beziehung':
|
||||
entityValues['f9cc743b648716684ccc3a7b9710d0ed'] = value # Note
|
||||
case 'f__5007_beziehung':
|
||||
entityValues['f4d3047b3b54285aa5a86183aedb1680'] = value # Relation
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['ff7ebd530eb53efc489e80d9bbef293e'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5008_bez_obj_nr_':
|
||||
entityValues['f39d0e5207a375070d84b958017a62e8'] = value # Artifact Document Identifier
|
||||
case 'f__bebm_bem_beziehung':
|
||||
entityValues['f9cc743b648716684ccc3a7b9710d0ed'] = value # Note
|
||||
case 'f__5007_beziehung':
|
||||
entityValues['f4d3047b3b54285aa5a86183aedb1680'] = value # Relation
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created artifact relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact relation')
|
||||
|
|
|
|||
|
|
@ -5,79 +5,67 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistAssignment(api, engine):
|
||||
print('Importing artist assignment...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "c__ob30_bez_kuenstler"
|
||||
bundleId = 'bc8826cc7d9c9373ce71cfc0251c2a4f'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedArtistAssignment.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
# Load sources table
|
||||
artistRelationsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Load sources table
|
||||
artistRelationsTable = pd.read_sql_table('c__ob30_bez_kuenstler', con=engine)
|
||||
|
||||
artistRelationValues = {}
|
||||
|
||||
# Create artistRelations
|
||||
for index, row in artistRelationsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artistRelation {artistRelationsTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
print('value: ', value)
|
||||
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create artistRelations
|
||||
for index, row in artistRelationsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artistAssignment {artistRelationsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
artistRelationValues['fc150259d31fea8a3f992e7beb901fa4'] = value # UUID
|
||||
case 'f__3100_name':
|
||||
artistRelationValues['ff5bf58133f9351d03e2ee92b6f8bb7e'] = value # Artist Name
|
||||
case 'f__3475_ber__funkt_':
|
||||
artistRelationValues['fc0c7d8c6b736489210bc42ef0f1406a'] = value # Occupation
|
||||
case 'f__ob30_bez_kuenstler':
|
||||
artistRelationValues['f575d4f2c8ea5d37618cea708c2a7c5e'] = value # Relation
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
artistRelationValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
artistRelationValues['fc150259d31fea8a3f992e7beb901fa4'] = value # UUID
|
||||
case 'f__3100_name':
|
||||
artistRelationValues['ff5bf58133f9351d03e2ee92b6f8bb7e'] = value # Artist Name
|
||||
case 'f__3475_ber__funkt_':
|
||||
artistRelationValues['fc0c7d8c6b736489210bc42ef0f1406a'] = value # Occupation
|
||||
case 'f__ob30_bez_kuenstler':
|
||||
artistRelationValues['f575d4f2c8ea5d37618cea708c2a7c5e'] = value # Relation
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
|
||||
artistRelation = Entity(api=api, fields=artistRelationValues, bundle_id='bc8826cc7d9c9373ce71cfc0251c2a4f')
|
||||
api.save(artistRelation)
|
||||
artistRelation = Entity(api=api, fields=artistRelationValues, bundle_id=bundleId)
|
||||
api.save(artistRelation)
|
||||
|
||||
print(f'Created artistRelation {index}: {artistRelation.uri} of {len(artistRelationsTable)}')
|
||||
print(f'Created artist assignment {index}: {artistRelation.uri} of {len(artistRelationsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': artistRelationValues['fc150259d31fea8a3f992e7beb901fa4'][0], 'uri': artistRelation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedArtistAssignment.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': artistRelationValues['fc150259d31fea8a3f992e7beb901fa4'][0], 'uri': artistRelation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artist assignment')
|
||||
|
|
|
|||
82
25_importMarkInformation.py
Normal file
82
25_importMarkInformation.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
def importMarkInformation(api, engine):
|
||||
print('Importing mark information...')
|
||||
|
||||
tableName = "c__6760_markenart"
|
||||
bundleId = 'bc7ce6906f78e760f22ff13226b1332d' # Mark information assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
entityValues['f3b8aaf7e79229b4da8214d491e375ec'] = value # UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5064_num__dat_':
|
||||
entityValues['fe6921098808e68cae68f0858411826c'] = value # Artist Assignment
|
||||
case 'f__6894_anbr_ort':
|
||||
entityValues['f694ed57271ab7be57249e0ee5c41ba4'] = value # Location
|
||||
case 'f__6700_mar_dok_nr_':
|
||||
entityValues['fdd3380d4a11654f32687429796cabc3'] = value # Mark Document Number
|
||||
case 'f__6760_markenart':
|
||||
entityValues['fd381aa9c3ebdf417e6cbccd60ede279'] = value # Mark Type
|
||||
case 'f__684c_bedeutung_bz':
|
||||
entityValues['f4947de52885f517baef0cdf3cb53b61'] = value # Meaning Inspection Mark
|
||||
case 'f__684a_bedeutung_mz':
|
||||
entityValues['f542c4c945725c6fdc5ab6409a877f02'] = value # Meaning Master Mark
|
||||
case 'f__6770_rosenb_nr_':
|
||||
entityValues['f0ff7020a9c25ea2706875837fe61b04'] = value # Rosenberg Number
|
||||
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created mark information {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
|
||||
tableName = "c__8490_fotograf"
|
||||
bundleId = 'b821fb6c518948b7f40d17803b6ce293' # Photographer assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f6c3c3e35af2f2073fd517aabf88fa7c'] = value # UUID
|
||||
docUuid = value[0]
|
||||
case 'f__8490_fotograf':
|
||||
entityValues['fe8f8b235f896862b74caa0fa8f3682d'] = value # Photographer
|
||||
case 'f__8494_aufn_datum':
|
||||
entityValues['f12c7538643314f0f46ba76a5140a87d'] = value # Recording Date
|
||||
case 'f__8470_aufnahmenr_':
|
||||
entityValues['ff6ec986fb4cc5a2f34deb7144f2f817'] = value # Recording number
|
||||
case 'f__849r_repro_datei': # Image Assignment
|
||||
entityValues['f24a609593559a904a0a0f2e215db584'] = value # Reproduction Number
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': docUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
75
26_importPhotographer.py
Normal file
75
26_importPhotographer.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
def importPhotographer(api, engine):
|
||||
print('Importing photographer...')
|
||||
|
||||
tableName = "c__8490_fotograf"
|
||||
bundleId = 'b821fb6c518948b7f40d17803b6ce293' # Photographer assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f6c3c3e35af2f2073fd517aabf88fa7c'] = value # UUID
|
||||
docUuid = value[0]
|
||||
case 'f__8490_fotograf':
|
||||
entityValues['fe8f8b235f896862b74caa0fa8f3682d'] = value # Photographer
|
||||
case 'f__8494_aufn_datum':
|
||||
entityValues['f12c7538643314f0f46ba76a5140a87d'] = value # Recording Date
|
||||
case 'f__8470_aufnahmenr_':
|
||||
entityValues['ff6ec986fb4cc5a2f34deb7144f2f817'] = value # Recording number
|
||||
case 'f__849r_repro_datei': # Image Assignment
|
||||
entityValues['f24a609593559a904a0a0f2e215db584'] = value # Reproduction Number
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created Photographer {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': docUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finished importing photographer')
|
||||
|
|
@ -5,78 +5,64 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToArtistRelationRelation(api, engine):
|
||||
print('importing artifact to artist relation relation')
|
||||
tableName = "r__obj__ob30_bez_kuenstler"
|
||||
bundleId = 'b8b4e3b3fb7e3b83cec037aea51814bf' # Artifact to artist relation relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
entityValues = {}
|
||||
|
||||
|
||||
tableName = "r__obj__ob30_bez_kuenstler"
|
||||
bundleId = 'b8b4e3b3fb7e3b83cec037aea51814bf' # Artifact to artist relation relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f92631e8a40aae0aa8adbe84ab5dc97f'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__ob30_bez_kuenstler__uuid':
|
||||
entityValues['f07e9587430d70bc46926488129ba4a8'] = value # Artist Relation UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f92631e8a40aae0aa8adbe84ab5dc97f'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__ob30_bez_kuenstler__uuid':
|
||||
entityValues['f07e9587430d70bc46926488129ba4a8'] = value # Artist Relation UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created artifact to artist relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to artist relation relation')
|
||||
|
|
|
|||
|
|
@ -5,80 +5,67 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToClientAssignmentRelation(api, engine):
|
||||
print('importing artifact to client assignment relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__410a_auftraggeber"
|
||||
bundleId = 'b20d53dcc2bad79457251a581611b43f' # Artifact to client assignment relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__obj__410a_auftraggeber"
|
||||
bundleId = 'b20d53dcc2bad79457251a581611b43f' # Artifact to client assignment relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fc369de9f2f7ac73585f7c967f415703'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__410a_auftraggeber__uuid':
|
||||
entityValues['fe65c6437d49877bad3de9ce31e19772'] = value # Client UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fc369de9f2f7ac73585f7c967f415703'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__410a_auftraggeber__uuid':
|
||||
entityValues['fe65c6437d49877bad3de9ce31e19772'] = value # Client UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Client Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to client assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,79 +5,65 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToInspectionMarkLocationRelation(api, engine):
|
||||
print('importing artifact to inspection mark location relation')
|
||||
tableName = "r__obj__67b0_bz_dok_nr"
|
||||
bundleId = 'b7fe64e0326c107a1a4a705be08392fa' # Artifact to inspection mark location relation
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
tableName = "r__obj__67b0_bz_dok_nr"
|
||||
bundleId = 'b7fe64e0326c107a1a4a705be08392fa' # Artifact to inspection mark location relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f7ed714f705f51f4893427c7ba14dae8'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__67b0_bz_dok_nr__uuid':
|
||||
entityValues['f7a330c34474ecf06737a334dd754e8b'] = value # Inspection Mark location assignment
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f7ed714f705f51f4893427c7ba14dae8'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__67b0_bz_dok_nr__uuid':
|
||||
entityValues['f7a330c34474ecf06737a334dd754e8b'] = value # Inspection Mark location assignment
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Inspection Mark Location Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to inspection mark location relation')
|
||||
|
|
|
|||
|
|
@ -5,79 +5,65 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToLiteratureReferenceAssignmentRelation(api, engine):
|
||||
print('importing artifact to literature reference assignment relation')
|
||||
tableName = "r__obj__8330_lit_kurzt_"
|
||||
bundleId = 'b6a7b7aad942ecff4b3beadf907d51c8' # Artifact to literature relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
|
||||
tableName = "r__obj__8330_lit_kurzt_"
|
||||
bundleId = 'b6a7b7aad942ecff4b3beadf907d51c8' # Artifact to literature relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f6c41b894b0a00c2c28860f513c5bb77'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['f5284765cef8e6974676adcb59791960'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f6c41b894b0a00c2c28860f513c5bb77'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['f5284765cef8e6974676adcb59791960'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Literature Reference Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to literature reference assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,79 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToMarkInformationAssignmentRelation(api, engine):
|
||||
print('importing artifact to mark information assignment relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__6760_markenart"
|
||||
bundleId = 'b7112c2a7ea92a1d263d42d5572a05fc' # Artifact to mark information assignment relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
tableName = "r__obj__6760_markenart"
|
||||
bundleId = 'b7112c2a7ea92a1d263d42d5572a05fc' # Artifact to mark information assignment relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fcf4cbb8b01e4a02ffd041ba4040f890'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__6760_markenart__uuid':
|
||||
entityValues['fb6de3d2433630fc205fe1ef7f24639f'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fcf4cbb8b01e4a02ffd041ba4040f890'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__6760_markenart__uuid':
|
||||
entityValues['fb6de3d2433630fc205fe1ef7f24639f'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Mark Information Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to mark information assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,79 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToMaterialRelation(api, engine):
|
||||
print('importing artifact to material relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__5280_material"
|
||||
bundleId = 'b825aff7df3d48bd875e2a081c796305' # Artifact to material relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
tableName = "r__obj__5280_material"
|
||||
bundleId = 'b825aff7df3d48bd875e2a081c796305' # Artifact to material relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f9f07bf63ccafd4eb2c0de24c73e1664'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5280_material__uuid':
|
||||
entityValues['f820534abde4c2a2d19e0d19f7793cf0'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f9f07bf63ccafd4eb2c0de24c73e1664'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5280_material__uuid':
|
||||
entityValues['f820534abde4c2a2d19e0d19f7793cf0'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Material Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to material relation')
|
||||
|
|
|
|||
|
|
@ -5,80 +5,67 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToNumericeDateRelation(api, engine):
|
||||
print('importing artifact to numeric date relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__5064_num__dat_"
|
||||
bundleId = 'b795fcfa6c684fa707c236c4b0882ad7' # Artifact to numeric date relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__obj__5064_num__dat_"
|
||||
bundleId = 'b795fcfa6c684fa707c236c4b0882ad7' # Artifact to numeric date relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fc69105d5a6931fc1d2b53cee7ef8b22'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5064_num__dat___uuid':
|
||||
entityValues['fff143b7bfc1308cac53789304a1aff2'] = value # Numeric Date UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fc69105d5a6931fc1d2b53cee7ef8b22'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5064_num__dat___uuid':
|
||||
entityValues['fff143b7bfc1308cac53789304a1aff2'] = value # Numeric Date UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Numeric Date Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to numeric date relation')
|
||||
|
|
|
|||
|
|
@ -5,79 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToPhotographRelation(api, engine):
|
||||
print('importing artifact to photograph relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__8490_fotograf"
|
||||
bundleId = 'b63cd713e60b6e5bc3b2235dffc0dba9' # Artifact to photograph relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
tableName = "r__obj__8490_fotograf"
|
||||
bundleId = 'b63cd713e60b6e5bc3b2235dffc0dba9' # Artifact to photograph relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f88af5d8b4e289c0cde4df32f76a2804'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8490_fotograf__uuid':
|
||||
entityValues['fe2f0af4ba38024fb0f796d4a98af511'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f88af5d8b4e289c0cde4df32f76a2804'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8490_fotograf__uuid':
|
||||
entityValues['fe2f0af4ba38024fb0f796d4a98af511'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Photograph Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to photograph relation')
|
||||
|
|
|
|||
|
|
@ -5,80 +5,67 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToRelationRelation(api, engine):
|
||||
print('importing artifact to relation relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__5007_beziehung"
|
||||
bundleId = 'bb878dd9c44c83a70fbd151f1dc06b4d' # Artifact to relation relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__obj__5007_beziehung"
|
||||
bundleId = 'bb878dd9c44c83a70fbd151f1dc06b4d' # Artifact to relation relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fe537502d55fd4a4482449a0174a3d98'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5007_beziehung__uuid':
|
||||
entityValues['f82f33fa9640d894170c5221d02f583a'] = value # Relation UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fe537502d55fd4a4482449a0174a3d98'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__5007_beziehung__uuid':
|
||||
entityValues['f82f33fa9640d894170c5221d02f583a'] = value # Relation UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Relation Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to relation relation')
|
||||
|
|
|
|||
|
|
@ -5,78 +5,65 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToSourceRelation(api, engine):
|
||||
print('importing artifact to source relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__obj__8130_que_kurzt_"
|
||||
bundleId = 'bcf720dc0b796043915d6da536414451' # Artifact to source relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
tableName = "r__obj__8130_que_kurzt_"
|
||||
bundleId = 'bcf720dc0b796043915d6da536414451' # Artifact to source relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fc8eb74a6ba0c51a82972ff19fec53e8'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8130_que_kurzt___uuid':
|
||||
entityValues['fbfbf828330ed4ec85797ea274f73bb8'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['fc8eb74a6ba0c51a82972ff19fec53e8'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8130_que_kurzt___uuid':
|
||||
entityValues['fbfbf828330ed4ec85797ea274f73bb8'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Source Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
print('finished importing artifact to source relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtifactToStatusAdministratorRelation(api, engine):
|
||||
print('importing artifact to status administrator relation')
|
||||
test = False
|
||||
tableName = "r__obj__ob28_status_verwalt_"
|
||||
bundleId = 'bd4922f100ab534fc1213f767770ed6d' # Artifact to status adminstrator relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
test = False
|
||||
tableName = "r__obj__ob28_status_verwalt_"
|
||||
bundleId = 'bd4922f100ab534fc1213f767770ed6d' # Artifact to status adminstrator relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f355304194b190e2fee22a99d54ebc92'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__ob28_status_verwalt___uuid':
|
||||
entityValues['fcc8a9758ce7a2659bfe96242ec4a15e'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__obj__uuid':
|
||||
entityValues['f355304194b190e2fee22a99d54ebc92'] = value # Artifact UUID
|
||||
fUuid = value[0]
|
||||
case 'f__ob28_status_verwalt___uuid':
|
||||
entityValues['fcc8a9758ce7a2659bfe96242ec4a15e'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Status Administrator Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
exit()
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to status administrator relation')
|
||||
|
|
|
|||
|
|
@ -5,83 +5,70 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToBirthRelation(api, engine):
|
||||
print('importing artist to birth relation')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
test = False
|
||||
tableName = "r__kue__3270_geb_datum"
|
||||
bundleId = 'b82e4404cdf641db57c03d7e3b23947c' # Artist to birth relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
test = False
|
||||
tableName = "r__kue__3270_geb_datum"
|
||||
bundleId = 'b82e4404cdf641db57c03d7e3b23947c' # Artist to birth relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f07d608ae6abf891e54c0f57b5f78507'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__3270_geb_datum__uuid':
|
||||
entityValues['f70978f842342d920db490d420339dae'] = value # Dating
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f07d608ae6abf891e54c0f57b5f78507'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__3270_geb_datum__uuid':
|
||||
entityValues['f70978f842342d920db490d420339dae'] = value # Dating
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Birth Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing artist to birth relation')
|
||||
|
|
|
|||
|
|
@ -5,81 +5,68 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToDeathRelation(api, engine):
|
||||
print('importing artist to death relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__kue__3330_todes_dat_"
|
||||
bundleId = 'b91ed11c8063a363063582f001a3f5a2' # Artist to death relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__3330_todes_dat_"
|
||||
bundleId = 'b91ed11c8063a363063582f001a3f5a2' # Artist to death relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f2b9ebb823502c1bba835d2f57102815'] = value # Artist UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3330_todes_dat___uuid':
|
||||
entityValues['f6286ce1789410919bd6fc3f1a7f2e05'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f2b9ebb823502c1bba835d2f57102815'] = value # Artist UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3330_todes_dat___uuid':
|
||||
entityValues['f6286ce1789410919bd6fc3f1a7f2e05'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Death Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing artist to death relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToGoldsmithRelation(api, engine):
|
||||
print('importing artist to goldsmith relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__kue__3007_bezieh__zu_gs"
|
||||
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__kue__3007_bezieh__zu_gs"
|
||||
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f972dfd248e362846f4cb5cc946eefc2'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__3007_bezieh__zu_gs__uuid':
|
||||
entityValues['f37c88dc7451b8d1b82f702ef64f8b05'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f972dfd248e362846f4cb5cc946eefc2'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__3007_bezieh__zu_gs__uuid':
|
||||
entityValues['f37c88dc7451b8d1b82f702ef64f8b05'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Goldsmith Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing artist to goldsmith relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToLiteratureReferenceRelation(api, engine):
|
||||
print('importing artist to literature reference relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__kue__8330_lit_kurzt_"
|
||||
bundleId = 'b7a87e3f3d5f671c1f163101bff30eb6' # Artist to literature relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__kue__8330_lit_kurzt_"
|
||||
bundleId = 'b7a87e3f3d5f671c1f163101bff30eb6' # Artist to literature relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f0b9b134818c592f93083d444817dffb'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['f70fb4157e3ef66e4d1ed78880f092b2'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f0b9b134818c592f93083d444817dffb'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['f70fb4157e3ef66e4d1ed78880f092b2'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Literature Reference Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing artist to literature reference relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToMentionedRelation(api, engine):
|
||||
print('importing artist to mentioned relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__kue__7060_erwaehnt__datum_"
|
||||
bundleId = 'bc2b0ddca583320a56a67b304dc0a045' # Artist to mentioned relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__kue__7060_erwaehnt__datum_"
|
||||
bundleId = 'bc2b0ddca583320a56a67b304dc0a045' # Artist to mentioned relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f47b1ffe8394f389497b9e23407ad72f'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__7060_erwaehnt__datum___uuid':
|
||||
entityValues['fabb90d487512fc5bf8d7379ff2d8bdb'] = value # Mentioned UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f47b1ffe8394f389497b9e23407ad72f'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__7060_erwaehnt__datum___uuid':
|
||||
entityValues['fabb90d487512fc5bf8d7379ff2d8bdb'] = value # Mentioned UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Mentioned Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing artist to mentioned relation')
|
||||
|
|
|
|||
|
|
@ -5,81 +5,68 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToOriginRelation(api, engine):
|
||||
print('importing artist to origin relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__kue__3204_herkunft"
|
||||
bundleId = 'b5cf6b3e6fd2e4b5575da4347999d6ea' # Artist to origin relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__3204_herkunft"
|
||||
bundleId = 'b5cf6b3e6fd2e4b5575da4347999d6ea' # Artist to origin relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f40e702ecb7fe968c77c9f2ed0f1280c'] = value # Artist UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3204_herkunft__uuid':
|
||||
entityValues['f53bcd587a769e93ea54a34e6de4867d'] = value # Origin UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f40e702ecb7fe968c77c9f2ed0f1280c'] = value # Artist UUID
|
||||
fUuid = value[0]
|
||||
case 'f__3204_herkunft__uuid':
|
||||
entityValues['f53bcd587a769e93ea54a34e6de4867d'] = value # Origin UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Origin Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing artist to origin relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistToWorkshopRelation(api, engine):
|
||||
print('importing artist to workshop relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__kue__nfws_forts_werkst_"
|
||||
bundleId = 'becb95326a733bdbd0c2dd3d36e3399d' # Artist to workshop relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__kue__nfws_forts_werkst_"
|
||||
bundleId = 'becb95326a733bdbd0c2dd3d36e3399d' # Artist to workshop relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f1f107b495d9cf3f349932f2c6535505'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__nfws_forts_werkst___uuid':
|
||||
entityValues['fc53912a0acb388e04eb6684eda209f1'] = value # Workshop UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__kue__uuid':
|
||||
entityValues['f1f107b495d9cf3f349932f2c6535505'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__nfws_forts_werkst___uuid':
|
||||
entityValues['fc53912a0acb388e04eb6684eda209f1'] = value # Workshop UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Workshop Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing artist to workshop relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkDatingInformationAssignmentRelation(api, engine):
|
||||
print('importing inspection mark dating information assignment relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__bez__68dm_datierung_marke"
|
||||
bundleId = 'b1fee832598b2d42ed17a927dad43b90' # Inspection Mark to dating information assignment relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__bez__68dm_datierung_marke"
|
||||
bundleId = 'b1fee832598b2d42ed17a927dad43b90' # Inspection Mark to dating information assignment relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__bez__uuid':
|
||||
entityValues['fac07ebf9c19d09995cc13ae1ba6f362'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__68dm_datierung_marke__uuid':
|
||||
entityValues['ffd43be34e81e0dbfc1b8cccc5f32056'] = value # Dating
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__bez__uuid':
|
||||
entityValues['fac07ebf9c19d09995cc13ae1ba6f362'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__68dm_datierung_marke__uuid':
|
||||
entityValues['ffd43be34e81e0dbfc1b8cccc5f32056'] = value # Dating
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Inspection Mark to Dating Information Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing inspection mark dating information assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,83 +5,70 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkRelationRelation(api, engine):
|
||||
print('importing inspection mark relation relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__bez__67b7_beziehung"
|
||||
bundleId = 'bc8dcd233a9b539db407bad219715988' # Inspection Mark Relation Relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__bez__67b7_beziehung"
|
||||
bundleId = 'bc8dcd233a9b539db407bad219715988' # Inspection Mark Relation Relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
|
||||
case 'f__bez__uuid':
|
||||
entityValues['fb9cc78d2351179c5f2f49b3b01be40b'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__67b7_beziehung__uuid':
|
||||
entityValues['f468e7d8e91f04b902c6bc79fe365074'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
case 'f__bez__uuid':
|
||||
entityValues['fb9cc78d2351179c5f2f49b3b01be40b'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__67b7_beziehung__uuid':
|
||||
entityValues['f468e7d8e91f04b902c6bc79fe365074'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Inspection Mark to Relation Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing inspection mark relation relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkToLiteratureReferenceRelation(api, engine):
|
||||
print('importing inspection mark to literature reference relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__bez__8330_lit_kurzt_"
|
||||
bundleId = 'b32fc778865a1ffd5b165515425f38c6' # Inspection Mark to Dating Assignment
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__bez__8330_lit_kurzt_"
|
||||
bundleId = 'b32fc778865a1ffd5b165515425f38c6' # Inspection Mark to Dating Assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__bez__uuid':
|
||||
entityValues['f8670edfe030f375ca0b8b275a394511'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['fa52476d733d0d106406864245d613b8'] = value # Literature Reference Assignment
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__bez__uuid':
|
||||
entityValues['f8670edfe030f375ca0b8b275a394511'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['fa52476d733d0d106406864245d613b8'] = value # Literature Reference Assignment
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Inspection Mark to Literature Reference Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing inspection mark to literature reference relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importLiteratureToJournalRelation(api, engine):
|
||||
print('importing literature to journal relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__lit__8310_zeitschrift"
|
||||
bundleId = 'b6c2ce0add1e7999f48d66b7ef1a4a26' # Literature to journal relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__lit__8310_zeitschrift"
|
||||
bundleId = 'b6c2ce0add1e7999f48d66b7ef1a4a26' # Literature to journal relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__lit__uuid':
|
||||
entityValues['fc751b683ba51648f4e7557e37e18228'] = value # Literature UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8310_zeitschrift__uuid':
|
||||
entityValues['fae46e3ca92e3a84b36df823fe0323bb'] = value # Journal UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__lit__uuid':
|
||||
entityValues['fc751b683ba51648f4e7557e37e18228'] = value # Literature UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8310_zeitschrift__uuid':
|
||||
entityValues['fae46e3ca92e3a84b36df823fe0323bb'] = value # Journal UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Literature to Journal Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing literature to journal relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importLiteratureToParentPublicationRelation(api, engine):
|
||||
print('importing literature to parent publication relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__lit__8292_uebergeordn_publ_"
|
||||
bundleId = 'b2adaaa15714d83ea83cd3333af437df' # Literature to parent publication relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__lit__8292_uebergeordn_publ_"
|
||||
bundleId = 'b2adaaa15714d83ea83cd3333af437df' # Literature to parent publication relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__lit__uuid':
|
||||
entityValues['f1ecd1cf9be1081507f9c8f3758bafe9'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8292_uebergeordn_publ___uuid':
|
||||
entityValues['f9997e4bbacb1c26a945825cfe5b6de2'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__lit__uuid':
|
||||
entityValues['f1ecd1cf9be1081507f9c8f3758bafe9'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8292_uebergeordn_publ___uuid':
|
||||
entityValues['f9997e4bbacb1c26a945825cfe5b6de2'] = value #
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Literature to Parent Publication Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing literature to parent publication relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,66 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarkToDatingRelation(api, engine):
|
||||
print('importing mark to dating relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__mar__68dm_datierung_marke"
|
||||
bundleId = 'b105b749b25de3aa55329b82fe18c18d' # Mark to dating relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__mar__68dm_datierung_marke"
|
||||
bundleId = 'b105b749b25de3aa55329b82fe18c18d' # Mark to dating relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['f11c6eedcfc833dabffd356f57be7e15'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__68dm_datierung_marke__uuid':
|
||||
entityValues['f2b469f3a10721ab891e01b1d9817612'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['f11c6eedcfc833dabffd356f57be7e15'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__68dm_datierung_marke__uuid':
|
||||
entityValues['f2b469f3a10721ab891e01b1d9817612'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Dating Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing mark to dating relation')
|
||||
|
|
|
|||
|
|
@ -5,83 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarkToLiteratureRelation(api, engine):
|
||||
print('importing mark to literature relation')
|
||||
test = False
|
||||
|
||||
test = True
|
||||
tableName = "r__mar__8330_lit_kurzt_"
|
||||
bundleId = 'bd58cc7d59ce9f3e593e758a28dfcf4a' # Mark to literature relation
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
tableName = "r__mar__8330_lit_kurzt_"
|
||||
bundleId = 'bd58cc7d59ce9f3e593e758a28dfcf4a' # Mark to literature relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['f4fccc9bad7fc559c153095bdcb32eeb'] = value # Mark UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['f19ffb27810f7d14694afb54dd359451'] = value # Literature UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['f4fccc9bad7fc559c153095bdcb32eeb'] = value # Mark UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['f19ffb27810f7d14694afb54dd359451'] = value # Literature UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Literature Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing mark to literature relation')
|
||||
|
|
|
|||
|
|
@ -5,81 +5,68 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarkToMarkInformationRelation(api, engine):
|
||||
print('importing mark to mark information relation')
|
||||
test = False
|
||||
tableName = "r__mar__6760_markenart"
|
||||
bundleId = 'b241e8063b9259428967fa4ff134a8bd' # Mark to mark information relation
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
test = False
|
||||
tableName = "r__mar__6760_markenart"
|
||||
bundleId = 'b241e8063b9259428967fa4ff134a8bd' # Mark to mark information relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['fa64f8812c3c784b2d91454bc9a88279'] = value # Mark UUID
|
||||
fUuid = value[0]
|
||||
case 'f__6760_markenart__uuid':
|
||||
entityValues['f9d5d6723ea78253330dd8e4b346cac6'] = value # Mark information assignment uuidNote
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['fa64f8812c3c784b2d91454bc9a88279'] = value # Mark UUID
|
||||
fUuid = value[0]
|
||||
case 'f__6760_markenart__uuid':
|
||||
entityValues['f9d5d6723ea78253330dd8e4b346cac6'] = value # Mark information assignment uuidNote
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Mark Information Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing mark to mark information relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarkToSourceRelation(api, engine):
|
||||
print('importing mark to source relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__mar__8130_que_kurzt_"
|
||||
bundleId = 'b0edbf644e07765a5ae319802ec0289b' # Mark to source relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__mar__8130_que_kurzt_"
|
||||
bundleId = 'b0edbf644e07765a5ae319802ec0289b' # Mark to source relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['ffe35cef0c5d28bbebe195436706fc7c'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8130_que_kurzt___uuid':
|
||||
entityValues['f86e4b7f52add5640b824a601c66a2f6'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__mar__uuid':
|
||||
entityValues['ffe35cef0c5d28bbebe195436706fc7c'] = value # Date
|
||||
fUuid = value[0]
|
||||
case 'f__8130_que_kurzt___uuid':
|
||||
entityValues['f86e4b7f52add5640b824a601c66a2f6'] = value # Note
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Source Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing mark to source relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importSourceToDateRelation(api, engine):
|
||||
print('importing source to date relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__que__8100_datum"
|
||||
bundleId = 'b4b8ba242075bf2c778894911c7f3264' # Source to date relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__que__8100_datum"
|
||||
bundleId = 'b4b8ba242075bf2c778894911c7f3264' # Source to date relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__que__uuid':
|
||||
entityValues['f2e8d1b76c8b196c8deb9e0abe90a5b3'] = value # Source UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8100_datum__uuid':
|
||||
entityValues['ff5ac62e6327599566d4474e18423265'] = value # Date UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__que__uuid':
|
||||
entityValues['f2e8d1b76c8b196c8deb9e0abe90a5b3'] = value # Source UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8100_datum__uuid':
|
||||
entityValues['ff5ac62e6327599566d4474e18423265'] = value # Date UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Source to Date Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing source to date relation')
|
||||
|
|
|
|||
|
|
@ -5,82 +5,69 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importSourceToLiteratureReferenceAssignmentRelation(api, engine):
|
||||
print('importing source to literature reference assignment relation')
|
||||
test = False
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = "r__que__8330_lit_kurzt_"
|
||||
bundleId = 'bed2f320214a0344287c6c4db40e9331' # Source to literature reference assignment relation
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
tableName = "r__que__8330_lit_kurzt_"
|
||||
bundleId = 'bed2f320214a0344287c6c4db40e9331' # Source to literature reference assignment relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__que__uuid':
|
||||
entityValues['faeb9c96c23eadd1a58df9ecd2154b68'] = value # Source UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['fc15a069f1a7694c13107a348d3b7a39'] = value # Literature reference assignment UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__que__uuid':
|
||||
entityValues['faeb9c96c23eadd1a58df9ecd2154b68'] = value # Source UUID
|
||||
fUuid = value[0]
|
||||
case 'f__8330_lit_kurzt___uuid':
|
||||
entityValues['fc15a069f1a7694c13107a348d3b7a39'] = value # Literature reference assignment UUID
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Source to Literature Reference Assignment relation entity
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
# Create Source to Literature Reference Assignment relation entity
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Source to Literature Reference Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finished importing source to literature reference assignment relation')
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ def initDb(_production, schemaDir):
|
|||
return (False, False)
|
||||
|
||||
if _production:
|
||||
dbName = 'ngk'
|
||||
dbName = 'ngk_data_alt'
|
||||
else:
|
||||
dbName = 'testngk'
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ def createClass(name, columns):
|
|||
tableName = name.lower().replace('-', '_').replace('.', '_').replace(' ', '_').replace('(', '_').replace(')', '_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss').replace('?', '_')
|
||||
|
||||
# Transform columns and add prefix
|
||||
attrs = {'__tablename__': tableName}
|
||||
attrs = {'__tablename__': tableName, '__table_args__': {'extend_existing': True}}
|
||||
attrs.update({prop.lower().replace('-', '_').replace('.', '_').replace(' ', '_').replace('(', '_').replace(')','_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss').replace('?', '_'): (Column(String(36), primary_key=True) if prop.lower() == 'uuid' else Column(Text)) for prop in columns})
|
||||
|
||||
# If 'uuid' is not in columns, add 'id' as primary key
|
||||
|
|
@ -30,9 +30,6 @@ def createClass(name, columns):
|
|||
# Create SQLAlchemy class
|
||||
cls = type(className, (Base,), attrs)
|
||||
|
||||
# Define the table with extend_existing=True
|
||||
Table(tableName, Base.metadata, extend_existing=True)
|
||||
|
||||
return cls
|
||||
|
||||
def initClassesFromSchemas(schemaDir):
|
||||
|
|
|
|||
|
|
@ -3,4 +3,4 @@ pandas
|
|||
pymysql
|
||||
sqlalchemy
|
||||
tqdm
|
||||
wisski_py
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue