new commit
This commit is contained in:
parent
da296f8a64
commit
e46a9fd4ec
69 changed files with 4199 additions and 4805 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -11,3 +11,5 @@ wisski_py
|
|||
__pycache__
|
||||
logs/*
|
||||
.venv
|
||||
.env
|
||||
.vscode
|
||||
|
|
|
|||
15
.vscode/launch.json
vendored
Normal file
15
.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
160
00_start.py
Normal file
160
00_start.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
from importlib import import_module
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
from time import sleep
|
||||
|
||||
# Import entities
|
||||
material_module = import_module("01_importMaterialsAndTechnique")
|
||||
administrator_module = import_module("02_importAdministrator")
|
||||
administrator_status_module = import_module("03_importAdministratorStatus")
|
||||
source_module = import_module("03_importSource")
|
||||
artist_source_reference_assignment_module = import_module("04_importArtistSourceReferenceAssignment")
|
||||
marks_module = import_module("04_importMarks")
|
||||
source_reference_assignment_module = import_module("04_importSourceReferenceAssignment")
|
||||
artist_module = import_module("05_importArtist")
|
||||
literature_module = import_module("06_importLiterature")
|
||||
inspection_mark_module = import_module("07_importInspectionMark")
|
||||
journal_assignment_module = import_module("07_importJournalAssignment")
|
||||
literature_reference_assignment_module = import_module("07_importLiteratureReferenceAssignment")
|
||||
parent_literature_assignment_module = import_module("07_importParentLiteratureAssignment")
|
||||
inspection_mark_location_module = import_module("08_importInspectionMarkLocation")
|
||||
inspection_mark_relation_module = import_module("09_importInspectionMarkRelation")
|
||||
mark_dating_info_module = import_module("10_importMarkDatingInfo")
|
||||
birth_module = import_module("12_importBirth")
|
||||
death_module = import_module("13_importDeath")
|
||||
dating_module = import_module("14_importDating")
|
||||
goldsmith_relation_module = import_module("15_importGoldsmithRelation")
|
||||
client_module = import_module("16_importClient")
|
||||
mentioned_module = import_module("17_importMentioned")
|
||||
num_dating_module = import_module("18_importNumDating")
|
||||
origin_assignment_module = import_module("19_importOriginAssignment")
|
||||
workshops_module = import_module("20_importWorkshops")
|
||||
artifacts_module = import_module("21_importArtifacts")
|
||||
artifact_relation_module = import_module("22_importArtifactRelation")
|
||||
artist_assignment_module = import_module("24_importArtistAssignment")
|
||||
mark_information_module = import_module("25_importMarkInformation")
|
||||
photographer_module = import_module("26_importPhotographer")
|
||||
|
||||
# Import relations
|
||||
artifact_to_artist_relation_module = import_module("98__r__importArtifactToArtistRelationRelation")
|
||||
artifact_to_client_assignment_relation_module = import_module("98__r__importArtifactToClientAssignmentRelation")
|
||||
artifact_to_inspection_mark_location_relation_module = import_module("98__r__importArtifactToInspectionMarkLocationRelation")
|
||||
artifact_to_literature_reference_assignment_relation_module = import_module("98__r__importArtifactToLiteratureReferenceAssignmentRelation")
|
||||
artifact_to_mark_information_assignment_relation_module = import_module("98__r__importArtifactToMarkInformationAssignmentRelation")
|
||||
artifact_to_material_relation_module = import_module("98__r__importArtifactToMaterialRelation")
|
||||
artifact_to_numerice_date_relation_module = import_module("98__r__importArtifactToNumericeDateRelation")
|
||||
artifact_to_photograph_relation_module = import_module("98__r__importArtifactToPhotographRelation")
|
||||
artifact_to_relation_relation_module = import_module("98__r__importArtifactToRelationRelation")
|
||||
artifact_to_source_relation_module = import_module("98__r__importArtifactToSourceRelation")
|
||||
artifact_to_status_administrator_relation_module = import_module("98__r__importArtifactToStatusAdministratorRelation")
|
||||
artist_to_birth_relation_module = import_module("98__r__importArtistToBirthRelation")
|
||||
artist_to_death_relation_module = import_module("98__r__importArtistToDeathRelation")
|
||||
artist_to_goldsmith_relation_module = import_module("98__r__importArtistToGoldsmithRelation")
|
||||
artist_to_literature_reference_relation_module = import_module("98__r__importArtistToLiteratureReferenceRelation")
|
||||
artist_to_mentioned_relation_module = import_module("98__r__importArtistToMentionedRelation")
|
||||
artist_to_origin_relation_module = import_module("98__r__importArtistToOriginRelation")
|
||||
artist_to_workshop_relation_module = import_module("98__r__importArtistToWorkshopRelation")
|
||||
inspection_mark_dating_information_assignment_relation_module = import_module("98__r__importInspectionMarkDatingInformationAssignmentRelation")
|
||||
inspection_mark_relation_relation_module = import_module("98__r__importInspectionMarkRelationRelation")
|
||||
inspection_mark_to_literature_reference_relation_module = import_module("98__r__importInspectionMarkToLiteratureReferenceRelation")
|
||||
literature_to_journal_relation_module = import_module("98__r__importLiteratureToJournalRelation")
|
||||
literature_to_parent_publication_relation_module = import_module("98__r__importLiteratureToParentPublicationRelation")
|
||||
mark_to_dating_relation_module = import_module("98__r__importMarkToDatingRelation")
|
||||
mark_to_literature_relation_module = import_module("98__r__importMarkToLiteratureRelation")
|
||||
mark_to_mark_information_relation_module = import_module("98__r__importMarkToMarkInformationRelation")
|
||||
mark_to_source_relation_module = import_module("98__r__importMarkToSourceRelation")
|
||||
source_to_date_relation_module = import_module("98__r__importSourceToDateRelation")
|
||||
source_to_literature_reference_assignment_relation_module = import_module("98__r__importSourceToLiteratureReferenceAssignmentRelation")
|
||||
|
||||
# Initialize the database.
# initDb is called with the schema directory; the script treats a falsy
# `engine` as "initialization failed".
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
if not engine:
    print('Database initialization failed.')
    # Exit with a non-zero status so shells/CI can detect the failure.
    # SystemExit is used instead of the interactive-only exit() helper.
    raise SystemExit(1)

# Load the environment variables from a .env file (if present).
load_dotenv()

# Fail fast with a readable message instead of handing None values to the
# API client and failing later with an unrelated error.
missing_settings = [
    name
    for name in ('API_URL', 'API_USERNAME', 'API_PASSWORD')
    if not os.getenv(name)
]
if missing_settings:
    print(f'Missing environment variables: {", ".join(missing_settings)}')
    raise SystemExit(1)

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
# Entities are imported first, so start with the default pathbuilder.
api.pathbuilders = ['default']
|
||||
|
||||
# Run the complete import. The whole sequence is retried on any error.
# importMaterialsAndTechnique in this commit skips rows already recorded in
# its ./logs CSV; presumably the other importers do the same, which is what
# makes a full re-run safe (verify per importer).
MAX_TRIALS = 3
RETRY_DELAY_SECONDS = 10

# Entity importers, in dependency order. They run with the 'default'
# pathbuilder. The functions are resolved here, outside the retry loop, so
# a misspelled importer name fails immediately instead of being retried.
entity_imports = (
    material_module.importMaterialsAndTechnique,
    administrator_module.importAdministrator,
    administrator_status_module.importAdministratorStatus,
    source_module.importSource,
    artist_source_reference_assignment_module.importArtistSourceReferenceAssignment,
    marks_module.importMarks,
    source_reference_assignment_module.importSourceReferenceAssignment,
    artist_module.importArtist,
    literature_module.importLiterature,
    inspection_mark_module.importInspectionMark,
    journal_assignment_module.importJournalAssignment,
    literature_reference_assignment_module.importLiteratureReferenceAssignment,
    parent_literature_assignment_module.importParentLiteratureAssignment,
    inspection_mark_location_module.importInspectionMarkLocation,
    inspection_mark_relation_module.importInspectionMarkRelation,
    mark_dating_info_module.importMarkDatingInfo,
    birth_module.importBirth,
    death_module.importDeath,
    dating_module.importDating,
    goldsmith_relation_module.importGoldsmithRelation,
    client_module.importClient,
    mentioned_module.importMentioned,
    num_dating_module.importNumDating,
    origin_assignment_module.importOriginAssignment,
    workshops_module.importWorkshops,
    artifacts_module.importArtifacts,
    artifact_relation_module.importArtifactRelation,
    artist_assignment_module.importArtistAssignment,
    mark_information_module.importMarkInformation,
    photographer_module.importPhotographer,
)

# Relation importers. They run with the 'relations' pathbuilder.
relation_imports = (
    artifact_to_artist_relation_module.importArtifactToArtistRelationRelation,
    artifact_to_client_assignment_relation_module.importArtifactToClientAssignmentRelation,
    artifact_to_inspection_mark_location_relation_module.importArtifactToInspectionMarkLocationRelation,
    artifact_to_literature_reference_assignment_relation_module.importArtifactToLiteratureReferenceAssignmentRelation,
    artifact_to_mark_information_assignment_relation_module.importArtifactToMarkInformationAssignmentRelation,
    artifact_to_material_relation_module.importArtifactToMaterialRelation,
    artifact_to_numerice_date_relation_module.importArtifactToNumericeDateRelation,
    artifact_to_photograph_relation_module.importArtifactToPhotographRelation,
    artifact_to_relation_relation_module.importArtifactToRelationRelation,
    artifact_to_source_relation_module.importArtifactToSourceRelation,
    artifact_to_status_administrator_relation_module.importArtifactToStatusAdministratorRelation,
    artist_to_birth_relation_module.importArtistToBirthRelation,
    artist_to_death_relation_module.importArtistToDeathRelation,
    artist_to_goldsmith_relation_module.importArtistToGoldsmithRelation,
    artist_to_literature_reference_relation_module.importArtistToLiteratureReferenceRelation,
    artist_to_mentioned_relation_module.importArtistToMentionedRelation,
    artist_to_origin_relation_module.importArtistToOriginRelation,
    artist_to_workshop_relation_module.importArtistToWorkshopRelation,
    inspection_mark_dating_information_assignment_relation_module.importInspectionMarkDatingInformationAssignmentRelation,
    inspection_mark_relation_relation_module.importInspectionMarkRelationRelation,
    inspection_mark_to_literature_reference_relation_module.importInspectionMarkToLiteratureReferenceRelation,
    literature_to_journal_relation_module.importLiteratureToJournalRelation,
    literature_to_parent_publication_relation_module.importLiteratureToParentPublicationRelation,
    mark_to_dating_relation_module.importMarkToDatingRelation,
    mark_to_literature_relation_module.importMarkToLiteratureRelation,
    mark_to_mark_information_relation_module.importMarkToMarkInformationRelation,
    mark_to_source_relation_module.importMarkToSourceRelation,
    source_to_date_relation_module.importSourceToDateRelation,
    source_to_literature_reference_assignment_relation_module.importSourceToLiteratureReferenceAssignmentRelation,
)

for trials in range(1, MAX_TRIALS + 1):
    try:
        # Reset the pathbuilder on every trial: a previous trial may have
        # failed after switching to 'relations'.
        api.pathbuilders = ['default']
        for run_import in entity_imports:
            run_import(api, engine)

        api.pathbuilders = ['relations']
        for run_import in relation_imports:
            run_import(api, engine)
    except Exception as e:
        # Top-level boundary: report the error and retry the whole sequence.
        print(f'Error: {e}')
        print(f'Trial {trials} of {MAX_TRIALS} failed.')
        if trials < MAX_TRIALS:
            # Only wait when another trial actually follows.
            print(f'Retrying in {RETRY_DELAY_SECONDS} seconds...')
            sleep(RETRY_DELAY_SECONDS)
    else:
        # Everything ran without an error; do not repeat the import.
        break
else:
    # Every trial failed: signal the failure through the exit status.
    print(f'Import failed after {MAX_TRIALS} trials.')
    raise SystemExit(1)
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilders = ['default']
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedMaterials.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load materials table
|
||||
materialsTable = pd.read_sql_table('c__5280_material', con=engine)
|
||||
|
||||
# Create materials
|
||||
for index, row in materialsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and materialsTable.loc[index, 'id'] == processedRows.iloc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed material {materialsTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
materialValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
materialValues['fedfe553c2332bd4902c887813f29ed8'] = value # UUID
|
||||
case 'f__5280_material':
|
||||
materialValues['f5f4251312f54c0d104ea87761b94bde'] = value # Material
|
||||
case 'f__5300_technik':
|
||||
materialValues['f231e08850022f091ebd5055d8aad30f'] = value # Technique
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
material = Entity(api=api, fields=materialValues, bundle_id='b45978f2b073ff3c73b3c7220ebb3b89')
|
||||
api.save(material)
|
||||
|
||||
print(f'Created material {index}: {material.uri}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': materialValues['fedfe553c2332bd4902c887813f29ed8'][0], 'uri': material.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedMaterials.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
79
01_importMaterialsAndTechnique.py
Normal file
79
01_importMaterialsAndTechnique.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
def _splitFieldValue(value):
    """Turn one table cell into a list of non-empty, stripped strings.

    "&", "," and the line-break placeholder (in its single- and
    double-brace spelling) all act as value separators. Empty items, e.g.
    from "&" directly followed by a placeholder, are dropped.
    """
    text = str(value)
    for placeholder in ('###{{new_line}}###', '###{new_line}###'):
        text = text.replace(placeholder, '&')
    return [
        piece.strip()
        for part in text.split('&')
        for piece in part.split(',')
        if piece.strip()
    ]


def importMaterialsAndTechnique(api, engine) -> None:
    """Create one WissKI entity per row of the materials table.

    Rows are read from the SQL table 'c__5280_material'. Every created
    entity is recorded in './logs/c__5280_material.csv' (columns id, uuid,
    uri). On a later run, a row is skipped when the log row at the same
    position carries the same id, so an interrupted import can be resumed.

    Args:
        api: WissKI API object; passed to Entity and used via api.save().
        engine: database connection passed to pandas.read_sql_table.
    """
    print('Importing materials and technique...')

    tableName = 'c__5280_material'
    bundleId = 'b45978f2b073ff3c73b3c7220ebb3b89'
    logPath = f'./logs/{tableName}.csv'
    uuidField = 'fedfe553c2332bd4902c887813f29ed8'
    # Table column -> WissKI field id.
    fieldIds = {
        'f__uuid': uuidField,  # UUID
        'f__5280_material': 'f5f4251312f54c0d104ea87761b94bde',  # Material
        'f__5300_technik': 'f231e08850022f091ebd5055d8aad30f',  # Technique
    }

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
    # The log directory may not exist yet; create it before the first entity
    # is saved so that writing the log cannot fail on a missing directory.
    os.makedirs('./logs', exist_ok=True)

    # Load materials table
    sqlTable = pd.read_sql_table(tableName, con=engine)
    totalRows = len(sqlTable)

    # Create materials
    for index, row in sqlTable.iterrows():
        rowId = sqlTable.loc[index, 'id']
        if index < len(processedRows) and rowId == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed material {rowId}')
            continue

        # Create Entity property dict (field id -> list of values)
        materialValues = {}
        for key, value in row.items():
            if pd.isna(value) or value == '':
                # skip if cell has no value (None, NaN or empty string)
                continue
            if key == 'id':
                # the database id is only used for the log
                continue
            if key not in fieldIds:
                print(f'{key} is not a valid field, skipping.')
                continue
            # Properties of an entity have to be an array
            values = _splitFieldValue(value)
            if values:
                materialValues[fieldIds[key]] = values

        # Create Material
        material = Entity(api=api, fields=materialValues, bundle_id=bundleId)
        api.save(material)

        print(f'Created material {index + 1} of {totalRows}: {material.uri}')

        # Write log. A row without UUID is logged with an empty uuid rather
        # than raising after the entity was already saved.
        logEntry = pd.DataFrame([{
            'id': row['id'],
            'uuid': materialValues.get(uuidField, [None])[0],
            'uri': material.uri,
        }])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')
|
||||
|
|
@ -5,41 +5,29 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilders = ['default']
|
||||
def importAdministrator(api, engine):
|
||||
print('Importing administrators...')
|
||||
tableName = 'c__vwr'
|
||||
bundleId = 'b4e5a6a31ff575ab09b07b5f27d322ab'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedAdministrators.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['administratorId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
administratorsTable = pd.read_sql_table('c__vwr', con=engine)
|
||||
administratorsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
administratorValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
|
||||
# Create administrators
|
||||
for index, row in administratorsTable.iterrows():
|
||||
administratorValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and administratorsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and administratorsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed administrator {administratorsTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed administrator {administratorsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
administratorValues = {}
|
||||
|
|
@ -49,10 +37,9 @@ for index, row in administratorsTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{new_line', '')
|
||||
value = str(value).replace('}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -96,14 +83,14 @@ for index, row in administratorsTable.iterrows():
|
|||
# Set Digitisation Process
|
||||
administratorValues['f3ec4640a87bd4534763af0fca050193'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Material
|
||||
administrator = Entity(api=api, fields=administratorValues, bundle_id='b4e5a6a31ff575ab09b07b5f27d322ab') # Administrator
|
||||
# Create Administrator
|
||||
administrator = Entity(api=api, fields=administratorValues, bundle_id=bundleId) # Administrator
|
||||
api.save(administrator)
|
||||
|
||||
print(f'Created administrator {index}: {administrator.uri}')
|
||||
print(f'Created administrator {index}: {administrator.uri} of {len(administratorsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'administratorId': administratorValues['f37e82c36b4fc6b275a1a86a389481e1'][0], 'uuid': administratorValues['f707e595ce7301d61c064e8e44c9c4f4'][0], 'uri': administrator.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedAdministrators.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorValues['f707e595ce7301d61c064e8e44c9c4f4'][0], 'uri': administrator.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing administrators')
|
||||
|
|
|
|||
|
|
@ -5,40 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importAdministratorStatus(api, engine):
|
||||
print('Importing administrator statuses...')
|
||||
tableName = 'c__ob28_status_verwalt_'
|
||||
bundleId = 'b45447146729190da3a1d3e19165a6f8'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedAdministratorStatus.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
administratorStatusTable = pd.read_sql_table('c__ob28_status_verwalt_', con=engine)
|
||||
|
||||
administratorStatusValues = {}
|
||||
administratorStatusTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create administratorStatuss
|
||||
for index, row in administratorStatusTable.iterrows():
|
||||
administratorStatusValues = {}
|
||||
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and administratorStatusTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and administratorStatusTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed administratorStatus {administratorStatusTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed administratorStatus {administratorStatusTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
administratorStatusValues = {}
|
||||
|
|
@ -48,6 +35,9 @@ for index, row in administratorStatusTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -83,6 +73,6 @@ for index, row in administratorStatusTable.iterrows():
|
|||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': administratorStatusValues['f5ea2a7495ec872781ddc06f862b4270'][0], 'uri': administratorStatus.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedAdministratorStatus.csv', index=False)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing administrator statuses')
|
||||
|
|
|
|||
|
|
@ -5,41 +5,29 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importSource(api, engine):
|
||||
print('Importing sources...')
|
||||
tableName = 'c__que'
|
||||
bundleId = 'b7dc57a93e008a58514b0d4ca26147b1'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedSources.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id','sourceId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sourcesTable = pd.read_sql_table('c__que', con=engine)
|
||||
sourcesTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
|
||||
sourceValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
|
||||
# Create sources
|
||||
for index, row in sourcesTable.iterrows():
|
||||
sourceValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
# For every row in table...
|
||||
if index < processedRows['id'].max():
|
||||
if index < len(processedRows) and sourcesTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed source {row['id']}')
|
||||
print(f"Skipping already processed source {sourcesTable.loc[index, 'id']}")
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
sourceValues = {}
|
||||
|
|
@ -49,9 +37,9 @@ for index, row in sourcesTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -116,13 +104,13 @@ for index, row in sourcesTable.iterrows():
|
|||
sourceValues['ffdf27e75013fa55d31f728ff5166f06'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Material
|
||||
source = Entity(api=api, fields=sourceValues, bundle_id='b7dc57a93e008a58514b0d4ca26147b1')
|
||||
source = Entity(api=api, fields=sourceValues, bundle_id=bundleId)
|
||||
api.save(source)
|
||||
|
||||
print(f'Created source {index}: {source.uri}')
|
||||
print(f'Created source {index}: {source.uri} of {len(sourcesTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'sourceId': sourceValues['f50ad6021b42c094f7e551faec831802'][0], 'uuid': sourceValues['f9f02815a5631a85948d4d258a455f49'][0], 'uri': source.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedSources.csv', index=False)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,28 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importArtistSourceReferenceAssignment(api, engine):
|
||||
print('Importing artist source reference assignments...')
|
||||
|
||||
tableName = "c__81kr_que_kt_kue"
|
||||
bundleId = 'bf71940d0b18c20511e2141159afb9de' # Artist source reference assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
entityValues = {}
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -81,7 +68,7 @@ for index, row in sqlTable.iterrows():
|
|||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,41 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importMarks(api, engine):
|
||||
print('Importing marks...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
# Simple log
|
||||
tableName = 'c__mar'
|
||||
bundleId = 'b2c4e1c984d7758d7c7ec719110f7125'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedMarks.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'markId', 'uuid', 'uri'])
|
||||
|
||||
# Load mark table
|
||||
markTable = pd.read_sql_table('c__mar', con=engine)
|
||||
print(f'Processing {len(markTable)} marks...')
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
print(f'Processing {len(sqlTable)} marks...')
|
||||
|
||||
# Create mark
|
||||
for index, row in markTable.iterrows():
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < processedRows['id'].max():
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed mark {row['id']}')
|
||||
print(f'Skipping already processed mark {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
markValues = {}
|
||||
|
|
@ -57,9 +43,9 @@ for index, row in markTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -172,6 +158,7 @@ for index, row in markTable.iterrows():
|
|||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
|
|
@ -260,13 +247,13 @@ for index, row in markTable.iterrows():
|
|||
markValues['f3baf98f752fc9638de175985183119a'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Mark
|
||||
mark = Entity(api=api, fields=markValues, bundle_id='b2c4e1c984d7758d7c7ec719110f7125')
|
||||
mark = Entity(api=api, fields=markValues, bundle_id=bundleId)
|
||||
api.save(mark)
|
||||
|
||||
print(f'Created mark number {index}: {mark.uri} of {len(markTable)}')
|
||||
print(f'Created mark number {index}: {mark.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'markId': markValues['fe577970c02f173170ff3848a36b3b79'][0], 'uuid': markValues['fb40b199b4032e55acc152f994e93b45'][0], 'uri': mark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedMarks.csv', index=False)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing marks')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,28 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importSourceReferenceAssignment(api, engine):
|
||||
print('Importing source reference assignments...')
|
||||
|
||||
tableName = "c__8130_que_kurzt_"
|
||||
bundleId = 'b3c4232e84c2f39795bd602f152ed6f0' # Source reference assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -78,9 +65,9 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created source reference assignment {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
print('finish')
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
print('finished importing source reference assignments')
|
||||
|
|
|
|||
|
|
@ -5,57 +5,44 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtist(api, engine):
|
||||
print('Importing artists...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
tableName = 'c__kue'
|
||||
bundleId = 'bc322be33491dacc600dd43fdee09a5c'
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
test = True
|
||||
test = False
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedArtists.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['artistId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
artistsTable = pd.read_sql_table('c__kue', con=engine)
|
||||
artistsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create artists
|
||||
for index, row in artistsTable.iterrows():
|
||||
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artist {artistsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
artistValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
imageValues = {}
|
||||
reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}
|
||||
|
||||
# Create artists
|
||||
for index, row in artistsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistsTable.loc[index, 'f__3000_kue_dok_nr_'] == processedRows.loc[index, 'artistId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artist {artistsTable.loc[index, "f__3000_kue_dok_nr_"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
artistValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -135,7 +122,7 @@ for index, row in artistsTable.iterrows():
|
|||
if value:
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0]) # add UUID to list
|
||||
|
||||
# Create Image Assignment entities and add their UUIDs to a list
|
||||
# because we link Artist and Image Assignment over the UUID
|
||||
|
|
@ -143,27 +130,29 @@ for index, row in artistsTable.iterrows():
|
|||
reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList # List of Image UUIDs
|
||||
reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
|
||||
api.save(reproNumberAssignment)
|
||||
else:
|
||||
reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = []
|
||||
|
||||
|
||||
if reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'][0]:
|
||||
if reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44']:
|
||||
artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]] # Image Assignment
|
||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||
artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
|
||||
# Create Material
|
||||
artist = Entity(api=api, fields=artistValues, bundle_id='bc322be33491dacc600dd43fdee09a5c')
|
||||
artist = Entity(api=api, fields=artistValues, bundle_id=bundleId)
|
||||
api.save(artist)
|
||||
|
||||
print(f'Created artist {index}: {artist.uri} of {len(artistsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'artistId': artistValues['f61deac361ac5e0731edbf214761d15c'][0], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedArtists.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
|
||||
if test:
|
||||
print('Testing mode activated. Exiting.')
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing artists')
|
||||
|
|
|
|||
|
|
@ -5,53 +5,39 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importLiterature(api, engine):
|
||||
print('Importing literature...')
|
||||
tableName = 'c__lit'
|
||||
bundleId = 'bafe9c3d3b640d4d1a16b104f367ac91'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedLiteratures.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'literatureId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
literaturesTable = pd.read_sql_table('c__lit', con=engine)
|
||||
literaturesTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
literatureValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
|
||||
# Create literatures
|
||||
for index, row in literaturesTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and literaturesTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and literaturesTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed literature {literaturesTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed literature {literaturesTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
literatureValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -120,7 +106,7 @@ for index, row in literaturesTable.iterrows():
|
|||
print(f'Created literature {index}: {literature.uri} of {len(literaturesTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'literatureId': literatureValues['f3bdd54b9ea5808a571200e9c60e103e'][0], 'uuid': literatureValues['fd58e0884f7cf63f8436c2789fcd2745'][0], 'uri': literature.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedLiteratures.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'docId': literatureValues['f3bdd54b9ea5808a571200e9c60e103e'][0], 'uuid': literatureValues['fd58e0884f7cf63f8436c2789fcd2745'][0], 'uri': literature.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,30 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
# Simple log
|
||||
def importInspectionMark(api, engine):
|
||||
print('Importing inspection marks...')
|
||||
tableName = 'c__bez'
|
||||
bundleId = 'baad021dfda9b89d5ba407dd0fca0d03'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedInspectionMarks.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'inspectionMarkId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])
|
||||
|
||||
# Load inspectionMark table
|
||||
inspectionMarkTable = pd.read_sql_table('c__bez', con=engine)
|
||||
|
|
@ -56,10 +41,9 @@ for index, row in inspectionMarkTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{new_line', '')
|
||||
value = str(value).replace('}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -114,6 +98,7 @@ for index, row in inspectionMarkTable.iterrows():
|
|||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte/', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
|
|
@ -189,13 +174,13 @@ for index, row in inspectionMarkTable.iterrows():
|
|||
inspectionMarkValues['f998036ccd7daaf2d9938934c93938f3'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Mark
|
||||
inspectionMark = Entity(api=api, fields=inspectionMarkValues, bundle_id='baad021dfda9b89d5ba407dd0fca0d03')
|
||||
inspectionMark = Entity(api=api, fields=inspectionMarkValues, bundle_id=bundleId)
|
||||
api.save(inspectionMark)
|
||||
|
||||
print(f'Created inspectionMark number {index}: {inspectionMark.uri} of {len(inspectionMarkTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'inspectionMarkId': inspectionMarkValues['fcdb19d95832ac030d353b5ba92796b7'][0], 'uuid': inspectionMarkValues['fb125fa322fe7c3c98446e382b1f22b9'][0], 'uri': inspectionMark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedInspectionMarks.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'docId': inspectionMarkValues['fcdb19d95832ac030d353b5ba92796b7'][0], 'uuid': inspectionMarkValues['fb125fa322fe7c3c98446e382b1f22b9'][0], 'uri': inspectionMark.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing inspection marks')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importJournalAssignment(api, engine):
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
test = False
|
||||
tableName = "c__8310_zeitschrift"
|
||||
bundleId = 'b5508ef3bb28f139ebdd9f6d545825c4'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -77,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created journal assignment {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,44 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importLiteratureReferenceAssignment(api, engine):
|
||||
print('Importing literature reference assignments...')
|
||||
|
||||
tableName = "c__8330_lit_kurzt_"
|
||||
bundleId = 'bdda154adecb26deed2d8b67dab8a0db' # Literature Reference Assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -79,10 +64,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created literature reference assignment {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importParentLiteratureAssignment(api, engine):
|
||||
|
||||
test = False
|
||||
tableName = "c__8292_uebergeordn_publ_"
|
||||
bundleId = 'bf55dda81ca0ddb4237a0d3ea495579b' # Parent literature assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -77,11 +62,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created parent literature assignment {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,40 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkLocation(api, engine):
|
||||
print('Importing inspection mark locations...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
tableName = 'c__67b0_bz_dok_nr'
|
||||
bundleId = 'b4158ec3a326d8ab504062296a82f13a'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedInspectionMarkLocation.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
inspectionMarkLocationsTable = pd.read_sql_table('c__67b0_bz_dok_nr', con=engine)
|
||||
|
||||
inspectionMarkLocationValues = {}
|
||||
inspectionMarkLocationsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create inspectionMarkLocations
|
||||
for index, row in inspectionMarkLocationsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and inspectionMarkLocationsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and inspectionMarkLocationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed inspectionMarkLocation {inspectionMarkLocationsTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed inspectionMarkLocation {inspectionMarkLocationsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
inspectionMarkLocationValues = {}
|
||||
|
|
@ -48,10 +34,9 @@ for index, row in inspectionMarkLocationsTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{new_line', '')
|
||||
value = str(value).replace('}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -73,13 +58,13 @@ for index, row in inspectionMarkLocationsTable.iterrows():
|
|||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
inspectionMarkLocation = Entity(api=api, fields=inspectionMarkLocationValues, bundle_id='b4158ec3a326d8ab504062296a82f13a')
|
||||
inspectionMarkLocation = Entity(api=api, fields=inspectionMarkLocationValues, bundle_id=bundleId)
|
||||
api.save(inspectionMarkLocation)
|
||||
|
||||
print(f'Created inspectionMarkLocation {index}: {inspectionMarkLocation.uri}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': inspectionMarkLocationValues['f65178b07306225efb0b556f6e4f54a5'][0], 'uri': inspectionMarkLocation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedInspectionMarkLocation.csv', index=False)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing inspection mark locations')
|
||||
|
|
|
|||
|
|
@ -5,40 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importInspectionMarkRelation(api, engine):
|
||||
print('Importing inspection mark relations...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
tableName = 'c__67b7_beziehung'
|
||||
bundleId = 'bd9b0ff8dc3a6d9284e1798531389bf1'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedInspectionMarkRelation.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
inspectionMarkRelationsTable = pd.read_sql_table('c__67b7_beziehung', con=engine)
|
||||
|
||||
inspectionMarkRelationValues = {}
|
||||
inspectionMarkRelationsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create inspectionMarkRelations
|
||||
for index, row in inspectionMarkRelationsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and inspectionMarkRelationsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and inspectionMarkRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed inspectionMarkRelation {inspectionMarkRelationsTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed inspectionMarkRelation {inspectionMarkRelationsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
inspectionMarkRelationValues = {}
|
||||
|
|
@ -48,9 +34,9 @@ for index, row in inspectionMarkRelationsTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,10 +61,10 @@ for index, row in inspectionMarkRelationsTable.iterrows():
|
|||
inspectionMarkRelation = Entity(api=api, fields=inspectionMarkRelationValues, bundle_id='bd9b0ff8dc3a6d9284e1798531389bf1')
|
||||
api.save(inspectionMarkRelation)
|
||||
|
||||
print(f'Created inspectionMarkRelation {index}: {inspectionMarkRelation.uri}')
|
||||
print(f'Created inspection mark relation {index}: {inspectionMarkRelation.uri}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'uuid': inspectionMarkRelationValues['ffd502413c286815811ae5546f73935b'][0], 'uri': inspectionMarkRelation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedInspectionMarkRelation.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': inspectionMarkRelationValues['ffd502413c286815811ae5546f73935b'][0], 'uri': inspectionMarkRelation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,40 +5,25 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importMarkDatingInfo(api, engine):
|
||||
print('Importing mark dating info...')
|
||||
tableName = 'c__68dm_datierung_marke'
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedDatingInfo.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
datingInfosTable = pd.read_sql_table('c__68dm_datierung_marke', con=engine)
|
||||
|
||||
datingInfoValues = {}
|
||||
datingInfosTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create datingInfos
|
||||
for index, row in datingInfosTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and datingInfosTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and datingInfosTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed datingInfo {datingInfosTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed datingInfo {datingInfosTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
datingInfoValues = {}
|
||||
|
|
@ -48,9 +33,9 @@ for index, row in datingInfosTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '###{{new_line}}###' in str(value):
|
||||
print('replaced curly braces')
|
||||
value = str(value).replace('###{{new_line}}###', '')
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,10 +60,10 @@ for index, row in datingInfosTable.iterrows():
|
|||
datingInfo = Entity(api=api, fields=datingInfoValues, bundle_id='b9cfb95e627e1710cf8d736d4ca5db64') #Dating Information Assignment
|
||||
api.save(datingInfo)
|
||||
|
||||
print(f'Created datingInfo {index}: {datingInfo.uri} of {len(datingInfosTable)}')
|
||||
print(f'Created mark dating info {index}: {datingInfo.uri} of {len(datingInfosTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'uuid': datingInfoValues['f74baaf58e49393cc89d6616ee197901'][0], 'uri': datingInfo.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedDatingInfo.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': datingInfoValues['f74baaf58e49393cc89d6616ee197901'][0], 'uri': datingInfo.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing mark dating info')
|
||||
|
|
|
|||
|
|
@ -1,97 +0,0 @@
|
|||
"""Import the rows of ``c__6760_markenart`` into WissKI as entities.

One entity of bundle ``bundleId`` is created per table row.  After each
successful save the row is appended to ``./logs/processed-<table>.csv`` so
that an interrupted run can be resumed: rows whose position and first column
match the log are skipped.
"""
import uuid  # For UUID creation
from initDb import initDb  # For database initialization
from wisski.api import Api, Pathbuilder, Entity  # For WissKI API
import os  # For environment variable loading
from dotenv import load_dotenv  # For environment variable loading
import pandas as pd  # For dataframe handling

# Initialize the database
print('Initializing the database...')
engine, metadata, Session = initDb(True, './schemas/')
# NOTE(review): this check implies initDb returns False for `engine` on
# failure -- confirm against initDb.
if engine == False:
    print('Database initialization failed.')
    exit()

# Load the environment variables
load_dotenv()

# Initialize the WissKI API
print('Initializing the WissKI API...')
api_url = os.getenv('API_URL')
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default')


tableName = "c__6760_markenart"
bundleId = 'bc7ce6906f78e760f22ff13226b1332d'  # Mark information assignment

# Source column -> WissKI field id. We use assignments for reification.
fieldIds = {
    'f__uuid': 'f3b8aaf7e79229b4da8214d491e375ec',  # UUID
    'f__5064_num__dat_': 'fe6921098808e68cae68f0858411826c',  # Artist Assignment
    'f__6894_anbr_ort': 'f694ed57271ab7be57249e0ee5c41ba4',  # Location
    'f__6700_mar_dok_nr_': 'fdd3380d4a11654f32687429796cabc3',  # Mark Document Number
    'f__6760_markenart': 'fd381aa9c3ebdf417e6cbccd60ede279',  # Mark Type
    'f__684c_bedeutung_bz': 'f4947de52885f517baef0cdf3cb53b61',  # Meaning Inspection Mark
    'f__684a_bedeutung_mz': 'f542c4c945725c6fdc5ab6409a877f02',  # Meaning Master Mark
    'f__6770_rosenb_nr_': 'f0ff7020a9c25ea2706875837fe61b04',  # Rosenberg Number
}

# Load the resume log, or start an empty one on the first run
try:
    processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
except FileNotFoundError:
    processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])

# Load the source table
sqlTable = pd.read_sql_table(tableName, con=engine)

entityValues = {}

# Create entities
for index, row in sqlTable.iterrows():
    # For every row in table...
    if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
        # skip if already processed (same position and same first column as the log)
        print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
        continue

    # Per-row state. Resetting these here keeps a row without a UUID from
    # crashing the log write or silently reusing the previous row's UUID.
    entityValues = {}
    docId = ''  # never populated for this table; kept so the log schema is unchanged
    fUuid = ''

    for key, value in row.items():
        # For every column in row...
        if (value is None) or (value == ''):
            # skip if cell has no value
            continue

        # Properties of an entity have to be an array, so...
        if '&' in str(value):
            # ...explode "&"-separated values to array items
            value = [x.strip() for x in str(value).split('&')]
        else:
            # ...or wrap the single value in an array
            value = [value]

        if key == 'id':
            # the primary key is only used for the log, not sent as a field
            continue

        fieldId = fieldIds.get(key)
        if fieldId is None:
            print(f'{key} is not a valid field, skipping.')
            continue

        entityValues[fieldId] = value
        if key == 'f__uuid':
            fUuid = value[0]

    # Create the entity for this row
    entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
    api.save(entity)

    print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')

    # Write log after every row so an interrupted run can resume
    logEntry = pd.DataFrame([{'id': row['id'], 'docId': docId, 'uuid': fUuid, 'uri': entity.uri}])
    processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
    processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)

print('finish')
|
||||
|
|
@ -5,44 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importBirth(api, engine):
|
||||
print('Importing birth...')
|
||||
|
||||
test = False
|
||||
tableName = "c__3270_geb_datum"
|
||||
bundleId = 'b54049ec931bffb62359b4bdb11435fc'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -87,12 +73,12 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created birth {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing birth')
|
||||
|
|
|
|||
|
|
@ -5,45 +5,28 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importDeath(api, engine):
|
||||
print('Importing death...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
test = True
|
||||
test = False
|
||||
|
||||
tableName = "c__3330_todes_dat_"
|
||||
bundleId = 'b487c08016f572b9ecf3f9173339fec3'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -90,11 +76,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created death {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
break
|
||||
|
|
|
|||
|
|
@ -5,44 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importDating(api, engine):
|
||||
print('Importing dating...')
|
||||
|
||||
tableName = "c__8100_datum"
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -65,7 +50,7 @@ for index, row in sqlTable.iterrows():
|
|||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
uuid = value[0]
|
||||
fUuid = value[0]
|
||||
case 'f__8100_datum':
|
||||
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__81bm_bem__datierung':
|
||||
|
|
@ -77,10 +62,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created dating {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,23 +5,8 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importGoldsmithRelation(api, engine):
|
||||
print('Importing goldsmith relation...')
|
||||
|
||||
test = False
|
||||
|
||||
|
|
@ -29,21 +14,19 @@ tableName = "c__3007_bezieh__zu_gs"
|
|||
bundleId = 'bef43e8a958e6a9bee04534b3841f6a0'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -82,12 +68,12 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created goldsmith relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing goldsmith relation')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importClient(api, engine):
|
||||
print('Importing client...')
|
||||
|
||||
|
||||
tableName = "c__410a_auftraggeber"
|
||||
bundleId = 'b85d9987d762fb4e8ce89a69b0b8de31'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -81,10 +67,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created client {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
def importMentioned(api, engine):
|
||||
print('Importing mentioned...')
|
||||
|
||||
test = False
|
||||
tableName = "c__7060_erwaehnt__datum_"
|
||||
bundleId = 'b04b1756b09ba3260de278824332ad6c'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -93,11 +79,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created mentioned {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,44 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importNumDating(api, engine):
|
||||
print('Importing num dating...')
|
||||
|
||||
tableName = "c__5064_num__dat_"
|
||||
bundleId = 'b9cfb95e627e1710cf8d736d4ca5db64' # Dating Information Assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.iloc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -65,7 +50,7 @@ for index, row in sqlTable.iterrows():
|
|||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f74baaf58e49393cc89d6616ee197901'] = value # UUID
|
||||
uuid = value[0]
|
||||
fUuid = value[0]
|
||||
case 'f__5064_num__dat_':
|
||||
entityValues['f0da3b36d16e16602bb550aff7d36297'] = value # Date
|
||||
case 'f__50bm_bem__datierung':
|
||||
|
|
@ -77,10 +62,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(tableName)}')
|
||||
print(f'Created num dating {index}: {entity.uri} of {len(tableName)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
|
|||
|
|
@ -5,44 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importOriginAssignment(api, engine):
|
||||
print('Importing origin assignment...')
|
||||
test = False
|
||||
tableName = "c__3204_herkunft"
|
||||
bundleId = 'b1d5be81f8b3dfbf9d6d90379cc0a14f'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -77,13 +62,13 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created origin assignment {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing origin assignments')
|
||||
|
|
|
|||
|
|
@ -5,41 +5,27 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importWorkshops(api, engine):
|
||||
print('Importing workshops...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
tableName = "c__nfws_forts_werkst_"
|
||||
bundleId = 'beb03bccbdffdd31567df370303c1e2d'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedWorkshops.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=[ 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
test = False
|
||||
# Load sources table
|
||||
workshopsTable = pd.read_sql_table('c__nfws_forts_werkst_', con=engine)
|
||||
|
||||
workshopValues = {}
|
||||
workshopsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create workshops
|
||||
for index, row in workshopsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and workshopsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
if index < len(processedRows) and workshopsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed workshop {workshopsTable.iloc[index, 0]}')
|
||||
print(f'Skipping already processed entity {workshopsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
workshopValues = {}
|
||||
|
|
@ -49,6 +35,9 @@ for index, row in workshopsTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -71,16 +60,16 @@ for index, row in workshopsTable.iterrows():
|
|||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
workshop = Entity(api=api, fields=workshopValues, bundle_id='beb03bccbdffdd31567df370303c1e2d')
|
||||
workshop = Entity(api=api, fields=workshopValues, bundle_id=bundleId)
|
||||
api.save(workshop)
|
||||
|
||||
print(f'Created workshop {index}: {workshop.uri} of {len(workshopsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'uuid': workshopValues['fa7c19f4d03d7d15acf588460654bbf2'][0], 'uri': workshop.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedWorkshops.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': workshopValues['fa7c19f4d03d7d15acf588460654bbf2'][0], 'uri': workshop.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing workshops')
|
||||
|
|
|
|||
207
21_importArtifacts.py
Normal file
207
21_importArtifacts.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Column name -> WissKI field id, for values stored directly on the Artifact.
_ARTIFACT_FIELDS = {
    'f__uuid': 'feb48c9a7efc444449b4b8defcd6d8bd',  # UUID
    'f__5000_obj_dok_nr_': 'f7e2a8a273ab3d577bf5854902550c09',  # Document Identifier
    'f__500n_ngk_nr_': 'f6e041bd0b16b21596849732c01cb168',  # NGK Number
    'f__5200_obj_titel': 'fd06dcc49a29b1a63fa4a789ec17e5c6',  # Title
    'f__5210_status': 'f35c9c9b0991729c36acb41645fe81d1',  # Status
    'f__5220_gattung': 'f2fd7f8a81d5eb1a20371b9acfd1ab59',  # Genre
    'f__5223_form__attribut': 'f05bbd6e29a7d303e4370b04c12b3f75',  # Formattribute
    'f__5226_art': 'f593fa773a6ea458101ba2325a18abbe',  # Artifact type
    'f__523f_funktion': 'f476ba24127d4dff1018acebf45a05f6',  # Function
    'f__5240_formtyp': 'fa7cfd9dbb3d2517c1898b3051d8dbed',  # Shape
    'f__524g_gestalt': 'f8309a21fa79bc6bd2506060b419d2df',  # Figure
    'f__55ng_darst__schlagw_': 'f6abbd4f39a6f79de5de2b14b98e51ff',  # Keywords
    'f__5bes_beschreibung': 'f26ad2bc1f084478cd7011f7b8451526',  # Description
    'f__5ges_geschichte': 'f40120d7c13ef02b486c69245f6c2306',  # History
    'f__68an_abdruck_nr_': 'fd3740649cc06f45677eb0546908cdac',  # Print Number
    'f__stwv_statwerkverz': 'fee0db94d62fae6370a89ff4757ff539',  # Catalogue_of_Works
    'f__9990_kommentar': 'fefe289aa0c9563a153be6da7d37e3ff',  # Comment
    'f__ptxt_plug_in_text': 'ffb8b04e8d57929a596fc32d6a84d07d',  # Plugin text
}

# Column name -> WissKI field id, for values stored on the Digitisation Process.
_DIGITISATION_FIELDS = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}

# Column name -> dimension type written to the Dimension entity created for it.
_DIMENSION_TYPES = {
    'f__5362_hoehe': 'height',
    'f__5364_breite': 'width',
    'f__5366_tiefe': 'depth',
    'f__5368_laenge': 'length',
    'f__5370_durchmesser': 'diameter',
    'f__5380_gewicht': 'weight',
    'f__538h_hist__gewicht': 'historical_weight',
}

# Directory prefixes found in reproduction numbers and their replacements,
# applied in this order.
_IMAGE_PATH_REPLACEMENTS = (
    ('Objekte/', 'objects/'),
    ('Objekte\\', 'objects/'),
    ('Objekte3\\', 'objects/'),
    ('Objekte4\\', 'objects/'),
    ('objekte4\\', 'objects/'),
    ('Objekte5\\', 'objects/'),
    ('objekte5\\', 'objects/'),
    ('Marken\\', 'marks/'),
    ('Marken/', 'marks/'),
)


def _isEmptyCell(cell):
    """Return True when a table cell carries no value (None, '', NaN or NaT)."""
    if cell is None:
        return True
    if isinstance(cell, str):
        return cell == ''
    # pd.isna yields an array for list-likes, so only ask it about scalars.
    return bool(pd.api.types.is_scalar(cell) and pd.isna(cell))


def _cellToValues(cell):
    """Convert a raw cell into the list of strings used as an entity field value."""
    text = str(cell)
    # A line-break marker directly after '&' must not produce an empty item.
    text = text.replace('&###{new_line}###', '&')
    # NOTE(review): this marker is spelled with doubled braces while the one
    # above uses single braces; kept exactly as found - confirm which spelling
    # the source data really contains.
    text = text.replace('###{{new_line}}###', '&')
    if '&' in text:
        # Explode "&"-separated values; strip() also removes the blanks around
        # ' & ', so no separate whitespace normalisation is needed.
        return [part.strip() for part in text.split('&')]
    return [text]


def _normaliseImageName(name):
    """Rewrite the legacy directory prefixes of a reproduction number."""
    for old, new in _IMAGE_PATH_REPLACEMENTS:
        name = name.replace(old, new)
    return name


def importArtifacts(api, engine):
    """Import the rows of table ``c__obj`` as Artifact entities.

    Rows already recorded in ``./logs/c__obj.csv`` (same position, same
    ``id``) are skipped. For every other row the function saves, in this
    order: a Production Place Assignment, one Dimension per filled dimension
    column, one Image per reproduction number, an Image Assignment (only if
    there are images), a Digitisation Process and finally the Artifact, which
    references the others through their generated UUIDs. After each artifact
    the log CSV is rewritten so an interrupted run can resume.

    Args:
        api: Object handed to ``Entity(api=...)`` and whose ``save()`` is
            called for every created entity.
        engine: Connection passed to ``pandas.read_sql_table``.
    """
    print('Importing artifacts...')

    tableName = "c__obj"
    bundleId = 'bd30c2c64a3caa8bb1628c780c3f24bb'
    logPath = f'./logs/{tableName}.csv'

    # Make sure the log can be written *before* anything is saved remotely;
    # otherwise a failed log write would cause duplicates on the next run.
    os.makedirs('./logs', exist_ok=True)

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load artifacts table
    artifactsTable = pd.read_sql_table(tableName, con=engine)

    # Create artifacts
    for index, row in artifactsTable.iterrows():
        alreadyLogged = (
            index < len(processedRows)
            and artifactsTable.loc[index, 'id'] == processedRows.loc[index, 'id']
        )
        if alreadyLogged:
            print(f'Skipping already processed artifact {artifactsTable.loc[index, "id"]}')
            continue

        # Field dicts of the entities built for this row. The helper entities
        # get a fresh UUID up front because the artifact links to them by UUID.
        artifactValues = {}
        digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
        imageValues = {}
        imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
        productionPlaceAssignmentValues = {'f40cc95db3ccaa1dbbf27294338d9f07': [str(uuid.uuid4())]}
        dimensionValues = {}

        for key, value in row.items():
            if key == 'id' or _isEmptyCell(value):
                # The primary key is not a field; empty cells are not imported.
                continue

            # Properties of an entity have to be arrays.
            value = _cellToValues(value)

            # Map columns to fields. Assignments are used for reification.
            if key in _ARTIFACT_FIELDS:
                artifactValues[_ARTIFACT_FIELDS[key]] = value
            elif key in _DIGITISATION_FIELDS:
                digitisationProcessValues[_DIGITISATION_FIELDS[key]] = value
            elif key in _DIMENSION_TYPES:
                # Each dimension column becomes its own Dimension entity.
                dimensionValues[key] = {
                    'f31e9c7e2de5549daea1790a74615288': [_DIMENSION_TYPES[key]],  # Type
                    'f3f805d270890837a6493e7e60a96487': value,  # Dimension
                    'f802fd7bf45be523a9b188411a591420': [str(uuid.uuid4())],  # UUID
                }
            elif key == 'f__5130_entst_ort':
                # The production place lives on the Production Place Assignment.
                productionPlaceAssignmentValues['f43f9589eef324fb12c26226dfe94246'] = value  # Production Place
            elif key == 'f__8540_repro_nr_':
                # Each reproduction number becomes its own Image entity.
                for item in value:
                    if not item:
                        # An empty item (e.g. from "a&&b") is not an image.
                        continue
                    item = _normaliseImageName(item)
                    imageValues[item] = {
                        'feb10344eaa7a5f414d1e8392853eba9': [item],  # Reproduction Number (Image)
                        'fc8d57e55f203c75c2f8a1ae79378ac7': ['public://artifact_images/' + item + '.jpg'],  # File
                        'f11beac4b638016479e6f3fbc7e55d1a': [str(uuid.uuid4())],  # UUID
                    }
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Production Place Assignment (saved immediately, not batched).
        productionPlaceAssignment = Entity(api=api, fields=productionPlaceAssignmentValues, bundle_id='b13bc6dc04d4bbdafb9536987eb43244')
        api.save(productionPlaceAssignment)

        # Create Dimension entities and collect their UUIDs,
        # because Artifact and Dimension are linked over the UUID.
        dimension = []
        for fields in dimensionValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b73258adf62f35bd1be3fa2863fab558'))
            dimension.append(fields['f802fd7bf45be523a9b188411a591420'][0])

        # Create Image entities and collect their UUIDs,
        # because Image Assignment and Image are linked over the UUID.
        imageList = []
        for fields in imageValues.values():
            api.save(Entity(api=api, fields=fields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3'))
            imageList.append(fields['f11beac4b638016479e6f3fbc7e55d1a'][0])

        # Create the Image Assignment only when there is something to assign.
        if imageList:
            imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList  # List of Image UUIDs
            imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
            api.save(imageAssignment)

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Link the artifact to the helper entities via their UUIDs.
        # NOTE(review): the original author flagged the Production Place
        # Assignment value as unverified - confirm it is correct.
        artifactValues['f2676a0fb8db6ab62235328ae7c7a4b3'] = [productionPlaceAssignmentValues['f40cc95db3ccaa1dbbf27294338d9f07'][0]]  # Production Place Assignment
        artifactValues['fc700eb3f24f4f2a6c165128aa7117f1'] = dimension  # Dimension
        # NOTE(review): the Image Assignment UUID is linked even when no Image
        # Assignment entity was saved above (row without images) - kept as in
        # the original; confirm this is intended.
        artifactValues['f7af1cd9c77448281dd7ecf29ba57e3e'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]]  # Image Assignment
        artifactValues['f5a3f90d920da3db4cfdbaa6264b0e89'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

        # Create Artifact
        artifact = Entity(api=api, fields=artifactValues, bundle_id=bundleId)
        api.save(artifact)

        print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')

        # Write log. The artifact is already saved at this point, so a row
        # without UUID must not abort the logging (that would re-import it on
        # the next run); it is logged with an empty uuid instead.
        logEntry = pd.DataFrame([{
            'id': row['id'],
            'uuid': artifactValues.get('feb48c9a7efc444449b4b8defcd6d8bd', [None])[0],
            'uri': artifact.uri,
        }])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finished importing artifacts')
|
||||
|
|
@ -1,213 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedArtifacts.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['artifactId', 'uuid', 'uri'])
|
||||
|
||||
# Load artifacts table
|
||||
artifactsTable = pd.read_sql_table('c__obj', con=engine)
|
||||
|
||||
# Create artifacts
|
||||
for index, row in artifactsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artifactsTable.iloc[index, 0] == processedRows.iloc[index, 0]:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artifact {artifactsTable.iloc[index, 0]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
artifactValues = {}
|
||||
creationValues = {}
|
||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||
imageValues = {}
|
||||
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
|
||||
productionPlaceAssignmentValues = {'f40cc95db3ccaa1dbbf27294338d9f07': [str(uuid.uuid4())]}
|
||||
dimensionValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
match key:
|
||||
case 'id':
|
||||
continue
|
||||
case 'f__uuid':
|
||||
artifactValues['feb48c9a7efc444449b4b8defcd6d8bd'] = value # UUID
|
||||
case 'f__5000_obj_dok_nr_':
|
||||
artifactValues['f7e2a8a273ab3d577bf5854902550c09'] = value # Document Identifier
|
||||
docId = value[0]
|
||||
case 'f__500n_ngk_nr_':
|
||||
artifactValues['f6e041bd0b16b21596849732c01cb168'] = value # NGK Number
|
||||
case 'f__5130_entst_ort':
|
||||
# We map productions place to Production Place Assignment entity.
|
||||
productionPlaceAssignmentValues['f43f9589eef324fb12c26226dfe94246'] = value # Production Place
|
||||
case 'f__5200_obj_titel':
|
||||
artifactValues['fd06dcc49a29b1a63fa4a789ec17e5c6'] = value # Title
|
||||
case 'f__5210_status':
|
||||
artifactValues['f35c9c9b0991729c36acb41645fe81d1'] = value # Status
|
||||
case 'f__5220_gattung':
|
||||
artifactValues['f2fd7f8a81d5eb1a20371b9acfd1ab59'] = value # Genre
|
||||
case 'f__5223_form__attribut':
|
||||
artifactValues['f05bbd6e29a7d303e4370b04c12b3f75'] = value # Formattribute
|
||||
case 'f__5226_art':
|
||||
artifactValues['f593fa773a6ea458101ba2325a18abbe'] = value # artifact type
|
||||
case 'f__523f_funktion':
|
||||
artifactValues['f476ba24127d4dff1018acebf45a05f6'] = value # Function
|
||||
case 'f__5240_formtyp':
|
||||
artifactValues['fa7cfd9dbb3d2517c1898b3051d8dbed'] = value # Shape
|
||||
case 'f__524g_gestalt':
|
||||
artifactValues['f8309a21fa79bc6bd2506060b419d2df'] = value # Figure
|
||||
case 'f__5362_hoehe':
|
||||
# We map dimensions to Dimension entity.
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['height'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
case 'f__5364_breite':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['width'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
case 'f__5366_tiefe':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['depth'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
case 'f__5368_laenge':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['length'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
case 'f__5370_durchmesser':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['diameter'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
|
||||
case 'f__5380_gewicht':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['weight'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__538h_hist__gewicht':
|
||||
dimensionValues.setdefault(key, {})['f31e9c7e2de5549daea1790a74615288'] = ['historical_weight'] # Type
|
||||
dimensionValues[key]['f3f805d270890837a6493e7e60a96487'] = value # Dimension
|
||||
dimensionValues[key]['f802fd7bf45be523a9b188411a591420'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__55ng_darst__schlagw_':
|
||||
artifactValues['f6abbd4f39a6f79de5de2b14b98e51ff'] = value # Keywords
|
||||
case 'f__5bes_beschreibung':
|
||||
artifactValues['f26ad2bc1f084478cd7011f7b8451526'] = value # Description
|
||||
case 'f__5ges_geschichte':
|
||||
artifactValues['f40120d7c13ef02b486c69245f6c2306'] = value # History
|
||||
case 'f__68an_abdruck_nr_':
|
||||
artifactValues['fd3740649cc06f45677eb0546908cdac'] = value # Print Number
|
||||
case 'f__8540_repro_nr_':
|
||||
# We map images to Image entity
|
||||
for item in value:
|
||||
if item is not None:
|
||||
# Replace dir paths in name
|
||||
item = item.replace('Objekte/', 'objects/')
|
||||
item = item.replace('Objekte\\', 'objects/')
|
||||
item = item.replace('Objekte3\\', 'objects/')
|
||||
item = item.replace('Objekte4\\', 'objects/')
|
||||
item = item.replace('Objekte5\\', 'objects/')
|
||||
item = item.replace('objekte5\\', 'objects/')
|
||||
item = item.replace('Marken\\', 'marks/')
|
||||
item = item.replace('Marken/', 'marks/')
|
||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
|
||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||
case 'f__stwv_statwerkverz':
|
||||
artifactValues['fee0db94d62fae6370a89ff4757ff539'] = value # Catalogue_of_Works
|
||||
case 'f__9990_kommentar':
|
||||
artifactValues['fefe289aa0c9563a153be6da7d37e3ff'] = value # Comment
|
||||
case 'f__9900_datum_erfassung':
|
||||
digitisationProcessValues['f1f5dd22371e5c1de41e0fb099e0e862'] = value # Recording date
|
||||
case 'f__99ae_datum_aenderung':
|
||||
digitisationProcessValues['f8976c6a9e5d91fe9caba8a57c27f204'] = value # Change date
|
||||
case 'f__efbm_bem_erfassung':
|
||||
digitisationProcessValues['f78a6310d13c717b82ddba814ac59024'] = value # Recording note
|
||||
case 'f__ptxt_plug_in_text':
|
||||
artifactValues['ffb8b04e8d57929a596fc32d6a84d07d'] = value # Plugin text
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Production Place Assignment
|
||||
productionPlaceAssignment = Entity(api=api, fields=productionPlaceAssignmentValues, bundle_id='b13bc6dc04d4bbdafb9536987eb43244')
|
||||
api.save(productionPlaceAssignment) # Kai says, we can save all entities at once, but I save it instantly
|
||||
|
||||
|
||||
# Create Dimension entities and add their UUIDs to a list
|
||||
# because we link Artifact and Dimension over the UUID
|
||||
dimension = []
|
||||
for key, value in dimensionValues.items():
|
||||
dimensionItem = Entity(api=api, fields=value, bundle_id='b73258adf62f35bd1be3fa2863fab558')
|
||||
api.save(dimensionItem)
|
||||
dimension.append(value['f802fd7bf45be523a9b188411a591420'][0])
|
||||
|
||||
# Create Image entities and add their UUIDs to a list
|
||||
# because we link Image Assignment and Image over the UUID
|
||||
imageList = []
|
||||
for key, value in imageValues.items():
|
||||
imageItem = Entity(api=api, fields=value, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
|
||||
api.save(imageItem)
|
||||
imageList.append(value['f11beac4b638016479e6f3fbc7e55d1a'][0])
|
||||
|
||||
# Create Image Assignment entities and add their UUIDs to a list
|
||||
# because we link Artifact and Image Assignment over the UUID
|
||||
if imageList:
|
||||
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
|
||||
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
|
||||
api.save(imageAssignment)
|
||||
|
||||
# Create Digitisation Process
|
||||
digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
|
||||
api.save(digitisationProcess)
|
||||
|
||||
# Add the field values for reference
|
||||
# UWAGA! Is the Value Production Place Assignment Correct? UWAGA!
|
||||
artifactValues['f2676a0fb8db6ab62235328ae7c7a4b3'] = [productionPlaceAssignmentValues['f40cc95db3ccaa1dbbf27294338d9f07'][0]] # Production Place Assignment
|
||||
artifactValues['fc700eb3f24f4f2a6c165128aa7117f1'] = dimension # Dimension
|
||||
artifactValues['f7af1cd9c77448281dd7ecf29ba57e3e'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
|
||||
artifactValues['f5a3f90d920da3db4cfdbaa6264b0e89'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||
|
||||
# Create Artifact
|
||||
artifact = Entity(api=api, fields=artifactValues, bundle_id='bd30c2c64a3caa8bb1628c780c3f24bb')
|
||||
api.save(artifact)
|
||||
|
||||
print(f'Created artifact {index}: {artifact.uri} of {len(artifactsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'artifactId': artifactValues['f7e2a8a273ab3d577bf5854902550c09'][0], 'uuid': artifactValues['feb48c9a7efc444449b4b8defcd6d8bd'][0], 'uri': artifact.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedArtifacts.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
|
|
@ -5,44 +5,26 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
def importArtifactRelation(api, engine):
|
||||
print('Importing artifact relation...')
|
||||
|
||||
tableName = "c__5007_beziehung"
|
||||
bundleId = 'bf4a13ee46de57819f88834caaddc301' # Artifact relation assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.ioc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed artifact relation {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -65,7 +50,7 @@ for index, row in sqlTable.iterrows():
|
|||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['ff7ebd530eb53efc489e80d9bbef293e'] = value # UUID
|
||||
uuid = value[0]
|
||||
fUuid = value[0]
|
||||
case 'f__5008_bez_obj_nr_':
|
||||
entityValues['f39d0e5207a375070d84b958017a62e8'] = value # Artifact Document Identifier
|
||||
case 'f__bebm_bem_beziehung':
|
||||
|
|
@ -79,10 +64,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created artifact relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact relation')
|
||||
|
|
|
|||
|
|
@ -5,50 +5,38 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
def importArtistAssignment(api, engine):
|
||||
print('Importing artist assignment...')
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
tableName = "c__ob30_bez_kuenstler"
|
||||
bundleId = 'bc8826cc7d9c9373ce71cfc0251c2a4f'
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processedArtistAssignment.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
artistRelationsTable = pd.read_sql_table('c__ob30_bez_kuenstler', con=engine)
|
||||
|
||||
artistRelationValues = {}
|
||||
artistRelationsTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
# Create artistRelations
|
||||
for index, row in artistRelationsTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and artistRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and artistRelationsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed artistRelation {artistRelationsTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed artistAssignment {artistRelationsTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
artistRelationValues = {}
|
||||
for key, value in row.items():
|
||||
print('value: ', value)
|
||||
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -71,13 +59,13 @@ for index, row in artistRelationsTable.iterrows():
|
|||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
|
||||
artistRelation = Entity(api=api, fields=artistRelationValues, bundle_id='bc8826cc7d9c9373ce71cfc0251c2a4f')
|
||||
artistRelation = Entity(api=api, fields=artistRelationValues, bundle_id=bundleId)
|
||||
api.save(artistRelation)
|
||||
|
||||
print(f'Created artistRelation {index}: {artistRelation.uri} of {len(artistRelationsTable)}')
|
||||
print(f'Created artist assignment {index}: {artistRelation.uri} of {len(artistRelationsTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': artistRelationValues['fc150259d31fea8a3f992e7beb901fa4'][0], 'uri': artistRelation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processedArtistAssignment.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': artistRelationValues['fc150259d31fea8a3f992e7beb901fa4'][0], 'uri': artistRelation.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artist assignment')
|
||||
|
|
|
|||
82
25_importMarkInformation.py
Normal file
82
25_importMarkInformation.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
def importMarkInformation(api, engine):
    """Import every row of ``c__6760_markenart`` as a WissKI entity.

    Each table row becomes one entity of the mark information bundle. After
    every saved entity the progress log ``./logs/c__6760_markenart.csv`` is
    rewritten, so an interrupted run can be resumed: rows whose ``id`` matches
    the log entry at the same position are skipped.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connectable handed to ``pandas.read_sql_table``.
    """
    print('Importing mark information...')

    tableName = "c__6760_markenart"
    bundleId = 'bc7ce6906f78e760f22ff13226b1332d'  # Mark information assignment
    uuidField = 'f3b8aaf7e79229b4da8214d491e375ec'

    # Source column -> WissKI field id. We use assignments for reification.
    fieldByColumn = {
        'f__uuid': uuidField,  # UUID
        'f__5064_num__dat_': 'fe6921098808e68cae68f0858411826c',  # Artist Assignment
        'f__6894_anbr_ort': 'f694ed57271ab7be57249e0ee5c41ba4',  # Location
        'f__6700_mar_dok_nr_': 'fdd3380d4a11654f32687429796cabc3',  # Mark Document Number
        'f__6760_markenart': 'fd381aa9c3ebdf417e6cbccd60ede279',  # Mark Type
        'f__684c_bedeutung_bz': 'f4947de52885f517baef0cdf3cb53b61',  # Meaning Inspection Mark
        'f__684a_bedeutung_mz': 'f542c4c945725c6fdc5ab6409a877f02',  # Meaning Master Mark
        'f__6770_rosenb_nr_': 'f0ff7020a9c25ea2706875837fe61b04',  # Rosenberg Number
    }

    # Both spellings are accepted: the original code targeted the single-brace
    # form in one replace (via ``str.format``) and the double-brace form in the
    # next, so which one the data really contains is not visible from here.
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])

    # Load source table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # For every row in table...
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        # Create Entity property dict
        entityValues = {}
        # NOTE(review): no source column is mapped to docId, so the log column
        # stays empty -- confirm whether the Mark Document Number was intended.
        docId = ''

        for key, value in row.items():
            # For every column in row...
            if key == 'id':
                # The primary key is only used for the progress log.
                continue
            isMissing = value is None or (pd.api.types.is_scalar(value) and pd.isna(value))
            if isMissing or value == '':
                # skip if cell has no value (None, NaN/NaT from SQL NULLs, or '')
                continue

            fieldId = fieldByColumn.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array, so normalise the
            # new-line markers to "&" and explode "&"-separated values.
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace(marker, '&')
            if '&' in text:
                # Drop empty items produced by doubled or dangling separators.
                items = [part.strip() for part in text.split('&')]
                items = [part for part in items if part]
            else:
                items = [text]
            if items:
                entityValues[fieldId] = items

        # Create mark information entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        print(f'Created mark information {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is taken from this row's values so that a row
        # without a UUID can neither reuse the previous row's value nor raise
        # a NameError.
        fUuid = entityValues.get(uuidField, [None])[0]
        logEntry = pd.DataFrame([{'id': row['id'], 'docId': docId, 'uuid': fUuid, 'uri': entity.uri}])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finish')
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('default')
|
||||
|
||||
|
||||
tableName = "c__8490_fotograf"
|
||||
bundleId = 'b821fb6c518948b7f40d17803b6ce293' # Photographer assignment
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
||||
entityValues = {}
|
||||
|
||||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
for key, value in row.items():
|
||||
# For every column in row...
|
||||
if (value is None) or (value == ''):
|
||||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
else:
|
||||
# ...Or parse to array
|
||||
value = [value]
|
||||
# Map columns to fields. We use assignments for reification.
|
||||
docId = ''
|
||||
match key:
|
||||
case 'id':
|
||||
docId = value[0]
|
||||
case 'f__uuid':
|
||||
entityValues['f6c3c3e35af2f2073fd517aabf88fa7c'] = value # UUID
|
||||
docUuid = value[0]
|
||||
case 'f__8490_fotograf':
|
||||
entityValues['fe8f8b235f896862b74caa0fa8f3682d'] = value # Photographer
|
||||
case 'f__8494_aufn_datum':
|
||||
entityValues['f12c7538643314f0f46ba76a5140a87d'] = value # Recording Date
|
||||
case 'f__8470_aufnahmenr_':
|
||||
entityValues['ff6ec986fb4cc5a2f34deb7144f2f817'] = value # Recording number
|
||||
case 'f__849r_repro_datei': # Image Assignment
|
||||
entityValues['f24a609593559a904a0a0f2e215db584'] = value # Reproduction Number
|
||||
case _:
|
||||
print(f'{key} is not a valid field, skipping.')
|
||||
|
||||
# Create Material
|
||||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': docUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
75
26_importPhotographer.py
Normal file
75
26_importPhotographer.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import uuid # For UUID creation
|
||||
from initDb import initDb # For database initialization
|
||||
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
||||
import os # For environment variable loading
|
||||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
def importPhotographer(api, engine):
    """Import every row of ``c__8490_fotograf`` as a WissKI entity.

    Each table row becomes one entity of the photographer assignment bundle.
    After every saved entity the progress log ``./logs/c__8490_fotograf.csv``
    is rewritten, so an interrupted run can be resumed: rows whose ``id``
    matches the log entry at the same position are skipped.

    Args:
        api: WissKI API client; only ``api.save(entity)`` is called here.
        engine: Database connectable handed to ``pandas.read_sql_table``.
    """
    print('Importing photographer...')

    tableName = "c__8490_fotograf"
    bundleId = 'b821fb6c518948b7f40d17803b6ce293'  # Photographer assignment
    uuidField = 'f6c3c3e35af2f2073fd517aabf88fa7c'

    # Source column -> WissKI field id. We use assignments for reification.
    fieldByColumn = {
        'f__uuid': uuidField,  # UUID
        'f__8490_fotograf': 'fe8f8b235f896862b74caa0fa8f3682d',  # Photographer
        'f__8494_aufn_datum': 'f12c7538643314f0f46ba76a5140a87d',  # Recording Date
        'f__8470_aufnahmenr_': 'ff6ec986fb4cc5a2f34deb7144f2f817',  # Recording number
        'f__849r_repro_datei': 'f24a609593559a904a0a0f2e215db584',  # Reproduction Number (Image Assignment)
    }

    # Both spellings are accepted: the original code targeted the single-brace
    # form in one replace (via ``str.format``) and the double-brace form in the
    # next, so which one the data really contains is not visible from here.
    newLineMarkers = ('###{{new_line}}###', '###{new_line}###')

    try:
        processedRows = pd.read_csv(f'./logs/{tableName}.csv')
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load source table
    sqlTable = pd.read_sql_table(tableName, con=engine)

    # Create entities
    for index, row in sqlTable.iterrows():
        # For every row in table...
        if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            # skip if already processed
            print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
            continue

        # Create Entity property dict
        entityValues = {}

        for key, value in row.items():
            # For every column in row...
            if key == 'id':
                # The primary key is only used for the progress log.
                continue
            isMissing = value is None or (pd.api.types.is_scalar(value) and pd.isna(value))
            if isMissing or value == '':
                # skip if cell has no value (None, NaN/NaT from SQL NULLs, or '')
                continue

            fieldId = fieldByColumn.get(key)
            if fieldId is None:
                print(f'{key} is not a valid field, skipping.')
                continue

            # Properties of an entity have to be an array, so normalise the
            # new-line markers to "&" and explode "&"-separated values.
            text = str(value)
            for marker in newLineMarkers:
                text = text.replace(marker, '&')
            if '&' in text:
                # Drop empty items produced by doubled or dangling separators.
                items = [part.strip() for part in text.split('&')]
                items = [part for part in items if part]
            else:
                items = [text]
            if items:
                entityValues[fieldId] = items

        # Create photographer entity
        entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
        api.save(entity)

        print(f'Created Photographer {index}: {entity.uri} of {len(sqlTable)}')

        # Write log. The UUID is taken from this row's values so that a row
        # without a UUID can neither reuse the previous row's value nor raise
        # a NameError.
        docUuid = entityValues.get(uuidField, [None])[0]
        logEntry = pd.DataFrame([{'id': row['id'], 'uuid': docUuid, 'uri': entity.uri}])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(f'./logs/{tableName}.csv', index=False)

    print('finished importing photographer')
|
||||
|
|
@ -5,31 +5,14 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
|
||||
def importArtifactToArtistRelationRelation(api, engine):
|
||||
print('importing artifact to artist relation relation')
|
||||
tableName = "r__obj__ob30_bez_kuenstler"
|
||||
bundleId = 'b8b4e3b3fb7e3b83cec037aea51814bf' # Artifact to artist relation relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +22,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
|
|
@ -50,6 +33,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -73,10 +59,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created artifact to artist relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to artist relation relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,16 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToClientAssignmentRelation(api, engine):
|
||||
print('importing artifact to client assignment relation')
|
||||
|
||||
tableName = "r__obj__410a_auftraggeber"
|
||||
bundleId = 'b20d53dcc2bad79457251a581611b43f' # Artifact to client assignment relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +24,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,10 +62,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Client Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to client assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
|
||||
def importArtifactToInspectionMarkLocationRelation(api, engine):
|
||||
print('importing artifact to inspection mark location relation')
|
||||
tableName = "r__obj__67b0_bz_dok_nr"
|
||||
bundleId = 'b7fe64e0326c107a1a4a705be08392fa' # Artifact to inspection mark location relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +23,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
for key, value in row.items():
|
||||
|
|
@ -51,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,10 +60,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Inspection Mark Location Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to inspection mark location relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,14 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
|
||||
def importArtifactToLiteratureReferenceAssignmentRelation(api, engine):
|
||||
print('importing artifact to literature reference assignment relation')
|
||||
tableName = "r__obj__8330_lit_kurzt_"
|
||||
bundleId = 'b6a7b7aad942ecff4b3beadf907d51c8' # Artifact to literature relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +22,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -51,6 +34,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,10 +60,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Literature Reference Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to literature reference assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToMarkInformationAssignmentRelation(api, engine):
|
||||
print('importing artifact to mark information assignment relation')
|
||||
|
||||
tableName = "r__obj__6760_markenart"
|
||||
bundleId = 'b7112c2a7ea92a1d263d42d5572a05fc' # Artifact to mark information assignment relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +23,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -51,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,10 +61,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Mark Information Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to mark information assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToMaterialRelation(api, engine):
|
||||
print('importing artifact to material relation')
|
||||
|
||||
tableName = "r__obj__5280_material"
|
||||
bundleId = 'b825aff7df3d48bd875e2a081c796305' # Artifact to material relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +23,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -51,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,10 +61,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Material Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to material relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,16 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToNumericeDateRelation(api, engine):
|
||||
print('importing artifact to numeric date relation')
|
||||
|
||||
tableName = "r__obj__5064_num__dat_"
|
||||
bundleId = 'b795fcfa6c684fa707c236c4b0882ad7' # Artifact to numeric date relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +24,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,10 +62,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Numeric Date Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to numeric date relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToPhotographRelation(api, engine):
|
||||
print('importing artifact to photograph relation')
|
||||
|
||||
tableName = "r__obj__8490_fotograf"
|
||||
bundleId = 'b63cd713e60b6e5bc3b2235dffc0dba9' # Artifact to photograph relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +23,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -51,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,10 +61,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Photograph Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to photograph relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,16 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToRelationRelation(api, engine):
|
||||
print('importing artifact to relation relation')
|
||||
|
||||
tableName = "r__obj__5007_beziehung"
|
||||
bundleId = 'bb878dd9c44c83a70fbd151f1dc06b4d' # Artifact to relation relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +24,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,10 +62,10 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Relation Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to relation relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToSourceRelation(api, engine):
|
||||
print('importing artifact to source relation')
|
||||
|
||||
tableName = "r__obj__8130_que_kurzt_"
|
||||
bundleId = 'bcf720dc0b796043915d6da536414451' # Artifact to source relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +23,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -51,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,9 +61,9 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Source Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
print('finish')
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
print('finished importing artifact to source relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtifactToStatusAdministratorRelation(api, engine):
|
||||
print('importing artifact to status administrator relation')
|
||||
test = False
|
||||
tableName = "r__obj__ob28_status_verwalt_"
|
||||
bundleId = 'bd4922f100ab534fc1213f767770ed6d' # Artifact to status adminstrator relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -39,9 +23,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -51,6 +35,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -74,13 +61,13 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artifact to Status Administrator Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing artifact to status administrator relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToBirthRelation(api, engine):
|
||||
print('importing artist to birth relation')
|
||||
|
||||
test = False
|
||||
tableName = "r__kue__3270_geb_datum"
|
||||
bundleId = 'b82e4404cdf641db57c03d7e3b23947c' # Artist to birth relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,12 +63,12 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Birth Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
|
||||
print('finish')
|
||||
print('finished importing artist to birth relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,16 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToDeathRelation(api, engine):
|
||||
print('importing artist to death relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__3330_todes_dat_"
|
||||
bundleId = 'b91ed11c8063a363063582f001a3f5a2' # Artist to death relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +24,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,11 +62,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Death Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing artist to death relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToGoldsmithRelation(api, engine):
|
||||
print('importing artist to goldsmith relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__3007_bezieh__zu_gs"
|
||||
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Goldsmith Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing artist to goldsmith relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToLiteratureReferenceRelation(api, engine):
|
||||
print('importing artist to literature reference relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__8330_lit_kurzt_"
|
||||
bundleId = 'b7a87e3f3d5f671c1f163101bff30eb6' # Artist to literature relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Literature Reference Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing artist to literature reference relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToMentionedRelation(api, engine):
|
||||
print('importing artist to mentioned relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__7060_erwaehnt__datum_"
|
||||
bundleId = 'bc2b0ddca583320a56a67b304dc0a045' # Artist to mentioned relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Mentioned Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing artist to mentioned relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,16 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToOriginRelation(api, engine):
|
||||
print('importing artist to origin relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__3204_herkunft"
|
||||
bundleId = 'b5cf6b3e6fd2e4b5575da4347999d6ea' # Artist to origin relation
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +24,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,11 +62,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Origin Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing artist to origin relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importArtistToWorkshopRelation(api, engine):
|
||||
print('importing artist to workshop relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__kue__nfws_forts_werkst_"
|
||||
bundleId = 'becb95326a733bdbd0c2dd3d36e3399d' # Artist to workshop relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Artist to Workshop Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing artist to workshop relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importInspectionMarkDatingInformationAssignmentRelation(api, engine):
|
||||
print('importing inspection mark dating information assignment relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__bez__68dm_datierung_marke"
|
||||
bundleId = 'b1fee832598b2d42ed17a927dad43b90' # Inspection Mark to dating information assignment relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -76,11 +60,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Inspection Mark to Dating Information Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing inspection mark dating information assignment relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importInspectionMarkRelationRelation(api, engine):
|
||||
print('importing inspection mark relation relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__bez__67b7_beziehung"
|
||||
bundleId = 'bc8dcd233a9b539db407bad219715988' # Inspection Mark Relation Relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -77,11 +64,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Inspection Mark to Relation Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing inspection mark relation relation')
|
||||
|
|
|
|||
|
|
@ -5,31 +5,15 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importInspectionMarkToLiteratureReferenceRelation(api, engine):
|
||||
print('importing inspection mark to literature reference relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__bez__8330_lit_kurzt_"
|
||||
bundleId = 'b32fc778865a1ffd5b165515425f38c6' # Inspection Mark to literature reference relation (TODO confirm this bundle ID belongs to that bundle)
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uri'])
|
||||
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Inspection Mark to Literature Reference Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing inspection mark to literature reference relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importLiteratureToJournalRelation(api, engine):
|
||||
print('importing literature to journal relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__lit__8310_zeitschrift"
|
||||
bundleId = 'b6c2ce0add1e7999f48d66b7ef1a4a26' # Literature to journal relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Literature to Journal Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing literature to journal relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importLiteratureToParentPublicationRelation(api, engine):
|
||||
print('importing literature to parent publication relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__lit__8292_uebergeordn_publ_"
|
||||
bundleId = 'b2adaaa15714d83ea83cd3333af437df' # Literature to parent publication relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Literature to Parent Publication Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing literature to parent publication relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importMarkToDatingRelation(api, engine):
|
||||
print('importing mark to dating relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__mar__68dm_datierung_marke"
|
||||
bundleId = 'b105b749b25de3aa55329b82fe18c18d' # Mark to dating relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -76,11 +60,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Dating Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing mark to dating relation')
|
||||
|
|
|
|||
|
|
@ -5,34 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
test = True
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importMarkToLiteratureRelation(api, engine):
|
||||
print('importing mark to literature relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__mar__8330_lit_kurzt_"
|
||||
bundleId = 'bd58cc7d59ce9f3e593e758a28dfcf4a' # Mark to literature relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -42,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -54,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -77,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Literature Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing mark to literature relation')
|
||||
|
|
|
|||
|
|
@ -5,32 +5,16 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importMarkToMarkInformationRelation(api, engine):
|
||||
print('importing mark to mark information relation')
|
||||
test = False
|
||||
tableName = "r__mar__6760_markenart"
|
||||
bundleId = 'b241e8063b9259428967fa4ff134a8bd' # Mark to mark information relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -40,9 +24,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -52,6 +36,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -75,11 +62,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Mark Information Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing mark to mark information relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importMarkToSourceRelation(api, engine):
|
||||
print('importing mark to source relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__mar__8130_que_kurzt_"
|
||||
bundleId = 'b0edbf644e07765a5ae319802ec0289b' # Mark to source relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Mark to Source Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing mark to source relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importSourceToDateRelation(api, engine):
|
||||
print('importing source to date relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__que__8100_datum"
|
||||
bundleId = 'b4b8ba242075bf2c778894911c7f3264' # Source to date relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load the relation table named by tableName into a DataFrame
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Source to Date Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing source to date relation')
|
||||
|
|
|
|||
|
|
@ -5,33 +5,17 @@ import os # For environment variable loading
|
|||
from dotenv import load_dotenv # For environment variable loading
|
||||
import pandas as pd # For dataframe handling
|
||||
|
||||
# Initialize the database
|
||||
print('Initializing the database...')
|
||||
engine, metadata, Session = initDb(True, './schemas/')
|
||||
if engine == False:
|
||||
print('Database initialization failed.')
|
||||
exit()
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the WissKI API
|
||||
print('Initializing the WissKI API...')
|
||||
api_url = os.getenv('API_URL')
|
||||
auth = (os.getenv('API_USERNAME'), os.getenv('API_PASSWORD'))
|
||||
headers = {"Cache-Control": "no-cache"}
|
||||
api = Api(api_url, auth, headers)
|
||||
api.pathbuilder = api.get_pathbuilder('relations')
|
||||
|
||||
def importSourceToLiteratureReferenceAssignmentRelation(api, engine):
|
||||
print('importing source to literature reference assignment relation')
|
||||
test = False
|
||||
|
||||
tableName = "r__que__8330_lit_kurzt_"
|
||||
bundleId = 'bed2f320214a0344287c6c4db40e9331' # Source to literature reference assignment relation
|
||||
|
||||
try:
|
||||
processedRows = pd.read_csv(f'./logs/processed-{tableName}.csv')
|
||||
processedRows = pd.read_csv(f'./logs/{tableName}.csv')
|
||||
except FileNotFoundError:
|
||||
processedRows = pd.DataFrame(columns=['docId', 'uuid', 'uri'])
|
||||
processedRows = pd.DataFrame(columns=[ 'id', 'uuid', 'uri'])
|
||||
|
||||
# Load sources table
|
||||
sqlTable = pd.read_sql_table(tableName, con=engine)
|
||||
|
|
@ -41,9 +25,9 @@ entityValues = {}
|
|||
# Create entities
|
||||
for index, row in sqlTable.iterrows():
|
||||
# For every row in table...
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
|
||||
# skip if already processed
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||
print(f'Skipping already processed entity {sqlTable.loc[index, "id"]}')
|
||||
continue
|
||||
# Create Entity property dicts
|
||||
entityValues = {}
|
||||
|
|
@ -53,6 +37,9 @@ for index, row in sqlTable.iterrows():
|
|||
# skip if cell has no value
|
||||
continue
|
||||
# Properties of an entity have to be an array, so...
|
||||
value = str(value).replace('&###{{new_line}}###'.format(), '&')
|
||||
value = str(value).replace('###{{new_line}}###', '&')
|
||||
value = str(value).replace(' & ', '&')
|
||||
if '&' in str(value):
|
||||
# ...Explode "&"-separated values to array items
|
||||
value = [x.strip() for x in str(value).split('&')]
|
||||
|
|
@ -76,11 +63,11 @@ for index, row in sqlTable.iterrows():
|
|||
entity = Entity(api=api, fields=entityValues, bundle_id=bundleId)
|
||||
api.save(entity)
|
||||
|
||||
print(f'Created entity {index}: {entity.uri} of {len(sqlTable)}')
|
||||
print(f'Created Source to Literature Reference Assignment Relation {index}: {entity.uri} of {len(sqlTable)}')
|
||||
|
||||
# Write log
|
||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||
processedRows = processedRows._append({'id': row['id'], 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||
processedRows.to_csv(f'./logs/{tableName}.csv', index=False)
|
||||
if test:
|
||||
exit()
|
||||
print('finish')
|
||||
print('finished importing source to literature reference assignment relation')
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ def initDb(_production, schemaDir):
|
|||
return (False, False)
|
||||
|
||||
if _production:
|
||||
dbName = 'ngk'
|
||||
dbName = 'ngk_data_alt'
|
||||
else:
|
||||
dbName = 'testngk'
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ def createClass(name, columns):
|
|||
tableName = name.lower().replace('-', '_').replace('.', '_').replace(' ', '_').replace('(', '_').replace(')', '_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss').replace('?', '_')
|
||||
|
||||
# Transform columns and add prefix
|
||||
attrs = {'__tablename__': tableName}
|
||||
attrs = {'__tablename__': tableName, '__table_args__': {'extend_existing': True}}
|
||||
attrs.update({prop.lower().replace('-', '_').replace('.', '_').replace(' ', '_').replace('(', '_').replace(')','_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss').replace('?', '_'): (Column(String(36), primary_key=True) if prop.lower() == 'uuid' else Column(Text)) for prop in columns})
|
||||
|
||||
# If 'uuid' is not in columns, add 'id' as primary key
|
||||
|
|
@ -30,9 +30,6 @@ def createClass(name, columns):
|
|||
# Create SQLAlchemy class
|
||||
cls = type(className, (Base,), attrs)
|
||||
|
||||
# Define the table with extend_existing=True
|
||||
Table(tableName, Base.metadata, extend_existing=True)
|
||||
|
||||
return cls
|
||||
|
||||
def initClassesFromSchemas(schemaDir):
|
||||
|
|
|
|||
|
|
@ -3,4 +3,4 @@ pandas
|
|||
pymysql
|
||||
sqlalchemy
|
||||
tqdm
|
||||
wisski_py
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue