112 lines
5.9 KiB
Python
112 lines
5.9 KiB
Python
import uuid # For UUID creation
|
|
from initDb import initDb # For database initialization
|
|
from wisski.api import Api, Pathbuilder, Entity # For WissKI API
|
|
import os # For environment variable loading
|
|
from dotenv import load_dotenv # For environment variable loading
|
|
import pandas as pd # For dataframe handling
|
|
|
|
# Source-table column -> field id on the literature entity.
_LITERATURE_FIELDS = {
    'f__uuid': 'fd58e0884f7cf63f8436c2789fcd2745',  # UUID
    'f__9990_kommentar': 'f3208633f7767cc9f5e44e768818df20',  # Comment
    'f__8270_verfasser': 'f60a88060c75068b4bf2eefd5221793f',  # Creator
    'f__8324_ersch_jahr': 'fdae7bd743ae58bf623feca3a26bcf6c',  # Date
    'f__8280_hrsg': 'fd0bc706876adee304892f8f9e34567f',  # Editor
    'f__8346_signatur': 'fb434c214be21f7e82a851d6524c2850',  # Identifier
    'f__9970_schlagwort': 'f1a55055944adf5d4e866a1768633a7f',  # Keyword
    'f__8200_lit_dok_nr_': 'f3bdd54b9ea5808a571200e9c60e103e',  # Literature Document Identifier
    'f__9971_sw_goldschmied': 'f21a286fec5d48ea238c10877ee2b0db',  # Mentioned Actor
    'f__8308_bibl_zusatz': 'f1674a743a13a3d74b0c6ebb2cf0043f',  # Note
    'f__8319_seitenangabe': 'f0d1716a40498f52abd4a6522aa5f3ef',  # Pages
    'f__8320_ersch_ort': 'fc3cafc0f542cef2a0e1189873ff58a3',  # Publication Place
    'f__8300_serientitel': 'f660f34eb7091c1b0f4b492e49a0e71b',  # Series Title
    'f__8330_lit_kurzt_': 'f84416d4380cdd30e8b9fcea57f58957',  # Shorttitle
    'f__8307_titelzusatz': 'f8521679ac8f6441ddb086f1c5ed7528',  # Subtitle
    'f__8290_titel': 'fa1ae40cc9940569d5a1e3ea13e33488',  # Title
    'f__8260_art': 'f92c6453d265a952a56252e7d93cedea',  # Type
}

# Source-table column -> field id on the digitisation-process entity.
_DIGITISATION_FIELDS = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}

# Field of the digitisation process that receives a freshly generated UUID;
# the literature entity points at the process through this value.
_DIGITISATION_ID_FIELD = 'f32274ec0032b8778ba69d20108590cc'
# Field of the literature entity that holds the digitisation-process reference.
_LITERATURE_DIGITISATION_FIELD = 'f59a2ad5cce3e51f172215ea88afac41'
_DIGITISATION_BUNDLE_ID = 'b22e6c47ccb3ab8a974b37279e1bc33b'


def _isBlank(value):
    """Return True when a table cell carries no usable value."""
    if value is None:
        return True
    if isinstance(value, str):
        return value == ''
    try:
        # Catches NaN / NaT, which pandas uses for NULL in typed columns and
        # which would otherwise be imported as the strings 'nan' / 'NaT'.
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar cell: pd.isna returns an array with no single truth value.
        return False


def _splitValues(value):
    """Turn a cell into the list of strings stored in an entity field."""
    text = str(value)
    # NOTE(review): the first marker uses single braces, the second double
    # braces. Both are kept exactly as the original code evaluated them;
    # confirm against the source data which spelling actually occurs.
    text = text.replace('&###{new_line}###', '&')
    text = text.replace('###{{new_line}}###', '&')
    text = text.replace(' & ', '&')
    if '&' in text:
        # Explode "&"-separated values to list items.
        return [part.strip() for part in text.split('&')]
    return [text]


def importLiterature(api, engine):
    """Import the rows of table ``c__lit`` as literature entities.

    For every row a digitisation-process entity and a literature entity are
    built from the mapped columns and saved through ``api``. Each imported
    row is appended to ``./logs/c__lit.csv``; rows whose ``id`` matches the
    log entry at the same position are skipped, so an interrupted import can
    be resumed.

    Args:
        api: API object; handed to ``Entity`` and used via ``api.save``.
        engine: Database connection passed to ``pandas.read_sql_table``.
    """
    print('Importing literature...')
    tableName = 'c__lit'
    bundleId = 'bafe9c3d3b640d4d1a16b104f367ac91'
    logPath = f'./logs/{tableName}.csv'

    # The log is rewritten after every row, so its directory has to exist
    # before the first entity is saved.
    os.makedirs(os.path.dirname(logPath), exist_ok=True)

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'docId', 'uuid', 'uri'])

    # Load literature table
    literaturesTable = pd.read_sql_table(tableName, con=engine)

    # Create literatures
    for index, row in literaturesTable.iterrows():
        rowId = literaturesTable.loc[index, 'id']
        if index < len(processedRows) and rowId == processedRows.loc[index, 'id']:
            # Skip if already processed
            print(f'Skipping already processed literature {rowId}')
            continue

        # Create Entity property dicts
        literatureValues = {}
        digitisationProcessValues = {_DIGITISATION_ID_FIELD: [str(uuid.uuid4())]}

        for key, value in row.items():
            if _isBlank(value) or key == 'id':
                continue

            # Properties of an entity have to be a list.
            values = _splitValues(value)

            # Map columns to fields.
            if key in _LITERATURE_FIELDS:
                literatureValues[_LITERATURE_FIELDS[key]] = values
            elif key in _DIGITISATION_FIELDS:
                digitisationProcessValues[_DIGITISATION_FIELDS[key]] = values
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id=_DIGITISATION_BUNDLE_ID)
        api.save(digitisationProcess)

        # Link the literature to its Digitisation Process
        literatureValues[_LITERATURE_DIGITISATION_FIELD] = [digitisationProcessValues[_DIGITISATION_ID_FIELD][0]]

        # Create Literature
        literature = Entity(api=api, fields=literatureValues, bundle_id=bundleId)
        api.save(literature)

        print(f'Created literature {index}: {literature.uri} of {len(literaturesTable)}')

        # Write log. Missing doc id / UUID are logged as empty instead of
        # raising after the entities have already been saved.
        logEntry = {
            'id': row['id'],
            'docId': literatureValues.get(_LITERATURE_FIELDS['f__8200_lit_dok_nr_'], [None])[0],
            'uuid': literatureValues.get(_LITERATURE_FIELDS['f__uuid'], [None])[0],
            'uri': literature.uri,
        }
        processedRows = pd.concat([processedRows, pd.DataFrame([logEntry])], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

    print('finish')
|