import os  # For environment variable loading and log directory creation
import sys  # For leaving the process in test mode
import uuid  # For UUID creation

import pandas as pd  # For dataframe handling
from dotenv import load_dotenv  # For environment variable loading
from wisski.api import Api, Pathbuilder, Entity  # For WissKI API

from initDb import initDb  # For database initialization

# Source column -> field ID for values that are stored directly on the artist.
_ARTIST_FIELDS = {
    'f__uuid': 'fff2eb2283e4cd8df3783602a1bc96ab',  # UUID
    'f__3170_and__taetigkeit': 'f01f51e385e5f206653e029ff5c845c4',  # Alternate occupation
    'f__3000_kue_dok_nr_': 'f61deac361ac5e0731edbf214761d15c',  # Artist Document Number
    'f__3002_pub_kue_nr_': 'f46b2ec14ce05d2618427c526198d64e',  # Artist published number
    'f__9990_kommentar': 'fedc08e4225ac800e5d9f16bf345d181',  # Comment
    'f__3360_letzte_erw_': 'f1419788b918f4c4a13393fd09ff37b3',  # Last Mentioned
    'f__6700_mar_dok_nr_': 'f3d63eec34c00556cbadf635f78d815a',  # Mark Assignment
    'f__33gs_meister_als': 'f30b60be791fb13f919c31510ca4de50',  # Master Education
    'f__33mj_meisterjahr': 'fd2d07bb9ea1eadacdf28e41cacb92c1',  # Master Year
    'f__3100_name': 'f71c047dad23083850a13d489386bf31',  # Name
    'f__3105_abw_schreibw_': 'fbe84024bf9fad8f6a545b3af75d8b1b',  # Name Variants
    'f__3166_fakt__taetig_als': 'fb0373e9fd949984cf9c09ec1ea0746c',  # Occupation
    'f__336p_1__posth__erw_': 'fe079424bb6196d4a9721f84c43361f8',  # Posthumous Mentioned
    'f__6770_rosenb_nr_': 'f82ed1dc96df9230e28e04fef0ff2305',  # Rosenberg number
}

# Source column -> field ID for values that are stored on the digitisation process.
_DIGITISATION_FIELDS = {
    'f__9900_datum_erfassung': 'f1f5dd22371e5c1de41e0fb099e0e862',  # Recording date
    'f__99ae_datum_aenderung': 'f8976c6a9e5d91fe9caba8a57c27f204',  # Change date
    'f__efbm_bem_erfassung': 'f78a6310d13c717b82ddba814ac59024',  # Recording note
}

# (legacy directory prefix, replacement) pairs, applied in exactly this order.
_IMAGE_PATH_REPLACEMENTS = (
    ('Objekte\\', 'objects/'),
    ('Objekte3\\', 'objects/'),
    ('Objekte4\\', 'objects/'),
    ('Objekte5\\', 'objects/'),
    ('objekte5\\', 'objects/'),
    ('Marken\\', 'marks/'),
    ('Marken/', 'marks/'),
    ('MArken\\', 'marks/'),
    ('Goldschmiede/', 'goldsmiths/'),
    ('Goldschmiede\\', 'goldsmiths/'),
    ('Epitaphien/', 'epitaphies/'),
    ('Epitaphien\\', 'epitaphies/'),
)


def _isEmptyCell(value):
    """Return True when a table cell carries no usable value."""
    if value is None:
        return True
    # NULLs can surface as NaN / NaT / pd.NA; without this check they would be
    # stringified ('nan', 'NaT') and imported as if they were real values.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return True
    return value == ''


def _splitValues(value):
    """Turn one cell into the list of strings an entity field expects."""
    # NOTE(review): the first marker uses single braces and the second doubled
    # braces. The original wrote the first one as '&###{{new_line}}###'.format(),
    # which collapses the doubled braces at runtime; both runtime strings are
    # kept exactly as they were. Confirm which spelling the data really uses.
    text = str(value).replace('&###{new_line}###', '&')
    text = text.replace('###{{new_line}}###', '&')
    text = text.replace(' & ', '&')
    if '&' in text:
        # Explode "&"-separated values to array items
        return [part.strip() for part in text.split('&')]
    return [text]


def _normaliseImagePath(item):
    """Replace the legacy directory prefixes in a reproduction number."""
    for legacyPrefix, replacement in _IMAGE_PATH_REPLACEMENTS:
        item = item.replace(legacyPrefix, replacement)
    return item


def importArtist(api, engine, *, test=False):
    """Create one artist entity per row of the ``c__kue`` table.

    For every row that is not yet listed in ``./logs/c__kue.csv`` this saves,
    in order: a digitisation process entity, one image entity per
    reproduction number, an image assignment entity (only when the row has
    images) and finally the artist entity, which references the assignment
    and the digitisation process by their generated UUIDs. After each artist
    the log file is rewritten so an interrupted run can be resumed.

    Args:
        api: Object handed to ``Entity(api=...)`` and used via ``api.save``.
        engine: Connection passed to ``pandas.read_sql_table`` as ``con``.
        test: When true, exit the process after the first created artist.
    """
    print('Importing artists...')
    tableName = 'c__kue'
    bundleId = 'bc322be33491dacc600dd43fdee09a5c'
    logPath = f'./logs/{tableName}.csv'

    # Make sure the log can be written *before* anything is saved through the
    # API; otherwise a missing directory fails only after entities exist.
    os.makedirs(os.path.dirname(logPath), exist_ok=True)

    try:
        processedRows = pd.read_csv(logPath)
    except FileNotFoundError:
        processedRows = pd.DataFrame(columns=['id', 'uuid', 'uri'])

    # Load artists table
    artistsTable = pd.read_sql_table(tableName, con=engine)

    # Create artists
    for index, row in artistsTable.iterrows():
        # A row counts as processed when the log holds the same id at the
        # same position.
        if index < len(processedRows) and artistsTable.loc[index, 'id'] == processedRows.loc[index, 'id']:
            print(f'Skipping already processed artist {artistsTable.loc[index, "id"]}')
            continue

        # Create Entity property dicts
        artistValues = {}
        digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
        imageValues = {}
        reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}

        # Map columns to fields. We use assignments for reification.
        for key, value in row.items():
            if key == 'id' or _isEmptyCell(value):
                continue

            # Properties of an entity have to be an array
            values = _splitValues(value)

            if key in _ARTIST_FIELDS:
                artistValues[_ARTIST_FIELDS[key]] = values
            elif key in _DIGITISATION_FIELDS:
                digitisationProcessValues[_DIGITISATION_FIELDS[key]] = values
            elif key == 'f__8540_repro_nr_':
                # We map images to Image entity
                for item in values:
                    item = _normaliseImagePath(item)
                    imageFields = imageValues.setdefault(item, {})
                    imageFields['feb10344eaa7a5f414d1e8392853eba9'] = [item]  # Reproduction Number (Image)
                    imageFields['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artist_images/' + item + '.jpg']  # File
                    imageFields['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())]  # UUID
            else:
                print(f'{key} is not a valid field, skipping.')

        # Create Digitisation Process
        digitisationProcess = Entity(api=api, fields=digitisationProcessValues, bundle_id='b22e6c47ccb3ab8a974b37279e1bc33b')
        api.save(digitisationProcess)

        # Create Image entities and collect their UUIDs,
        # because we link Image Assignment and Image over the UUID
        imageList = []
        for imageFields in imageValues.values():
            imageItem = Entity(api=api, fields=imageFields, bundle_id='b8c6c4b478ead1c80e175ad0f98dafe3')
            api.save(imageItem)
            imageList.append(imageFields['f11beac4b638016479e6f3fbc7e55d1a'][0])

        # Create the Image Assignment only when there are images,
        # because we link Artist and Image Assignment over the UUID
        if imageList:
            reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList  # List of Image UUIDs
            reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
            api.save(reproNumberAssignment)
            artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]]  # Image Assignment

        # The digitisation process UUID is always generated above.
        artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]]  # Digitisation Process

        # Create Artist
        artist = Entity(api=api, fields=artistValues, bundle_id=bundleId)
        api.save(artist)
        print(f'Created artist {index}: {artist.uri} of {len(artistsTable)}')

        # Write log. A row without a UUID is logged with an empty uuid cell
        # instead of raising after its entities were already saved.
        logEntry = pd.DataFrame([{
            'id': row['id'],
            'uuid': artistValues.get('fff2eb2283e4cd8df3783602a1bc96ab', [None])[0],
            'uri': artist.uri,
        }])
        processedRows = pd.concat([processedRows, logEntry], ignore_index=True)
        processedRows.to_csv(logPath, index=False)

        if test:
            print('Testing mode activated. Exiting.')
            sys.exit()

    print('finished importing artists')