diff --git a/functions/notebook_functions.py b/functions/notebook_functions.py
index 29ae032..fabb575 100644
--- a/functions/notebook_functions.py
+++ b/functions/notebook_functions.py
@@ -496,6 +496,8 @@ def change_additional_fields(patch_body, item, item_type, item_level, change_lev
     'badge',
     'organism',
     'genomic_region',
+    'gene',
+    'bio_feature',
     'target',
     'imaging_path',
     'publication',
@@ -507,6 +509,7 @@ def change_additional_fields(patch_body, item, item_type, item_level, change_lev
     'modification',
     'protocol',
     'sop_map',
+    'experiment_type',
     'biosample_cell_culture',
     'individual_human',
     'individual_mouse',
@@ -524,6 +527,7 @@ def change_additional_fields(patch_body, item, item_type, item_level, change_lev
     'quality_metric_dedupqc_repliseq',
     'quality_metric_chipseq',
     'quality_metric_atacseq',
+    'quality_metric_workflowrun',
     'microscope_setting_d1',
     'microscope_setting_d2',
     'microscope_setting_a1',
diff --git a/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb b/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb
index 6b25192..7a8fd12 100644
--- a/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb
+++ b/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb
@@ -1,302 +1,417 @@
 {
  "cells": [
   {
-   "cell_type": "code",
-   "execution_count": 4,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "WORKING ON https://data.4dnucleome.org \n",
-      "\n",
-      "dict_keys(['file_vistrack'])\n",
-      "105\n",
-      "\n"
-     ]
-    }
-   ],
    "source": [
-    "### PLEASE COPY NOTEBOOKS TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS\n",
+    "### PLEASE COPY NOTEBOOK TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS\n",
+    "\n",
+    "#### This notebook can be used to copy metadata from one environment and post or patch items (if they already exist) into a different environment\n",
+    "\n",
+    "#### *This notebook uses ES functions, so it needs to be run on a personal EC2*\n",
+    "\n",
+    "This cell does setup, including auths for both the environment you will transfer from and the one you will transfer to.\n",
+    "\n",
+    "You can set options for whether existing data on the target env should be overwritten, and whether to actually perform the operation or just do a dry run.\n",
+    "\n",
+    "It then retrieves the starting items from the provided IDs or from a search.\n",
+    "\n",
+    "**Note:** some item types can be excluded by modifying the parameters to `expand_es_metadata`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
     "from dcicutils import ff_utils\n",
     "from functions.notebook_functions import *\n",
     "import json\n",
+    "import time\n",
     "\n",
-    "# get key from keypairs.json\n",
-    "my_env = 'data'\n",
-    "my_key = get_key('koray_data')\n",
-    "schema_name = get_schema_names(my_key) \n",
-    "print('WORKING ON', my_key['server'], '\\n')\n",
+    "# options you may want to change\n",
+    "overwrite_existing = True  # if an item already exists in the target, should it be overwritten\n",
+    "action = True  # set True to post/patch in the indicated env\n",
     "\n",
-    "##### COLLECT ITEMS TO Release #####\n",
-    "# use either a starting item to fetch all linked items\n",
+    "# auth for source\n",
+    "# get auth from keypairs.json\n",
+    "src_auth = get_key('andyprod')\n",
     "\n",
-    "# Use a starting item to find linked ones\n",
-    "# starting_items = ['46db06ad-b399-4cf4-9acc-07b3e25ef132']\n",
-    "#add_items = get_query_or_linked(my_key, linked=starting_items)\n",
+    "# or alternatively from env name\n",
+    "# source_env = 'fourfront-production-green'\n",
+    "# src_auth = ff_utils.get_authentication_with_server({}, ff_env=source_env)\n",
     "\n",
-    "# or a search query\n",
-    "#my_query = '/search/?q=GOLD&type=Item&limit=all'\n",
-    "#add_items = get_query_or_linked(my_key, query=my_query)\n",
+    "# auth for target\n",
+    "# get auth from keypairs.json\n",
+    "# target_auth = get_key('andywebdev')\n",
     "\n",
-    "# if you want you can dump them to separate json files (will work as test insert)\n",
-    "# dump_to_json(add_items, destination folder)\n",
+    "# or alternatively from env name\n",
+    "target_env = 'fourfront-webdev'\n",
+    "target_auth = ff_utils.get_authentication_with_server({}, ff_env=target_env)\n",
     "\n",
-    "my_query = '/search/?type=FileVistrack'\n",
-    "store = get_query_or_linked(my_key, query=my_query, linked_frame='raw')\n",
-    "print(store.keys())\n",
-    "print(len([i['uuid'] for key in store for i in store[key]]))\n",
-    "print()\n",
+    "print('TRANSFERRING envs:\\nFROM: {}\\nTO: {}\\n'.format(src_auth['server'], target_auth['server']))\n",
     "\n",
-    "# find_linked = ['48732435-5a16-4d86-a0f6-ace18dc62b6c']\n",
-    "# store = get_query_or_linked(my_key, linked=find_linked, linked_frame='raw')\n",
-    "# print(store.keys())\n",
-    "# print(len([i['uuid'] for key in store for i in store[key]]))"
+    "schema_name = get_schema_names(src_auth)\n",
+    "\n",
+    "# use either a list of IDs of starting items or a search that retrieves them\n",
+    "items_in_scope = ['c0319394-33f9-4f4e-8e20-b3d413c4eed6']  # ['4DNACCCC', '4DNACCCCC']\n",
+    "\n",
+    "search_url = None  # e.g. '/search/?award.project=4DN&experiments_in_set.experiment_type=dilution+Hi-C&experimentset_type=replicate&lab.display_title=Bing+Ren%2C+UCSD&status=pre-release&type=ExperimentSetReplicate'\n",
+    "\n",
+    "time1 = time.time()\n",
+    "if items_in_scope:\n",
+    "    starting_item_uuids = [ff_utils.get_metadata(i, src_auth)['uuid'] for i in items_in_scope]\n",
+    "elif search_url:\n",
+    "    starting_item_uuids = [i['uuid'] for i in ff_utils.search_metadata(search_url, src_auth)]\n",
+    "\n",
+    "store, item_uuids = ff_utils.expand_es_metadata(starting_item_uuids, src_auth, store_frame='raw', add_pc_wfr=True, ignore_field=['submitted_by', 'last_modified'])\n",
+    "\n",
+    "print(len(starting_item_uuids), 'parent item(s) to transfer')\n",
+    "print(len(item_uuids), 'items collected')\n",
+    "time2 = time.time()\n",
+    "print(round((time2-time1), 1), 'sec for collection')\n",
+    "for itype, found in store.items():\n",
+    "    print(\"{}\\t{}\".format(itype, len(found)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Here you can specifically exclude certain types from the store\n",
+    "\n",
+    "### WARNING - use with care, as there may be dependencies on items you exclude. However, using this in combination with the `ignore_field` parameter in the call to `expand_es_metadata` can be useful at times."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "file_vistrack\n",
-      "{'lab': '2319a476-9817-4c9d-9ad5-f2858c1aea50', 'last_modified': {'modified_by': '986b362f-4eb6-4a9c-8173-3ab267307e3a', 'date_modified': '2018-11-29T01:27:37.341121+00:00'}, 'project_release': '2018-11-29', 'file_classification': 'visualization', 'genome_assembly': 'GRCh38', 'file_format': 'd1311111-218e-4f61-aaf0-91f226248b2c', 'award': '8842e72e-7b38-4a2a-9557-e6b22616ecaa', 'file_type': 'fold change over control', 'filename': 'ENCFF134SMY.bigWig', 'status': 'released', 'project_lab': 'Richard Myers, HAIB', 'assay_info': 'RXRA', 'public_release': '2018-11-29', 'replicate_identifiers': ['Biorep 1 Techrep 1', 'Biorep 2 Techrep 1'], 'uuid': 'f4b9fbaf-caae-4773-93a8-58600494cee1', 'md5sum': '622678392bcef8909dcb4dd0fa160ccf', 'schema_version': '1', 'content_md5sum': '622678392bcef8909dcb4dd0fa160ccf', 'biosource': '68172441-97c4-40cc-b73f-d0f5dbc5cc05', 'submitted_by': '986b362f-4eb6-4a9c-8173-3ab267307e3a', 'dataset_type': 'ChIP-seq', 'aliases': ['encode-dcc-lab:ENCFF134SMY'], 'accession': '4DNFI3E4H24J', 'dbxrefs': ['ENC:ENCSR000BJW', 'ENC:ENCFF134SMY'], 'file_size': 365761527, 'date_created': '2018-11-20T16:50:43.029519+00:00', 'project_release_date': '2011-07-18', 'description': 'bigWig file of fold change over control for RXRA ChIP-seq on H1-hESC from Richard Myers, HAIB (merged replicates)', 'dataset_description': 'RXRA ChIP-seq protocol v041610.2 on human H1-hESC', 'higlass_uid': 'aC5J5fLhQQ-PPL_LWS7XsA'}\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# list of types in lower_case format\n",
+    "types2exclude = ['user', 'lab', 'award']\n",
+    "store = {k: v for k, v in store.items() if k not in types2exclude}\n",
+    "for itype, found in store.items():\n",
+    "    print(\"{}\\t{}\".format(itype, len(found)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
   "source": [
-    "for i in store:\n",
-    "    print(i)\n",
-    "    print (store[i][0])"
+    "#### Set up some global-ish variables and helper functions"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "file_vistrack\n",
-      "105 items exist on source\n",
-      "exsiting item will be patched in second step\n",
-      "[... same line repeated for each of the 105 items ...]\n",
-      "105 items transfered to target\n",
-      "\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "outputs": [],
   "source": [
-    "#### This part should only run once!\n",
-    "\n",
-    "transfer_env = 'fourfront-hotseat'\n",
-    "transfer_key = ff_utils.get_authentication_with_server({}, ff_env=transfer_env)\n",
-    "# reverse lookup dictionary for schema names\n",
-    "\n",
-    "# if the item exist in the target, should it overwrite it (will include user/award etc)\n",
-    "overwrite_existing = True\n",
-    "action = False\n",
+    "# set up some generally useful global variables\n",
     "rev_schema_name = {}\n",
     "for key, name in schema_name.items():\n",
     "    rev_schema_name[name] = schema_name[key]\n",
-    "\n",
+    "missing_types = [i for i in store.keys() if i not in ORDER]\n",
     "my_types = [i for i in ORDER if i in store.keys()]\n",
+    "if missing_types:\n",
+    "    print(\"MISSING FROM ORDER BUT IN STORE - WILL BE WORKED ON LAST:\")\n",
+    "    print(missing_types)\n",
+    "    # we know there is a schema, so assume ordering does not matter for these types and add them to the end\n",
+    "    my_types.extend(missing_types)\n",
     "\n",
     "second_round_items = {}\n",
+    "id_swappers = {}\n",
+    "\n",
+    "\n",
+    "# helper functions\n",
+    "def camel_case(name):\n",
+    "    return ''.join(x for x in name.title() if not x == '_')\n",
+    "\n",
+    "def search_for_existing(id_list, item, itype, transfer_auth):\n",
+    "    ''' if the uuid is not found, tries to find the item by its other identifying properties\n",
+    "        and if found returns it in raw frame\n",
+    "    '''\n",
+    "    base_query = 'search/?type={}&{}={}&frame=object'\n",
+    "    itype = camel_case(itype)\n",
+    "    for id2chk in id_list:\n",
+    "        if id2chk == 'uuid':\n",
+    "            continue\n",
+    "        val2chk = item.get(id2chk)\n",
+    "        if not val2chk:\n",
+    "            continue\n",
+    "        query = base_query.format(itype, id2chk, val2chk)\n",
+    "        try:\n",
+    "            srch_res = ff_utils.search_metadata(query, transfer_auth)\n",
+    "            if len(srch_res) == 1:\n",
+    "                return ff_utils.get_metadata(srch_res[0].get('uuid'), transfer_auth, add_on='frame=raw')\n",
+    "        except:\n",
+    "            continue\n",
+    "    return None\n",
+    "\n",
+    "def check_for_swaps(props_to_check, swaps):\n",
+    "    ''' recursively replaces uuids that have been swapped with the value to use, taken from the swaps dict\n",
+    "    '''\n",
+    "    if isinstance(props_to_check, dict):\n",
+    "        checked_props = {}\n",
+    "        for prop, val in props_to_check.items():\n",
+    "            checked_props[prop] = check_for_swaps(val, swaps)\n",
+    "    elif isinstance(props_to_check, list):\n",
+    "        checked_props = []\n",
+    "        for val in props_to_check:\n",
+    "            checked_props.append(check_for_swaps(val, swaps))\n",
+    "    elif isinstance(props_to_check, str):\n",
+    "        checked_props = swaps.get(props_to_check, props_to_check)\n",
+    "    else:\n",
+    "        checked_props = props_to_check\n",
+    "    return checked_props\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### The following cells should generally only be run once, though adjusting the parameters changes whether pre-existing items are patched or left untouched - nonetheless, care should be taken\n",
+    "\n",
+    "#### **WARNING** - running first with `action=False` is recommended, but a conflict encountered during posting will not be picked up by the dry run, so you may still end up with only partially posted items.\n",
+    "\n",
+    "#### This does the initial posting of items that do not yet exist in the target environment\n",
+    "\n",
+    "##### NOTE: only required fields are posted for new items, so if you stop here you will end up with partially posted items that have only a few properties.\n",
+    "\n",
+    "### Round I - only post the required fields for new items - skip if the item already exists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# per-cell action: leave as None to fall back to the global `action` set at the top; set True/False to override it for this cell\n",
+    "post_action = False\n",
+    "if post_action is None:\n",
+    "    post_action = action\n",
     "\n",
-    "# Round I - only put the required - skip if exists already\n",
     "for a_type in my_types:\n",
     "    print(a_type)\n",
     "    obj_type = rev_schema_name[a_type]\n",
     "    # find required field\n",
-    "    schema_info = ff_utils.get_metadata('/profiles/{}.json'.format(a_type), key=transfer_key)\n",
+    "    schema_info = ff_utils.get_metadata('/profiles/{}.json'.format(a_type), key=target_auth)\n",
     "    req_fields = schema_info['required']\n",
     "    ids = schema_info['identifyingProperties']\n",
     "    first_fields = list(set(req_fields+ids))\n",
     "    remove_existing_items = []\n",
+    "    problems = []\n",
     "    counter=0\n",
     "    print(len(store[a_type]), 'items exist on source')\n",
-    "    for an_item in store[a_type]:\n",
+    "    for idx, an_item in enumerate(store[a_type]):\n",
     "        counter += 1\n",
     "\n",
     "        # does the item exist\n",
     "        exists = False\n",
     "        try:\n",
-    "            # TODO check with all identifiers\n",
-    "            existing = ff_utils.get_metadata(an_item['uuid'], key=transfer_key)\n",
+    "            existing = ff_utils.get_metadata(an_item['uuid'], key=target_auth, add_on='frame=raw')\n",
     "            exists = True\n",
     "        except:\n",
-    "            exists = False\n",
+    "            existing = search_for_existing(ids, an_item, a_type, target_auth)\n",
+    "            if existing:\n",
+    "                exists = True\n",
     "        \n",
+    "        if existing and existing == an_item:\n",
+    "            print(\"No updates needed\")\n",
+    "            remove_existing_items.append(an_item['uuid'])  # identical on target, so skip both rounds\n",
+    "            continue\n",
+    "        \n",
     "        # skip the items that exists\n",
     "        if exists and existing:\n",
     "            if overwrite_existing:\n",
+    "                if existing.get('uuid') != an_item.get('uuid'):\n",
+    "                    print('WARNING - mismatched uuids: will patch {}!'.format(existing.get('uuid')))\n",
+    "                    # swap in the target's uuid so the right item gets updated and linkTos can be rewritten later\n",
+    "                    id_swappers[an_item.get('uuid')] = existing.get('uuid')\n",
+    "                    remove_existing_items.append(an_item['uuid'])  # make sure we don't try to post with the other uuid and get a conflict\n",
+    "                    an_item['uuid'] = existing.get('uuid')\n",
+    "                    print(\"uuid swapped\")\n",
+    "                    store[a_type][idx] = an_item\n",
+    "                \n",
     "                # patch in second step will update the item\n",
-    "                print('exsiting item will be patched in second step')\n",
+    "                print('existing item will be patched in second step')\n",
     "                continue\n",
     "            else:\n",
     "                remove_existing_items.append(an_item['uuid'])\n",
     "                print(\"{} {} can not post item\".format(obj_type, an_item['uuid']))\n",
     "                continue\n",
     "        post_first = {key:value for (key,value) in an_item.items() if key in first_fields}\n",
-    "        print('posting')\n",
-    "        if action:\n",
-    "            ff_utils.post_metadata(post_first, obj_type, key = transfer_key)\n",
+    "        # posting follows ORDER, so by this point any swapped uuids of dependencies are recorded\n",
+    "        # and it is enough to scan the post_first fields for them\n",
+    "        post_first = check_for_swaps(post_first, id_swappers)\n",
+    "        print('WILL POST {}'.format(post_first.get('uuid')))\n",
+    "        if post_action:\n",
+    "            try:\n",
+    "                res = ff_utils.post_metadata(post_first, obj_type, key=target_auth)\n",
+    "                print(res.get('status'))\n",
+    "            except Exception as e:\n",
+    "                print('POST failed: {}'.format(e))\n",
+    "                problems.append(an_item.get('uuid'))\n",
     "        \n",
-    "    second_round_items[a_type] = [i for i in store[a_type] if i['uuid'] not in remove_existing_items]\n",
-    "    print(len(second_round_items[a_type]), 'items transfered to target')\n",
+    "    second_round_items[a_type] = [i for i in store[a_type] if i['uuid'] not in remove_existing_items + problems]\n",
+    "    if problems:\n",
+    "        print(\"WARNING! {} items of {} not posted due to problems\".format(len(problems), a_type))\n",
+    "    print(len(second_round_items[a_type]), 'items transferred to target')\n",
     "    print()\n",
     "\n",
     "    "
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Round II - patch the rest of the metadata, updating pre-existing items if `overwrite_existing=True`"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Round II - patch the rest of the metadata\n",
+    "# per-cell action: leave as None to fall back to the global `action` set at the top; set True/False to override it for this cell\n",
+    "patch_action = False\n",
+    "if patch_action is None:\n",
+    "    patch_action = action\n",
+    "\n",
+    "for itype, found in second_round_items.items():\n",
+    "    print(\"{}\\t{}\".format(itype, len(found)))\n",
+    "counter = 0\n",
     "for a_type in my_types:\n",
     "    obj_type = rev_schema_name[a_type]\n",
     "    if not second_round_items[a_type]:\n",
     "        continue \n",
     "    for an_item in second_round_items[a_type]:\n",
     "        counter += 1\n",
-    "        ff_utils.patch_metadata(an_item, obj_id = an_item['uuid'], key = transfer_key)"
+    "        an_item = check_for_swaps(an_item, id_swappers)\n",
+    "        if patch_action:\n",
+    "            res = ff_utils.patch_metadata(an_item, obj_id=an_item['uuid'], key=target_auth)\n",
+    "            print(res.get('status'))\n",
+    "print(\"{} items processed in second round\".format(counter))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## WARNING!\n",
+    "\n",
+    "### Nothing below here has been updated or tested, so it may or may not work!!!\n",
+    "\n",
+    "### Set up stuff for copying things on s3"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Round III - move attachments\n",
     "import boto3\n",
     "s3 = boto3.resource('s3')\n",
     "\n",
     "#source_addresses\n",
-    "source_health = ff_utils.get_metadata('/health', key = my_key)\n",
+    "source_health = ff_utils.get_metadata('/health', key = src_auth)\n",
     "source_raw = source_health['file_upload_bucket'] \n",
     "source_pf = source_health['processed_file_bucket'] \n",
     "source_att = source_health['blob_bucket']\n",
     "\n",
     "#target_addresses\n",
-    "target_health = ff_utils.get_metadata('/health', key = transfer_key)\n",
+    "target_health = ff_utils.get_metadata('/health', key = target_auth)\n",
     "target_raw = target_health['file_upload_bucket'] \n",
     "target_pf = target_health['processed_file_bucket'] \n",
-    "target_att = target_health['blob_bucket'] \n",
+    "target_att = target_health['blob_bucket'] "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Round III - move attachments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import boto3\n",
+    "s3 = boto3.resource('s3')\n",
     "\n",
     "# Round III - move attachments\n",
     "for a_type in my_types:\n",
@@ -305,131 +420,31 @@
     "        if 'attachment' in an_item.keys():\n",
     "            at_key = an_item['attachment']['blob_id']\n",
     "            copy_source = {'Bucket': source_att, 'Key': at_key}\n",
-    "            try:\n",
-    "                s3.meta.client.copy(copy_source, target_att, at_key)\n",
-    "            except:\n",
-    "                print('Can not find attachment on source', an_item['uuid'])\n",
-    "                continue\n",
-    "            print('attachment copied')\n"
+    "            print(\"Attachment {} from {} to be copied from {} to {}\".format(at_key, an_item.get('uuid'), source_att, target_att))\n",
+    "            if action:\n",
+    "                try:\n",
+    "                    # this seems to work, but it would be nice to check whether the file already exists to avoid recopying\n",
+    "                    s3.meta.client.copy(copy_source, target_att, at_key)\n",
+    "                except:\n",
+    "                    print('Cannot find attachment on source', an_item['uuid'])\n",
+    "                    continue\n",
+    "                print('attachment copied')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## WARNING - not tested\n",
+    "\n",
+    "### Round IV - move files"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "file copied\n",
-      "[... same line repeated for every transferred file ...]\n",
-      "file copied\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import boto3\n",
    "s3 = boto3.resource('s3')\n",
@@ -440,12 +455,12 @@
    "source_att = source_health['blob_bucket']\n",
    "\n",
    "#target_addresses\n",
-    "target_health = ff_utils.get_metadata('/health', key = transfer_key)\n",
+    "target_health = ff_utils.get_metadata('/health', key = target_auth)\n",
    "target_raw = target_health['file_upload_bucket'] \n",
    "target_pf = target_health['processed_file_bucket'] \n",
    "target_att = target_health['blob_bucket'] \n",
    "\n",
-    "# Round IV - move files\n",
+    "\n",
    "for a_type in my_types:\n",
    "    if a_type in ['file_processed', 'file_vistrack']:\n",
    "        source_file_bucket = source_pf\n",
@@ -475,7 +490,7 @@
     "            print('Can not find file on source', file_key)\n",
     "            continue\n",
     "        print('file copied')\n",
-    "    \n"
+    "    "
   ]
  },
 {
@@ -488,7 +503,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@@ -502,9 +517,9 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.7.12"
  }
 },
 "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
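
Reviewer note: the uuid-swapping logic is the trickiest part of this change and is easy to verify in isolation. The sketch below copies the recursive check_for_swaps helper from the diff (restated with comprehensions, behavior unchanged) and runs it on a hypothetical item; the uuids and property names are illustrative only, and no ff_utils or server access is needed.

# Standalone sketch of the recursive check_for_swaps helper added in the notebook.
def check_for_swaps(props_to_check, swaps):
    '''Recursively replace any uuid found in swaps with its target-side value.'''
    if isinstance(props_to_check, dict):
        # walk every property so linkTos nested in sub-embedded objects are rewritten too
        return {prop: check_for_swaps(val, swaps) for prop, val in props_to_check.items()}
    if isinstance(props_to_check, list):
        return [check_for_swaps(val, swaps) for val in props_to_check]
    if isinstance(props_to_check, str):
        # plain strings are looked up in swaps and replaced when present
        return swaps.get(props_to_check, props_to_check)
    return props_to_check  # numbers, booleans, None pass through unchanged

# Hypothetical example: 'uuid-a' was found on the target under 'uuid-b',
# so Round I would have recorded {'uuid-a': 'uuid-b'} in id_swappers.
swaps = {'uuid-a': 'uuid-b'}
item = {'lab': 'uuid-a',
        'files': ['uuid-a', 'uuid-c'],
        'attachment': {'blob_id': 'uuid-a'}}
print(check_for_swaps(item, swaps))
# -> {'lab': 'uuid-b', 'files': ['uuid-b', 'uuid-c'], 'attachment': {'blob_id': 'uuid-b'}}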