#@markdown # ? **Step 1.** Press this button to set up the server (only run this once) #@markdown Sets up environment for running our ILM demo. #@markdown This will take a few seconds to finish. #@markdown You only need to run this once. # Install Ngrok (exposes colab server to public URL) !!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip !!unzip ngrok-stable-linux-amd64.zip # Install Python deps !!pip install torch>=1.2.0 !!pip install transformers==2.0.0 !!pip install Flask==1.1.1 !!pip install bs4==0.0.1 !!pip install nltk==3.4.5 import os import subprocess import time import nltk nltk.download('punkt') # https://medium.com/@paudelanjanchandra/download-google-drive-files-using-wget-3c2c025a8b99 DOWNLOAD_TEMPLATE = """wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=FILEID' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=FILEID" -O FILENAME && rm -rf /tmp/cookies.txt""" def download_gdrive_file(file_id, output_fp): stem = os.path.split(output_fp)[0] if len(stem.strip()) > 0: if not os.path.isdir(stem): os.makedirs(stem) cmd = DOWNLOAD_TEMPLATE.replace('FILEID', file_id).replace('FILENAME', output_fp) !!{cmd} def spawn_background_task_get_pid(cmd): process = subprocess.Popen(cmd.split()) return process.pid DATA_FILES = { 'stories': { 'id': '1APYZigQoYBAcQ-55JWQHtknlTgXrdFxW', 'fp': 'demo_resources/data/stories/valid.txt', 'style': 'abstract' }, 'abstracts': { 'id': '1puRxh1b1Qnqt8d9KTaf8q0i8QZYXOf0L', 'fp': 'demo_resources/data/abstracts/valid.txt', 'style': 'abstract' }, 'lyrics': { 'id': '1l9ijVM88Hult6hTIjXWJBXg4f8FGgwFD', 'fp': 'demo_resources/data/lyrics/valid.txt', 'style': 'verse' }, } SERVER_CODE_ID = '1FGMV8MV-HLYMuRK6e36AuD3ao-2lZjHk' CONFIG_ID = '11KQ0y_n9DqplaBPAow-xh99G7hzNm2Qd' MODEL_FILES = { 'stories': { 'id': '1ixEkWRjGS-JhfZ6Pu9_wugyUKEJfvc9b', 'fp': 'demo_resources/models/stories/pytorch_model.bin', 'cfg_fp': 'demo_resources/models/stories/config.json' }, 'abstracts': { 'id': '1bg-LI3p5rIS4-GvKWy5kxVRuc9Id9amX', 'fp': 'demo_resources/models/abstracts/pytorch_model.bin', 'cfg_fp': 'demo_resources/models/abstracts/config.json' }, 'lyrics': { 'id': '1EMRFDiBHezjGRoPX_aZMrBqD6lfOzbDG', 'fp': 'demo_resources/models/lyrics/pytorch_model.bin', 'cfg_fp': 'demo_resources/models/lyrics/config.json' }, } # Download server code download_gdrive_file(SERVER_CODE_ID, 'server.tar.gz') !!tar xvfz server.tar.gz from IPython.display import clear_output clear_output() setup_complete = True print('Setup complete! Please run the next cell now :)') #@markdown #? **Step 2.** Press this button to start the server! #@markdown Once the server has started, a server address will be printed. #@markdown #@markdown **Leave this tab/window running** and open the demo in a new tab: chrisdonahue.com/ilm #@markdown #@markdown Paste the server address into that tab. #@markdown #@markdown ### **Step 3.** Try out different text domains #@markdown #@markdown To switch text domains, re-run this cell after selecting the desired domain: domain = 'Stories' #@param ['Stories', 'Abstracts', 'Lyrics'] #@markdown - **Stories** are short stories #@markdown - **Abstracts** are CS paper abstracts from arXiv #@markdown - **Lyrics** are song lyrics #@markdown #@markdown ### (_Optional_) Improve demo reliability #@markdown #@markdown If the demo is giving you connection errors, please try the following: #@markdown - Sign up for a free ngrok account: ngrok.com #@markdown - Copy your authtoken from dashboard.ngrok.com/auth and paste it below (won't be shared with us) #@markdown - Rerun this cell optional_ngrok_auth_token = '22khKjhyqFTSp3qnMhvDrfEiGT3_4gwAYYKXqMu1A3YLEh2Yd' #@param {type:"string"} domain = domain.lower() # Ensure setup has been run try: setup_complete except: raise SystemExit('Please run the setup cell first (above this one).') # Download data from Google Drive print('Downloading text data') download_gdrive_file(DATA_FILES[domain]['id'], DATA_FILES[domain]['fp']) print('Downloading model') download_gdrive_file(MODEL_FILES[domain]['id'], MODEL_FILES[domain]['fp']) download_gdrive_file(CONFIG_ID, MODEL_FILES[domain]['cfg_fp']) for fp in [DATA_FILES[domain]['fp'], MODEL_FILES[domain]['fp'], MODEL_FILES[domain]['cfg_fp']]: try: size = os.path.getsize(fp) except: size = 0 if size < 500: raise ValueError('Failed to download. Please run this cell again!') # Spawn server, killing previous print('Starting server...') server_cmd = 'python api_server.py demo_resources/models/{domain} demo_resources/data/{domain}/valid.txt {style}'.format(domain=domain, style=DATA_FILES[domain]['style']) try: server_pid !!kill -9 {server_pid} except: pass server_pid = spawn_background_task_get_pid(server_cmd) # Server takes a few seconds to load... hold tight time.sleep(15) # Authenticate ngrok optional_ngrok_auth_token = optional_ngrok_auth_token.strip() if len(optional_ngrok_auth_token) > 0: print('Authenticating ngrok') !!./ngrok authtoken {optional_ngrok_auth_token} # Spawn ngrok ngrok_cmd = './ngrok http 6006' try: ngrok_pid !!kill -9 {ngrok_pid} except: pass ngrok_pid = spawn_background_task_get_pid(ngrok_cmd) # Ngrok takes a bit to load... hold tight time.sleep(10) # Print address print('Leave this notebook open and copy the following to https://chrisdonahue.com/ilm :') !curl -s http://localhost:4040/api/tunnels | python3 -c \ "import sys, json; address = json.load(sys.stdin)['tunnels'][0]['public_url']; print(address.split('/')[-1])"