Source code for general_tools.url_utils

from __future__ import print_function, unicode_literals
import json
import shutil
import sys
import ssl
from contextlib import closing

try:
    import urllib.request as urllib2
except ImportError:
    import urllib2


[docs]def get_url(url, catch_exception=False):
    """
    :param str|unicode url: URL to open
    :param bool catch_exception: If <True> catches all exceptions and returns <False>
    """
    return _get_url(url, catch_exception, urlopen=urllib2.urlopen)


def _get_url(url, catch_exception, urlopen):
    if catch_exception:
        # noinspection PyBroadException
        try:
            with closing(urlopen(url)) as request:
                response = request.read()
        except:
            response = False
    else:
        with closing(urlopen(url)) as request:
            response = request.read()

    # convert bytes to str (Python 3.5)
    if type(response) is bytes:
        return response.decode('utf-8')
    else:
        return response


[docs]def download_file(url, outfile):
    """Downloads a file and saves it."""
    _download_file(url, outfile, urlopen=urllib2.urlopen)


def _download_file(url, outfile, urlopen):
    try:
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        with closing(urlopen(url)) as request:
            with open(outfile, 'wb') as fp:
                shutil.copyfileobj(request, fp)
    except IOError as err:
        print('ERROR retrieving %s' % url)
        print(err)
        sys.exit(1)


[docs]def get_languages():
    """
    Returns an array of over 7000 dictionaries.

    Structure:
    [
      {
        cc: ["DJ", "US", "CA"],
        pk: 2,
        lr: "Africa",
        ln: "Afaraf",
        ang: "Afar",
        gw: false,
        ld: "ltr",
        alt: ["Afaraf", "Danakil"],
        lc: aa
      },
      ...
    ]
    """
    url = 'http://td.unfoldingword.org/exports/langnames.json'
    return json.loads(get_url(url))


[docs]def join_url_parts(*args):
    """
    Joins a list of segments into a URL-like string.

    :type args: List<string>
    """
    # check for edge case
    if len(args) == 1:
        return args[0]

    return_val = clean_url_segment(args[0])

    for i in range(1, len(args)):
        arg = args[i]

        if i == len(args) - 1:
            # no need to remove a trailing slash if this is the last segment
            return_val += '/' + arg
        else:
            # remove a trailing slash so it won't be duplicated
            return_val += '/' + clean_url_segment(arg)

    return return_val


[docs]def clean_url_segment(segment):

    if segment[-1:] == '/':
        return segment[:-1]

    return segment