source: rtems-source-builder/source-builder/sb/download.py @ 1bbb766

4.104.95
Last change on this file since 1bbb766 was 1bbb766, checked in by Chris Johns <chrisj@…>, on 03/17/16 at 05:47:31

sb: Change urlib to urllib2 on Python2 and add user agent support.

Some sites block the standard python user agent. Change to Wget
as it seems to make the tested sites behave.

Limit the size of long URLs when logging to the user.

Closes #2656.

  • Property mode set to 100644
File size: 22.5 KB
Line 
1#
2# RTEMS Tools Project (http://www.rtems.org/)
3# Copyright 2010-2016 Chris Johns (chrisj@rtems.org)
4# All rights reserved.
5#
6# This file is part of the RTEMS Tools package in 'rtems-tools'.
7#
8# Permission to use, copy, modify, and/or distribute this software for any
9# purpose with or without fee is hereby granted, provided that the above
10# copyright notice and this permission notice appear in all copies.
11#
12# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
20#
21# This code builds a package given a config file. It only builds to be
22# installed not to be package unless you run a packager around this.
23#
24
25from __future__ import print_function
26
27import hashlib
28import os
29import stat
30import sys
31try:
32    import urllib.request as urllib_request
33    import urllib.parse as urllib_parse
34except ImportError:
35    import urllib2 as urllib_request
36    import urlparse as urllib_parse
37
38import cvs
39import error
40import git
41import log
42import path
43import sources
44import version
45
46def _do_download(opts):
47    download = True
48    if opts.dry_run():
49        download = False
50        wa = opts.with_arg('download')
51        if wa is not None:
52            if wa[0] == 'with_download' and wa[1] == 'yes':
53                download = True
54    return download
55
56def _humanize_bytes(bytes, precision = 1):
57    abbrevs = (
58        (1 << 50, 'PB'),
59        (1 << 40, 'TB'),
60        (1 << 30, 'GB'),
61        (1 << 20, 'MB'),
62        (1 << 10, 'kB'),
63        (1, ' bytes')
64    )
65    if bytes == 1:
66        return '1 byte'
67    for factor, suffix in abbrevs:
68        if bytes >= factor:
69            break
70    return '%.*f%s' % (precision, float(bytes) / factor, suffix)
71
72def _sensible_url(url, used = 0):
73    space = 150 - used - 15
74    if len(url) > space:
75        size = (space - 5) / 2
76        url = url[:size] + ' ... ' + url[-size:]
77    return url
78
def _hash_check(file_, absfile, macros, remove = True):
    """Verify the checksum of a downloaded file.

    Looks up the configured '<algorithm> <digest>' entry for file_ via
    sources.get_hash() and compares it with the actual digest of
    absfile. A failed check optionally removes the file so a later
    download attempt starts clean.

    :param file_: file name used (lower cased) to look up the hash.
    :param absfile: path of the file to digest.
    :param macros: macro table passed to sources.get_hash().
    :param remove: when True delete absfile if the check fails.
    :returns: True when the hash matches, or when no hash is configured
              on an unreleased RSB; False when the check failed.
    :raises error.internal: malformed hash entry.
    :raises error.general: unknown hash algorithm, failure removing a
            bad file, or no hash configured in a released RSB.
    """
    failed = False
    hash_ = sources.get_hash(file_.lower(), macros)
    if hash_ is not None:
        hash_ = hash_.split()
        if len(hash_) != 2:
            raise error.internal('invalid hash format: %s' % (file_))
        # hashlib.algorithms exists on Python 2.7 only; fall back to the
        # algorithms guaranteed on both Python 2 and 3.
        hashlib_algorithms = getattr(hashlib, 'algorithms',
                                     ['md5', 'sha1', 'sha224',
                                      'sha256', 'sha384', 'sha512'])
        if hash_[0] not in hashlib_algorithms:
            raise error.general('invalid hash algorithm for %s: %s' % (file_, hash_[0]))
        hasher = None
        _in = None
        try:
            hasher = hashlib.new(hash_[0])
            _in = open(path.host(absfile), 'rb')
            hasher.update(_in.read())
        except IOError as err:
            log.notice('hash: %s: read error: %s' % (file_, str(err)))
            failed = True
        except:
            msg = 'hash: %s: error' % (file_)
            log.stderr(msg)
            log.notice(msg)
            raise
        finally:
            # Always close the file, on success, read error and re-raise.
            if _in is not None:
                _in.close()
        if not failed:
            # Only compare digests when the file was actually read; the
            # old code logged a meaningless empty digest after an error.
            log.output('checksums: %s: %s => %s' % (file_, hasher.hexdigest(), hash_[1]))
            if hasher.hexdigest() != hash_[1]:
                log.warning('checksum error: %s' % (file_))
                failed = True
        if failed and remove:
            log.warning('removing: %s' % (file_))
            if path.exists(absfile):
                try:
                    os.remove(path.host(absfile))
                except OSError as err:
                    # os.remove raises OSError (IOError is an alias only
                    # on Python 3); the old IOError clause never fired.
                    raise error.general('hash: %s: remove: %s' % (absfile, str(err)))
                except:
                    raise error.general('hash: %s: remove error' % (file_))
    else:
        if version.released():
            raise error.general('%s: no hash found in released RSB' % (file_))
        log.warning('%s: no hash found' % (file_))
    return not failed
130
def _local_path(source, pathkey, config):
    """Locate the source file in the colon separated search paths.

    The first path in pathkey provides the default local prefix; when
    the file is found in a path the local location is updated, the
    checksum is verified and the search stops.
    """
    for search_dir in config.define(pathkey).split(':'):
        prefix = path.abspath(search_dir)
        candidate = path.join(prefix, source['file'])
        if source['local'] is None:
            # First path seen becomes the default download location.
            source['local_prefix'] = prefix
            source['local'] = candidate
        if path.exists(candidate):
            source['local_prefix'] = prefix
            source['local'] = candidate
            _hash_check(source['file'], candidate, config.macros)
            break
142
def _http_parser(source, pathkey, config, opts):
    """Complete the source record for a http/ftp URL.

    Rewrites gitweb.cgi patch requests into a sensible local file name,
    strips query data from the file name, resolves the local path and
    records the decompression command for the file's extension.
    """
    #
    # Hack for gitweb.cgi patch downloads. We rewrite the various fields.
    #
    if 'gitweb.cgi' in source['url']:
        url = source['url']
        if '?' not in url:
            raise error.general('invalid gitweb.cgi request: %s' % (url))
        req = url.split('?')[1]
        if len(req) == 0:
            raise error.general('invalid gitweb.cgi request: %s' % (url))
        #
        # The gitweb.cgi request should have:
        #    p=<what>
        #    a=patch
        #    h=<hash>
        # so extract the p and h parts to make the local name.
        #
        fields = {}
        for item in req.split(';'):
            if '=' not in item:
                raise error.general('invalid gitweb.cgi path: %s' % (url))
            pair = item.split('=')
            fields[pair[0]] = pair[1]
        if 'p' not in fields or 'h' not in fields:
            raise error.general('gitweb.cgi path missing p or h: %s' % (url))
        source['file'] = '%s-%s.patch' % (fields['p'].replace('.', '-'),
                                          fields['h'])
    #
    # Check the source file name for any extra request query data and remove if
    # found. Some hosts do not like file names containing them.
    #
    if '?' in source['file']:
        source['file'] = source['file'].split('?')[0]
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    #
    # Is the file compressed ? Map the final extension to the macro
    # command used to decompress it.
    #
    decompressors = {
        'gz':  ('gzip',  '%{__gzip} -dc'),
        'bz2': ('bzip2', '%{__bzip2} -dc'),
        'zip': ('zip',   '%{__unzip} -u'),
        'xz':  ('xz',    '%{__xz} -dc'),
    }
    last_ext = source['ext'].split('.')[-1]
    if last_ext in decompressors:
        source['compressed-type'], source['compressed'] = decompressors[last_ext]
204
def _patchworks_parser(source, pathkey, config, opts):
    """Complete the source record for a patchworks 'pw' URL.

    Resolves the local path then rewrites the 'pw' scheme to 'http'
    by replacing the first two characters of the path.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    source['url'] = 'http' + source['path'][2:]
211
def _git_parser(source, pathkey, config, opts):
    """Complete the source record for a git URL.

    Splits the URL into the repository part and any '?' separated
    options, then places the clone under the local prefix's 'git'
    directory and uses it as the symlink target.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    #
    # Symlink.
    #
    parts = source['url'].split('?')
    repo_url = parts[0]
    source['path'] = path.dirname(repo_url)
    source['file'] = path.basename(repo_url)
    source['name'], source['ext'] = path.splitext(source['file'])
    if len(parts) > 1:
        # Everything after the first '?' is kept as raw option strings.
        source['args'] = parts[1:]
    clone_path = path.join(source['local_prefix'], 'git', source['file'])
    source['local'] = clone_path
    source['symlink'] = clone_path
229
def _cvs_parser(source, pathkey, config, opts):
    """Complete the source record for a cvs:// URL.

    The URL has the form cvs://<method>/<cvsroot>?<opt>=<val>?... with
    optional module, src-prefix, tag and date options. The options are
    folded into the local file name so different checkouts of the same
    root get distinct paths.
    """
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    #
    # Symlink.
    #
    if not source['url'].startswith('cvs://'):
        raise error.general('invalid cvs path: %s' % (source['url']))
    us = source['url'].split('?')
    try:
        url = us[0]
        # Skip the 'cvs://' prefix (6 chars) and split the remainder at
        # the first ':' (file part) and first '/' (cvsroot part). Any
        # index failure means the URL is malformed.
        source['file'] = url[url[6:].index(':') + 7:]
        source['cvsroot'] = ':%s:' % (url[6:url[6:].index('/') + 6:])
    except:
        raise error.general('invalid cvs path: %s' % (source['url']))
    # Pick apart the '?' separated key=value options.
    for a in us[1:]:
        _as = a.split('=')
        if _as[0] == 'module':
            if len(_as) != 2:
                raise error.general('invalid cvs module: %s' % (a))
            source['module'] = _as[1]
        elif _as[0] == 'src-prefix':
            if len(_as) != 2:
                raise error.general('invalid cvs src-prefix: %s' % (a))
            source['src_prefix'] = _as[1]
        elif _as[0] == 'tag':
            if len(_as) != 2:
                raise error.general('invalid cvs tag: %s' % (a))
            source['tag'] = _as[1]
        elif _as[0] == 'date':
            if len(_as) != 2:
                raise error.general('invalid cvs date: %s' % (a))
            source['date'] = _as[1]
    if 'date' in source and 'tag' in source:
        raise error.general('cvs URL cannot have a date and tag: %s' % (source['url']))
    # Do here to ensure an ordered path, the URL can include options in any order
    if 'module' in source:
        source['file'] += '_%s' % (source['module'])
    if 'tag' in source:
        source['file'] += '_%s' % (source['tag'])
    if 'date' in source:
        source['file'] += '_%s' % (source['date'])
    # Sanitise characters that are awkward in file names.
    for c in '/@#%.-':
        source['file'] = source['file'].replace(c, '_')
    source['local'] = path.join(source['local_prefix'], 'cvs', source['file'])
    if 'src_prefix' in source:
        source['symlink'] = path.join(source['local'], source['src_prefix'])
    else:
        source['symlink'] = source['local']
281
def _file_parser(source, pathkey, config, opts):
    """Complete the source record for a file:// URL."""
    #
    # Check local path
    #
    _local_path(source, pathkey, config)
    #
    # Get the paths sorted.
    #
    # NOTE(review): [6:] keeps the '/' from 'file://' (7 chars) in the
    # file name while _file_downloader strips 7 — looks inconsistent,
    # confirm which slice the callers depend on before changing.
    source['file'] = source['url'][6:]
291
# Map URL scheme prefixes to the parser that completes the source
# record. parse_url() matches these with str.startswith().
parsers = { 'http': _http_parser,
            'ftp':  _http_parser,
            'pw':   _patchworks_parser,
            'git':  _git_parser,
            'cvs':  _cvs_parser,
            'file': _file_parser }
298
def parse_url(url, pathkey, config, opts):
    """Split a source URL into the parts the build needs.

    Builds a source record with the URL, its path and file parts, the
    name and extension (keeping '.tar' with the compression extension)
    and then hands the record to the scheme specific parser which also
    checks the local source directories.

    :param url: the source or patch URL.
    :param pathkey: macro key holding the local search paths.
    :param config: the configuration with the macro table.
    :param opts: the command line options.
    :returns: the completed source record dictionary.
    :raises error.general: the URL has no '<scheme>://' prefix.
    """
    source = {}
    source['url'] = url
    colon = url.find(':')
    if url[colon + 1:colon + 3] != '//':
        # Fixed typo: was 'malforned'.
        raise error.general('malformed URL (no protocol prefix): %s' % (url))
    source['path'] = url[:colon + 3] + path.dirname(url[colon + 3:])
    source['file'] = path.basename(url)
    source['name'], source['ext'] = path.splitext(source['file'])
    if source['name'].endswith('.tar'):
        # Keep '.tar' with the compression suffix, e.g. '.tar.gz'.
        source['name'] = source['name'][:-4]
        source['ext'] = '.tar' + source['ext']
    #
    # Get the file. Checks the local source directory first.
    #
    source['local'] = None
    for p in parsers:
        if url.startswith(p):
            source['type'] = p
            # The parsers all return None so the old 'if parser(...):
            # break' never broke; break unconditionally once the
            # matching scheme's parser has run.
            parsers[p](source, pathkey, config, opts)
            break
    source['script'] = ''
    return source
325
def _http_downloader(url, local, config, opts):
    """Download a http/ftp URL to the local file.

    Skips the download if the file already exists. Progress is written
    to the console and the file's checksum is verified after a
    successful download.

    :returns: True when the file exists or downloaded cleanly, False on
              an IOError/ValueError failure (partial file removed).
    :raises error.general: downloaded path is not a file or the
            checksum does not match.
    """
    if path.exists(local):
        return True
    #
    # Hack for GitHub.
    #
    if url.startswith('https://api.github.com'):
        # Rewrite API URLs to the tarball of the configured version.
        url = urllib_parse.urljoin(url, config.expand('tarball/%{version}'))
    dst = os.path.relpath(path.host(local))
    log.output('download: %s -> %s' % (url, dst))
    log.notice('download: %s -> %s' % (_sensible_url(url, len(dst)), dst))
    failed = False
    if _do_download(opts):
        _in = None
        _out = None
        _length = None
        _have = 0
        _chunk_size = 256 * 1024
        _chunk = None
        # Start above 100% so the first progress line always prints.
        _last_percent = 200.0
        _last_msg = ''
        _have_status_output = False
        _url = url
        try:
            try:
                _in = None
                _ssl_context = None
                # See #2656
                _req = urllib_request.Request(_url)
                # Some sites block the default Python user agent; claim
                # to be Wget instead (see ticket #2656).
                _req.add_header('User-Agent', 'Wget/1.16.3 (freebsd10.1)')
                try:
                    # Try an unverified SSL context first; fall back to
                    # a plain urlopen when ssl or the context fails.
                    import ssl
                    _ssl_context = ssl._create_unverified_context()
                    _in = urllib_request.urlopen(_req, context = _ssl_context)
                except:
                    _ssl_context = None
                if _ssl_context is None:
                    _in = urllib_request.urlopen(_req)
                if _url != _in.geturl():
                    _url = _in.geturl()
                    log.output(' redirect: %s' % (_url))
                    log.notice(' redirect: %s' % (_sensible_url(_url)))
                _out = open(path.host(local), 'wb')
                try:
                    _length = int(_in.info()['Content-Length'].strip())
                except:
                    # No or bad Content-Length; no percentage shown.
                    pass
                while True:
                    _msg = '\rdownloading: %s - %s ' % (dst, _humanize_bytes(_have))
                    if _length:
                        _percent = round((float(_have) / _length) * 100, 2)
                        if _percent != _last_percent:
                            _msg += 'of %s (%0.0f%%) ' % (_humanize_bytes(_length), _percent)
                    if _msg != _last_msg:
                        # Pad with spaces then backspaces to erase any
                        # longer previous status line.
                        extras = (len(_last_msg) - len(_msg))
                        log.stdout_raw('%s%s' % (_msg, ' ' * extras + '\b' * extras))
                        _last_msg = _msg
                        _have_status_output = True
                    _chunk = _in.read(_chunk_size)
                    if not _chunk:
                        break
                    _out.write(_chunk)
                    _have += len(_chunk)
                log.stdout_raw('\n\r')
            except:
                # Finish the progress line before the error is logged.
                if _have_status_output:
                    log.stdout_raw('\n\r')
                raise
        except IOError as err:
            log.notice('download: %s: error: %s' % (_sensible_url(_url), str(err)))
            if path.exists(local):
                os.remove(path.host(local))
            failed = True
        except ValueError as err:
            log.notice('download: %s: error: %s' % (_sensible_url(_url), str(err)))
            if path.exists(local):
                os.remove(path.host(local))
            failed = True
        except:
            msg = 'download: %s: error' % (_sensible_url(_url))
            log.stderr(msg)
            log.notice(msg)
            if _in is not None:
                _in.close()
            if _out is not None:
                _out.close()
            raise
        if _out is not None:
            _out.close()
        if _in is not None:
            _in.close()
            del _in
        if not failed:
            if not path.isfile(local):
                raise error.general('source is not a file: %s' % (path.host(local)))
            if not _hash_check(path.basename(local), local, config.macros, False):
                raise error.general('checksum failure file: %s' % (dst))
    return not failed
424
def _git_downloader(url, local, config, opts):
    """Clone or update a git repository for a git URL.

    The URL may carry '?' separated options: protocol, branch/checkout,
    submodule, fetch, merge, pull and reset. A fresh clone is made when
    no valid repo exists, otherwise the repo is cleaned, hard reset and
    checked out to master before the options are applied in URL order.

    :returns: True (errors raise via the git module or error.general).
    """
    repo = git.repo(local, opts, config.macros)
    rlp = os.path.relpath(path.host(local))
    us = url.split('?')
    #
    # Handle the various git protocols.
    #
    # remove 'git' from 'git://xxxx/xxxx?protocol=...'
    #
    url_base = us[0][len('git'):]
    for a in us[1:]:
        _as = a.split('=')
        if _as[0] == 'protocol':
            if len(_as) != 2:
                raise error.general('invalid git protocol option: %s' % (_as))
            if _as[1] == 'none':
                # remove the rest of the protocol header leaving nothing.
                us[0] = url_base[len('://'):]
            else:
                if _as[1] not in ['ssh', 'git', 'http', 'https', 'ftp', 'ftps', 'rsync']:
                    raise error.general('unknown git protocol: %s' % (_as[1]))
                us[0] = _as[1] + url_base
    if not repo.valid():
        log.notice('git: clone: %s -> %s' % (us[0], rlp))
        if _do_download(opts):
            repo.clone(us[0], local)
    else:
        # Discard local changes so the option handling below starts
        # from a clean master.
        repo.clean(['-f', '-d'])
        repo.reset('--hard')
        repo.checkout('master')
    # Apply the remaining options in the order they appear in the URL.
    for a in us[1:]:
        _as = a.split('=')
        if _as[0] == 'branch' or _as[0] == 'checkout':
            if len(_as) != 2:
                raise error.general('invalid git branch/checkout: %s' % (_as))
            log.notice('git: checkout: %s => %s' % (us[0], _as[1]))
            if _do_download(opts):
                repo.checkout(_as[1])
        elif _as[0] == 'submodule':
            if len(_as) != 2:
                raise error.general('invalid git submodule: %s' % (_as))
            log.notice('git: submodule: %s <= %s' % (us[0], _as[1]))
            if _do_download(opts):
                repo.submodule(_as[1])
        elif _as[0] == 'fetch':
            log.notice('git: fetch: %s -> %s' % (us[0], rlp))
            if _do_download(opts):
                repo.fetch()
        elif _as[0] == 'merge':
            log.notice('git: merge: %s' % (us[0]))
            if _do_download(opts):
                repo.merge()
        elif _as[0] == 'pull':
            log.notice('git: pull: %s' % (us[0]))
            if _do_download(opts):
                repo.pull()
        elif _as[0] == 'reset':
            arg = []
            if len(_as) > 1:
                arg = ['--%s' % (_as[1])]
            log.notice('git: reset: %s' % (us[0]))
            if _do_download(opts):
                repo.reset(arg)
        elif _as[0] == 'protocol':
            # Already handled in the first pass above.
            pass
        else:
            raise error.general('invalid git option: %s' % (_as))
    return True
493
def _cvs_downloader(url, local, config, opts):
    """Check out or update a CVS repository for a cvs URL.

    The URL's '?' separated options select the module, src-prefix, tag
    and date for the checkout; 'update' and 'reset' options act on an
    existing checkout.

    :returns: True (errors raise via the cvs module or error.general).
    """
    rlp = os.path.relpath(path.host(local))
    us = url.split('?')
    module = None
    tag = None
    date = None
    src_prefix = None
    for a in us[1:]:
        _as = a.split('=')
        if _as[0] == 'module':
            if len(_as) != 2:
                raise error.general('invalid cvs module: %s' % (a))
            module = _as[1]
        elif _as[0] == 'src-prefix':
            if len(_as) != 2:
                raise error.general('invalid cvs src-prefix: %s' % (a))
            src_prefix = _as[1]
        elif _as[0] == 'tag':
            if len(_as) != 2:
                raise error.general('invalid cvs tag: %s' % (a))
            tag = _as[1]
        elif _as[0] == 'date':
            if len(_as) != 2:
                raise error.general('invalid cvs date: %s' % (a))
            date = _as[1]
    repo = cvs.repo(local, opts, config.macros, src_prefix)
    if not repo.valid():
        # NOTE(review): the checkout is nested under the isdir test so
        # it only happens when the directory had to be created —
        # confirm an invalid repo in an existing directory is intended
        # to be left untouched.
        if not path.isdir(local):
            log.notice('Creating source directory: %s' % \
                           (os.path.relpath(path.host(local))))
            if _do_download(opts):
                path.mkdir(local)
            log.notice('cvs: checkout: %s -> %s' % (us[0], rlp))
            if _do_download(opts):
                repo.checkout(':%s' % (us[0][6:]), module, tag, date)
    for a in us[1:]:
        _as = a.split('=')
        if _as[0] == 'update':
            log.notice('cvs: update: %s' % (us[0]))
            if _do_download(opts):
                repo.update()
        elif _as[0] == 'reset':
            log.notice('cvs: reset: %s' % (us[0]))
            if _do_download(opts):
                repo.reset()
    return True
540
def _file_downloader(url, local, config, opts):
    """Copy a file:// source into the local source directory.

    A best effort copy: any failure is reported by returning False so
    the caller can try another URL.
    """
    if path.exists(local):
        return True
    try:
        src = url[7:]
        dst = local
        log.notice('download: copy %s -> %s' % (src, dst))
        path.copy(src, dst)
        return True
    except:
        return False
551
# Map URL scheme prefixes to the downloader that fetches the source.
# get_file() matches these with str.startswith().
downloaders = { 'http': _http_downloader,
                'ftp':  _http_downloader,
                'pw':   _http_downloader,
                'git':  _git_downloader,
                'cvs':  _cvs_downloader,
                'file': _file_downloader }
558
def get_file(url, local, opts, config):
    """Fetch a source or patch file, trying each candidate URL in turn.

    Builds the list of URLs from any user supplied base URLs and, for
    released versions, the RTEMS release URL, then hands each URL to
    the downloader matching its scheme until one succeeds.

    :param url: the URL from the configuration.
    :param local: the local path the file should end up at.
    :param opts: the command line options.
    :param config: the configuration with the macro table.
    :raises error.general: invalid local path, missing source with
            downloading disabled, or every URL failed.
    """
    if local is None:
        raise error.general('source/patch path invalid')
    if not path.isdir(path.dirname(local)) and not opts.download_disabled():
        log.notice('Creating source directory: %s' % \
                       (os.path.relpath(path.host(path.dirname(local)))))
    log.output('making dir: %s' % (path.host(path.dirname(local))))
    if _do_download(opts):
        path.mkdir(path.dirname(local))
    if not path.exists(local) and opts.download_disabled():
        raise error.general('source not found: %s' % (path.host(local)))
    #
    # Check if a URL has been provided on the command line. If the package is
    # released add the RTEMS release URL unless overridden by the command line
    # option --with-release-url. The variant --without-release-url can
    # override the released check.
    #
    url_bases = opts.urls()
    try:
        rtems_release_url_value = config.macros.expand('%{rtems_release_url}/%{rsb_version}/sources')
    except:
        rtems_release_url_value = None
        log.output('RTEMS release URL could not be expanded')
    rtems_release_url = None
    if version.released() and rtems_release_url_value:
        rtems_release_url = rtems_release_url_value
    with_rel_url = opts.with_arg('release-url')
    if with_rel_url[1] == 'not-found':
        if config.defined('without_release_url'):
            with_rel_url = ('without_release-url', 'yes')
    if with_rel_url[0] == 'with_release-url':
        if with_rel_url[1] == 'yes':
            if rtems_release_url_value is None:
                raise error.general('no valid release URL')
            rtems_release_url = rtems_release_url_value
        elif with_rel_url[1] == 'no':
            pass
        else:
            # Any other value is treated as the release URL itself.
            rtems_release_url = with_rel_url[1]
    elif with_rel_url[0] == 'without_release-url' and with_rel_url[1] == 'yes':
        rtems_release_url = None
    if rtems_release_url is not None:
        log.trace('release url: %s' % (rtems_release_url))
        #
        # If the URL being fetched is under the release path do not add the
        # sources release path because it is already there.
        #
        if not url.startswith(rtems_release_url):
            if url_bases is None:
                url_bases = [rtems_release_url]
            else:
                url_bases.append(rtems_release_url)
    urls = []
    if url_bases is not None:
        #
        # Split up the URL we are being asked to download.
        #
        url_path = urllib_parse.urlsplit(url)[2]
        slash = url_path.rfind('/')
        if slash < 0:
            url_file = url_path
        else:
            url_file = url_path[slash + 1:]
        log.trace('url_file: %s' %(url_file))
        for base in url_bases:
            # urljoin drops the last path component unless the base
            # ends with a '/'.
            if base[-1:] != '/':
                base += '/'
            next_url = urllib_parse.urljoin(base, url_file)
            log.trace('url: %s' %(next_url))
            urls.append(next_url)
    # The configured URL is tried last, after any base URLs.
    urls += url.split()
    log.trace('_url: %s -> %s' % (','.join(urls), local))
    for url in urls:
        for dl in downloaders:
            if url.startswith(dl):
                if downloaders[dl](url, local, config, opts):
                    return
    if _do_download(opts):
        raise error.general('downloading %s: all paths have failed, giving up' % (url))
Note: See TracBrowser for help on using the repository browser.