Coverage for pyguymer3/download_text.py: 6%
17 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-08 18:47 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-08 18:47 +0000
#!/usr/bin/env python3

# Define function ...
def download_text(
    sess,
    url,
    /,
    *,
    cookies = None,
    debug = __debug__,
    ensureNFC = True,
    headers = None,
    timeout = 10.0,
    verify = True,
):
    """Fetch a URL with a HTTP GET and hand back the body as text

    The request is delegated to the sibling ``download`` helper. The text of
    the response has its HTML character references un-escaped and, optionally,
    is normalised so that its Unicode form is NFC.

    Parameters
    ----------
    sess : requests.sessions.Session
        the :mod:`requests` session to use
    url : str
        the URL to fetch
    cookies : dict, optional
        the cookie jar (an empty dictionary is used if ``None``)
    debug : bool, optional
        print a debug message when the text has to be converted to NFC
    ensureNFC : bool, optional
        normalise the text to Unicode NFC if it is not already NFC
    headers : dict, optional
        extra headers to send (an empty dictionary is used if ``None``)
    timeout : float, optional
        the timeout of the GET request
    verify : bool, optional
        verify the server's certificates

    Returns
    -------
    text : bool, str
        `False` if the ``download`` helper returned `False`, otherwise the
        un-escaped (and possibly NFC-normalised) text as a `str`

    Notes
    -----
    Copyright 2017 Thomas Guymer [1]_

    References
    ----------
    .. [1] PyGuymer3, https://github.com/Guymer/PyGuymer3
    """

    # Import standard modules ...
    import html
    import unicodedata

    # Import sub-functions ...
    from .download import download

    # Swap the "None" placeholders for fresh, empty dictionaries (so that no
    # mutable default is ever shared between calls) ...
    cookies = {} if cookies is None else cookies
    headers = {} if headers is None else headers

    # **************************************************************************

    # Ask the helper to perform the GET request ...
    response = download(
        sess,
        "get",
        url,
        cookies = cookies,
        headers = headers,
        timeout = timeout,
        verify = verify,
    )

    # Bail out early if the helper signalled a failure ...
    if response is False:
        return False

    # Replace HTML character references with the characters they stand for ...
    body = html.unescape(response.text)

    # Return the text untouched if the user does not want NFC enforced or if it
    # is already NFC ...
    if not ensureNFC or unicodedata.is_normalized("NFC", body):
        return body

    # Normalise the text to NFC and return it ...
    if debug:
        print(f"DEBUG: Converting \"{url}\" to Unicode NFC.")
    return unicodedata.normalize("NFC", body)