python-hard-way/Misc. learning/python-parse-html.py

# import SimpleHTTPServer
# import SocketServer
#
# PORT = 8000
#
# Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
#
# httpd = SocketServer.TCPServer(("", PORT), Handler)
#
# print("serving at port", PORT)
# httpd.serve_forever()

from bs4 import BeautifulSoup
# import urllib.request
#
# url = "https://google.com/"
#
# yo = urllib.request.urlopen(url)
# soup = BeautifulSoup(yo.read(), 'html.parser')
# print(soup)

# Sample markup fed to the parser below. Note that the snippet never closes
# its <body> or <html> tags.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

# Build the parse tree from the sample markup.
soup = BeautifulSoup(html_doc, "html.parser")

# Show the whole tree first, then a separator: the "\n" argument plus
# print's own line ending yields two line breaks.
print(soup.prettify())
print("\n")

# Navigation and search results to display, in output order.
lookups = (
    soup.title,               # the <title> tag itself
    soup.title.name,          # that tag's name
    soup.title.string,        # the text inside <title>
    soup.title.parent.name,   # name of the tag enclosing <title>
    soup.p,                   # a <p> tag reached by attribute access
    soup.p["class"],          # that tag's "class" attribute value
    soup.a,                   # an <a> tag reached by attribute access
    soup.find_all("a"),       # result of searching for all <a> tags
    soup.find(id="link3"),    # result of searching by id="link3"
)

# Each result is written as text followed by one blank line.
for result in lookups:
    print(result, end="\n\n")
More python-y stuff. 2018-10-06 22:28:23 +00:00
			`# import SimpleHTTPServer`
			`# import SocketServer`
			`#`
			`# PORT = 8000`
			`#`
			`# Handler = SimpleHTTPServer.SimpleHTTPRequestHandler`
			`#`
			`# httpd = SocketServer.TCPServer(("", PORT), Handler)`
			`#`
			`# print("serving at port", PORT)`
			`# httpd.serve_forever()`

			`from bs4 import BeautifulSoup`
			`# import urllib.request`
			`#`
			`# url = "https://google.com/"`
			`#`
			`# yo = urllib.request.urlopen(url)`
			`# soup = BeautifulSoup(yo.read(), 'html.parser')`
			`# print(soup)`

			`html_doc = """`
			`<html><head><title>The Dormouse's story</title></head>`
			`<body>`
			`<p class="title"><b>The Dormouse's story</b></p>`

			`<p class="story">Once upon a time there were three little sisters; and their names were`
			`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,`
			`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and`
			`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;`
			`and they lived at the bottom of a well.</p>`

			`<p class="story">...</p>`
			`"""`

			`soup = BeautifulSoup(html_doc, 'html.parser')`
			`print(soup.prettify())`
			`print("\n")`
			`print(str(soup.title) + "\n")`
			`print(str(soup.title.name) + "\n")`
			`print(str(soup.title.string) + "\n")`
			`print(str(soup.title.parent.name) + "\n")`
			`print(str(soup.p) + "\n")`
			`print(str(soup.p['class']) + "\n")`
			`print(str(soup.a) + "\n")`
			`print(str(soup.find_all('a')) + "\n")`
			`print(str(soup.find(id="link3")) + "\n")`