# Selenium 基本使用
# 简介
Selenium 是一种驱动浏览器的库,它通过调用 Webdriver 来驱动电脑中的浏览器,从而使我们可以在一个真实的浏览器环境(而不是模拟的浏览器环境)中获取网页数据。它提供了一系列 API 来帮助我们获取浏览器中的内容。
使用它之前需要下载与浏览器版本相对应的 Webdriver:
- Chrome Webdriver
- Edge Webdriver
- FireFox Webdriver
- Safari Webdriver
在合适的目录中放置好 Webdriver 后,我们就可以通过 Selenium 调用它,从而获取网页内容。
安装 Selenium:
pip install selenium |
前置工作做好后,我们就可以开始抓取网页内容。
# Chromedriver 使用
# 载入需要的库 | |
from selenium import webdriver | |
# 开启浏览器视窗 (Chrome) | |
# 方法一:执行前需要确保 chromedriver.exe 位于系统 PATH 中,或与该代码文件在相同的工作目录 | |
driver = webdriver.Chrome() | |
# 方法二:或是直接指定 exe 文件路径 | |
driver = webdriver.Chrome("Desktop\chromedriver") |
若是想要使用上面列出的其他浏览器,只需要把代码中的浏览器名称换成对应的名称即可。
# 调用 FireFox | |
driver = webdriver.Firefox() | |
# 调用 Safari | |
driver = webdriver.Safari() |
获得 driver 对象后,调用 get 函数并传入想要访问网页的 url 即可。
# 访问本站 | |
driver.get("https://asuhe.fun") | |
# 关闭浏览器视窗 | |
driver.close() |
# 使用 Selenium 控制页面
当我们操作页面时,例如点击页面、提交表单,我们需要预先获取到触发该事件的 DOM 元素。这类似于使用 JavaScript 去操纵页面,只是这里的语言换成了 Python。
<input class="usernameBox" /> |
# 获得输入框 | |
element = driver.find_element_by_class_name("usernameBox") | |
# 填充内容 | |
element.send_keys("hello selenium") | |
# 清除内容 | |
element.clear() | |
# 点击元素 | |
element.click() |
其中 send_keys 函数不仅可以填充内容,还可以模拟所有的键盘操作。当使用非功能性按键(如 26 个字母)时,直接以字符串形式传入对应字符即可;而功能按键(如 Ctrl、Enter)则需要使用 Keys 类中定义的常量。
# 载入对应库 | |
from selenium.webdriver.common.keys import Keys | |
# 模拟 ctrl + c | |
element.send_keys(Keys.CONTROL, "c") | |
# 模拟 ctrl + v | |
element.send_keys(Keys.CONTROL, "v") |
功能按键字符对照表
NULL = '\ue000' | |
CANCEL = '\ue001' # ^break | |
HELP = '\ue002' | |
BACKSPACE = '\ue003' | |
BACK_SPACE = BACKSPACE | |
TAB = '\ue004' | |
CLEAR = '\ue005' | |
RETURN = '\ue006' | |
ENTER = '\ue007' | |
SHIFT = '\ue008' | |
LEFT_SHIFT = SHIFT | |
CONTROL = "\ue009" | |
LEFT_CONTROL = CONTROL | |
ALT = "\ue00a" | |
LEFT_ALT = ALT | |
PAUSE = "\ue00b" | |
ESCAPE = "\ue00c" | |
SPACE = "\ue00d" | |
PAGE_UP = "\ue00e" | |
PAGE_DOWN = "\ue00f" | |
END = "\ue010" | |
HOME = "\ue011" | |
LEFT = "\ue012" | |
ARROW_LEFT = LEFT | |
UP = "\ue013" | |
ARROW_UP = UP | |
RIGHT = "\ue014" | |
ARROW_RIGHT = RIGHT | |
DOWN = "\ue015" | |
ARROW_DOWN = DOWN | |
INSERT = "\ue016" | |
DELETE = "\ue017" | |
SEMICOLON = "\ue018" | |
EQUALS = "\ue019" | |
NUMPAD0 = "\ue01a" # number pad keys | |
NUMPAD1 = "\ue01b" | |
NUMPAD2 = "\ue01c" | |
NUMPAD3 = "\ue01d" | |
NUMPAD4 = "\ue01e" | |
NUMPAD5 = "\ue01f" | |
NUMPAD6 = "\ue020" | |
NUMPAD7 = "\ue021" | |
NUMPAD8 = "\ue022" | |
NUMPAD9 = "\ue023" | |
MULTIPLY = "\ue024" | |
ADD = "\ue025" | |
SEPARATOR = "\ue026" | |
SUBTRACT = "\ue027" | |
DECIMAL = "\ue028" | |
DIVIDE = "\ue029" | |
F1 = "\ue031" | |
F2 = "\ue032" | |
F3 = "\ue033" | |
F4 = "\ue034" | |
F5 = "\ue035" | |
F6 = "\ue036" | |
F7 = "\ue037" | |
F8 = "\ue038" | |
F9 = "\ue039" | |
F10 = "\ue03a" | |
F11 = "\ue03b" | |
F12 = "\ue03c" | |
META = "\ue03d" | |
COMMAND = "\ue03d" |
# 页面跳转
# 前进 | |
driver.forward() | |
# 后退 | |
driver.back() |