百度360必应搜狗淘宝本站头条
当前位置:网站首页 > IT技术 > 正文

python常用的自动化脚本汇总_python 自动脚本

wptr33 2025-09-06 14:04 16 浏览

以下是python常用的自动化脚本,包括数据、网络、文件、性能等操作。

具体内容如下:

数据处理工具
网络检测工具
系统任务自动化工具
测试自动化工具
文件管理自动化工具
性能监控工具
日志分析工具
邮件自动化工具
数据库交互工具
OCR识别
PDF操作自动化
网络抓取自动化
EXCEL电子表格自动化
图像编辑自动化
1.1  数据清洗

import pandas as pd

def clean_data(input_file, output_file):
    """Write a cleaned copy of a CSV file.

    Rows containing any missing value are removed first, then exact
    duplicate rows. The result is saved to ``output_file`` without the
    index column.
    """
    frame = pd.read_csv(input_file)
    frame = frame.dropna()  # drop rows with missing values
    frame = frame.drop_duplicates()  # drop repeated rows
    frame.to_csv(output_file, index=False)

# 使用示例
clean_data("data.csv", "cleaned_data.csv")
1.2  数据对比

import pandas as pd

def compare_data(file1, file2):
    """Return the cell-level differences between two CSV files.

    Both files are loaded as DataFrames and compared with
    ``DataFrame.compare``; the resulting DataFrame is returned as-is.
    """
    left, right = [pd.read_csv(path) for path in (file1, file2)]
    return left.compare(right)

# 使用示例
result = compare_data("file1.csv", "file2.csv")
print(result)
2.1  检测端口是否开放

import socket

def check_port(host, port, timeout=3.0):
    """Return True if a TCP connection to ``host:port`` can be established.

    Args:
        host: Host name or IPv4 address to probe.
        port: TCP port number.
        timeout: Seconds to wait for the connection attempt. ``None``
            disables the timeout and blocks until the OS gives up.

    Returns:
        True when ``connect_ex`` reports success (code 0), else False.
    """
    # The context manager closes the socket even if connect_ex raises
    # (for example when the host name cannot be resolved).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(timeout)
        return sock.connect_ex((host, port)) == 0

# 使用示例
if check_port("example.com", 80):
    print("Port 80 is open")
else:
    print("Port 80 is closed")
2.2  批量ping测试

import os

def ping_hosts(hosts):
    """Ping each host once and print whether it answered.

    Args:
        hosts: Iterable of host names or IP addresses.

    The ping command is started with an argument list instead of a shell
    string, so a host value cannot inject extra shell commands. The
    packet-count flag is chosen per platform (``-n`` on Windows, ``-c``
    elsewhere).
    """
    import platform
    import subprocess

    count_flag = "-n" if platform.system().lower() == "windows" else "-c"

    for host in hosts:
        try:
            completed = subprocess.run(
                ["ping", count_flag, "1", str(host)], check=False
            )
            reachable = completed.returncode == 0
        except OSError:
            # The ping executable is missing or cannot be started; report
            # the host as down rather than aborting the whole batch.
            reachable = False

        if reachable:
            print(f"{host} is up")
        else:
            print(f"{host} is down")

# 使用示例
hosts = ["google.com", "example.com", "localhost"]
ping_hosts(hosts)
3. 监控磁盘空间

import shutil
def check_disk_space(path, threshold):
    """Print a warning when free space at ``path`` is below ``threshold`` GB.

    Free space is taken from ``shutil.disk_usage`` and floored to whole
    units of 2**30 bytes before it is compared and printed.
    """
    usage = shutil.disk_usage(path)
    free_gb = usage.free // (1 << 30)
    if free_gb >= threshold:
        print(f"Free disk space: {free_gb} GB.")
        return
    print(f"Warning: Free disk space is below {threshold} GB.")
# 使用示例
check_disk_space('/', 10)
4. 使用unittest进行单元测试

import unittest
class TestMyFunction(unittest.TestCase):
    """Unit tests for the module-level ``add`` function."""

    def test_addition(self):
        # 1 + 2 must come back as 3.
        self.assertEqual(add(1, 2), 3)
def add(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total
# 使用示例
if __name__ == '__main__':
    unittest.main()
5.1  按扩展名排序文件

import os
from shutil import move
def sort_files(directory_path):
    """Move each file in ``directory_path`` into a sub-folder named after its extension.

    Args:
        directory_path: Directory whose direct children are sorted.

    Only regular files directly inside the directory are moved. ``a.txt``
    ends up in ``<directory_path>/txt/a.txt``. Files with no extension
    (including dot-files such as ``.bashrc``) are left in place, because
    the target folder would otherwise have the same path as the file.
    """
    for filename in os.listdir(directory_path):
        source_path = os.path.join(directory_path, filename)
        if not os.path.isfile(source_path):
            continue

        file_extension = os.path.splitext(filename)[1].lstrip('.')
        if not file_extension:
            continue

        destination_directory = os.path.join(directory_path, file_extension)
        os.makedirs(destination_directory, exist_ok=True)
        move(source_path, os.path.join(destination_directory, filename))
# 使用示例
sort_files('/path/to/directory')
5.2  删除空文件夹

import os
def remove_empty_folders(directory_path):
    """Delete every empty sub-directory below ``directory_path``.

    Args:
        directory_path: Root of the tree to clean. The root itself is
            never removed.

    The tree is walked bottom-up, so a folder that becomes empty once its
    empty children are deleted is removed in the same pass.
    """
    for root, dirs, _files in os.walk(directory_path, topdown=False):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            # os.rmdir cannot remove a symlink that points at a directory.
            if os.path.islink(folder_path):
                continue
            if not os.listdir(folder_path):
                os.rmdir(folder_path)

# 使用示例
remove_empty_folders('/path/to/directory')
5.3  批量重命名文件

import os
def batch_rename(directory, prefix, extension=".txt"):
    """Rename the files in ``directory`` to ``<prefix>_<n><extension>``.

    Args:
        directory: Folder whose files are renamed.
        prefix: Text placed before the running number.
        extension: Suffix given to every file (default ``".txt"``). Pass
            ``None`` to keep each file's own extension.

    Files are numbered from 0 in sorted name order, so the result is
    reproducible. Sub-directories are skipped. Renaming goes through
    temporary names first, so a file that already carries one of the
    target names (for example when the function is run twice) is not
    overwritten by another file of the batch.
    """
    filenames = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    # Phase 1: move every file to a temporary name that cannot collide
    # with any final name.
    staged = []
    for count, filename in enumerate(filenames):
        suffix = os.path.splitext(filename)[1] if extension is None else extension
        temp_path = os.path.join(
            directory, f".batch_rename_{os.getpid()}_{count}.tmp"
        )
        final_path = os.path.join(directory, f"{prefix}_{count}{suffix}")
        os.rename(os.path.join(directory, filename), temp_path)
        staged.append((temp_path, final_path))

    # Phase 2: give every file its final name.
    for temp_path, final_path in staged:
        os.rename(temp_path, final_path)

# 使用示例
batch_rename("/path/to/files", "file")
5.4  查找大文件

import os

def find_large_files(directory, size_limit_mb):
    """Return the paths of all files under ``directory`` larger than a limit.

    Args:
        directory: Root directory, searched recursively.
        size_limit_mb: Size threshold in megabytes (1 MB = 1024 * 1024
            bytes). Only files strictly larger than this are reported.

    Returns:
        List of file paths in the order ``os.walk`` visits them.
    """
    size_limit = size_limit_mb * 1024 * 1024  # convert to bytes
    large_files = []

    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                size = os.path.getsize(file_path)
            except OSError:
                # Broken symlink or file removed during the walk: skip it.
                continue
            if size > size_limit:
                large_files.append(file_path)

    return large_files
# 使用示例
large_files = find_large_files("/path/to/directory", 100) 
# 查找大于100MB的文件
print (large_files)
6.1  监控cpu和内存使用情况

import time

import psutil

def monitor_system(interval=1):
    """Print CPU and memory utilisation forever, once per ``interval`` seconds.

    Args:
        interval: Sampling period in seconds, passed to
            ``psutil.cpu_percent``.

    This function never returns; stop it by interrupting the program.
    """
    while True:
        # cpu_percent() blocks for `interval` seconds while it measures,
        # so it already paces the loop. An extra sleep here would double
        # the reporting period.
        cpu_usage = psutil.cpu_percent(interval=interval)
        memory_usage = psutil.virtual_memory().percent
        print(f"CPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}%")

# 使用示例
monitor_system(interval=2)
6.2 监控GPU使用情况

import pynvml

def monitor_gpu_usage():
    """Print utilisation and used memory for every GPU reported by NVML.

    NVML is initialised on entry and shut down again on exit, even when a
    query fails part-way through.
    """
    pynvml.nvmlInit()
    try:
        device_count = pynvml.nvmlDeviceGetCount()

        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            # memory_info.used is divided by 1024 ** 2 for the "MB" figure.
            print(f"GPU {i}: Usage={util.gpu}%, Memory Used={memory_info.used / 1024 ** 2} MB")
    finally:
        pynvml.nvmlShutdown()

# 使用示例
monitor_gpu_usage()
6.3 监控网络带宽

import psutil
import time

def monitor_network_usage(interval=1):
    """Print the combined send and receive throughput forever.

    Args:
        interval: Seconds between two samples; also the divisor used to
            turn the byte delta into bytes per second.

    This function never returns; stop it by interrupting the program.
    """
    counters = psutil.net_io_counters()
    old_value = counters.bytes_sent + counters.bytes_recv

    while True:
        # Sleep first so that every report, including the first one,
        # covers a full interval.
        time.sleep(interval)

        # Read both counters from one snapshot so they belong together.
        counters = psutil.net_io_counters()
        new_value = counters.bytes_sent + counters.bytes_recv
        bandwidth = (new_value - old_value) / interval  # bytes per second
        print(f"Network Bandwidth: {bandwidth} B/s")

        old_value = new_value

# 使用示例
monitor_network_usage(interval=2)
6.4 监控磁盘IO

import psutil
import time

def monitor_disk_io(interval=1):
    """Print disk read and write speed forever.

    Args:
        interval: Seconds between two samples; also the divisor used to
            turn the byte deltas into per-second rates.

    This function never returns; stop it by interrupting the program.
    """
    counters = psutil.disk_io_counters()
    old_read = counters.read_bytes
    old_write = counters.write_bytes

    while True:
        # Sleep first so that every report, including the first one,
        # covers a full interval.
        time.sleep(interval)

        # Read both counters from one snapshot so they belong together.
        counters = psutil.disk_io_counters()
        new_read = counters.read_bytes
        new_write = counters.write_bytes

        read_speed = (new_read - old_read) / interval
        write_speed = (new_write - old_write) / interval

        print(f"Read Speed: {read_speed / 1024} KB/s | Write Speed: {write_speed / 1024} KB/s")

        old_read = new_read
        old_write = new_write

# 使用示例
monitor_disk_io(interval=2)
6.5 监控进程资源占用

import psutil

def monitor_process(pid):
    """Print CPU and memory usage of one process about once per second.

    Args:
        pid: Process ID of the process to watch.

    The loop runs until the process no longer exists, then returns.
    """
    process = psutil.Process(pid)

    while True:
        try:
            # cpu_percent(interval=1) blocks for one second and so paces
            # the loop.
            cpu_usage = process.cpu_percent(interval=1)
            memory_usage = process.memory_info().rss / 1024 ** 2  # convert to MB
        except psutil.NoSuchProcess:
            # The watched process has exited; nothing left to monitor.
            return

        print(f"PID {pid}: CPU={cpu_usage}%, Memory={memory_usage} MB")

# 使用示例
monitor_process(1234) # 替换为目标进程的PID
7.1 统计日志中高频错误

from collections import Counter
import re

def top_n_errors(log_file, n=5):
    """Return the ``n`` most frequent error messages in a log file.

    Args:
        log_file: Path of the log file to scan.
        n: Number of distinct messages to return.

    Returns:
        List of ``(message, count)`` tuples, most frequent first. The
        message is the text following ``"ERROR: "`` up to the end of the
        line.
    """
    error_pattern = re.compile(r"ERROR: (.+)")
    counts = Counter()

    with open(log_file, 'r') as f:
        # Count while streaming so the whole file is never held in memory.
        for line in f:
            match = error_pattern.search(line)
            if match:
                counts[match.group(1)] += 1

    return counts.most_common(n)

# 使用示例
top_errors = top_n_errors("app.log", n=3)
print(top_errors)
7.2 按时间范围过滤日志

from datetime import datetime

def filter_logs_by_time(log_file, start_time, end_time, output_file):
    """Copy the log lines whose timestamp lies in ``[start_time, end_time]``.

    Args:
        log_file: Path of the log file to read.
        start_time: Inclusive lower bound, ``"YYYY-MM-DD HH:MM:SS"``.
        end_time: Inclusive upper bound, same format.
        output_file: Path that receives the matching lines.

    Raises:
        ValueError: If ``start_time`` or ``end_time`` is not in the
            expected format.

    Each line is expected to begin with a date and a time separated by
    whitespace. Lines that do not (blank lines, stack-trace continuation
    lines, other timestamp formats) are skipped.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    start = datetime.strptime(start_time, time_format)
    end = datetime.strptime(end_time, time_format)

    filtered_logs = []
    with open(log_file, 'r') as f:
        for log in f:
            # The timestamp is assumed to be the first two fields.
            parts = log.split(maxsplit=2)
            if len(parts) < 2:
                continue
            try:
                log_time = datetime.strptime(f"{parts[0]} {parts[1]}", time_format)
            except ValueError:
                continue

            if start <= log_time <= end:
                filtered_logs.append(log)

    with open(output_file, 'w') as f:
        f.writelines(filtered_logs)

# Usage example (the start time must not be later than the end time,
# otherwise no line can match)
filter_logs_by_time("app.log", "2025-02-26 12:00:00", "2025-02-26 14:00:00", "filtered_logs.log")
7.3 提取日志中错误信息

def extract_errors(log_file, output_file):
    """Copy every line containing ``"ERROR"`` from one file to another.

    Args:
        log_file: Path of the log file to read.
        output_file: Path that receives the matching lines; it is
            overwritten if it exists.
    """
    with open(log_file, 'r') as f:
        errors = [line for line in f if "ERROR" in line]

    with open(output_file, 'w') as f:
        f.writelines(errors)

# 使用示例
extract_errors("app.log", "errors.log")
7.4 日志文件合并

def merge_log_files(log_files, output_file):
    """Concatenate several log files into ``output_file``.

    Args:
        log_files: Iterable of input paths, written in the given order.
        output_file: Path of the merged file; it is overwritten if it
            exists.

    Content is copied unchanged. No separator is inserted between files.
    """
    with open(output_file, 'w') as outfile:
        for log_file in log_files:
            with open(log_file, 'r') as infile:
                # Copy line by line rather than reading the whole file.
                outfile.writelines(infile)

# 使用示例
merge_log_files(["log1.log", "log2.log", "log3.log"], "merged_logs.log")
7.5 日志文件实时监控

import time

def tail_log_file(log_file):
    """Print lines appended to ``log_file`` as they arrive, like ``tail -f``.

    Args:
        log_file: Path of the file to follow.

    Existing content is skipped. This function never returns; stop it by
    interrupting the program.
    """
    with open(log_file, 'r') as f:
        f.seek(0, 2)  # jump to the end of the file

        while True:
            line = f.readline()
            if line:
                print(line.strip())
            else:
                # Nothing new yet: wait briefly instead of busy-looping.
                time.sleep(0.1)

# 使用示例
tail_log_file("app.log")
8. 发送个性化邮件

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def send_personalized_email(sender_email, sender_password, recipients, subject, body,
                            smtp_host='smtp.gmail.com', smtp_port=587):
    """Send the same plain-text message individually to each recipient.

    Args:
        sender_email: Address used to log in and as the ``From`` header.
        sender_password: Password for the SMTP login.
        recipients: Iterable of recipient addresses; each gets its own
            message with only that address in ``To``.
        subject: Subject line.
        body: Plain-text message body.
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port; the connection is upgraded with
            STARTTLS.
    """
    # The context manager closes the SMTP session even if login or a send
    # fails.
    with smtplib.SMTP(smtp_host, smtp_port) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        for recipient_email in recipients:
            message = MIMEMultipart()
            message['From'] = sender_email
            message['To'] = recipient_email
            message['Subject'] = subject
            message.attach(MIMEText(body, 'plain'))
            server.send_message(message)

# 使用示例
sender_email = 'your_email@gmail.com'
sender_password = 'your_password'
recipients = ['recipient1@example.com', 'recipient2@example.com']
subject = 'Hello'
body = 'This is a test email.'
send_personalized_email(sender_email, sender_password, recipients, subject, body)
9. 连接到数据库

import sqlite3
def connect_to_database(db_path):
    """Open a SQLite database and return the connection with a cursor.

    Args:
        db_path: Path of the database file (``":memory:"`` for an
            in-memory database).

    Returns:
        Tuple ``(conn, cursor)``. The caller must close ``conn``.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    return conn, cursor

def execute_query(cursor, query, params=None):
    """Run a query on ``cursor`` and return all result rows.

    Args:
        cursor: Database cursor to run the query on.
        query: SQL text. Use placeholders rather than string formatting
            for values that come from user input.
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query``. When omitted, the query is executed
            without parameters.

    Returns:
        The list of rows produced by ``cursor.fetchall()``.
    """
    if params is None:
        cursor.execute(query)
    else:
        cursor.execute(query, params)
    return cursor.fetchall()

# 使用示例
conn, cursor = connect_to_database('/path/to/database.db')
query = 'SELECT * FROM table_name'
results = execute_query(cursor, query)
print(results)
conn.close()
10. 识别图像中的文本

import pytesseract
from PIL import Image
def recognize_text(image_path, lang=None):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image to read.
        lang: Optional Tesseract language code forwarded to
            ``pytesseract.image_to_string`` (for example ``"chi_sim"`` for
            Simplified Chinese, which needs the matching language data
            installed). ``None`` keeps pytesseract's default.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    # Open the image in a context manager so the file handle is released.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image, lang=lang)

# 使用示例
text = recognize_text('/path/to/image.jpg')
print(text)
11.  从PDF中提取文本

import PyPDF2
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file.

    Uses the ``PdfReader`` / ``pages`` / ``extract_text`` API. The older
    ``PdfFileReader`` / ``numPages`` / ``getPage`` / ``extractText`` names
    were removed in PyPDF2 3.0.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ''` guards against a page yielding no text object.
        return ''.join(page.extract_text() or '' for page in reader.pages)

# 使用示例
text = extract_text_from_pdf('/path/to/document.pdf')
print(text)
12.1 从网站提取数据

import requests
from bs4 import BeautifulSoup

def scrape_data(url, timeout=10):
    """Download a page and return it parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled server would block the call indefinitely.

    Returns:
        The ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Site-specific extraction code goes here.
    return soup

# 使用示例
url = 'https://example.com'
soup = scrape_data(url)
print(soup.title.string)
12.2 批量下载图片

import requests
def download_images(url, save_directory, timeout=10):
    """Download every image listed by an API endpoint into a directory.

    Args:
        url: Endpoint that is assumed to return a JSON array of image URLs.
        save_directory: Existing directory receiving ``image_<index>.jpg``
            files.
        timeout: Seconds to wait for each HTTP request.

    Nothing is saved when the listing request does not return HTTP 200.
    Individual images that do not return HTTP 200 are skipped.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return

    images = response.json()  # assumed: a JSON array of image URLs
    for index, image_url in enumerate(images):
        image_response = requests.get(image_url, timeout=timeout)
        if image_response.status_code == 200:
            with open(f"{save_directory}/image_{index}.jpg", "wb") as f:
                f.write(image_response.content)

# 使用示例
download_images('https://api.example.com/images', '/path/to/save')
13. 读取和写入Excel

import pandas as pd
def read_excel(file_path):
    """Load an Excel workbook into a DataFrame via ``pd.read_excel``."""
    return pd.read_excel(file_path)
def write_to_excel(data, file_path):
    """Build a DataFrame from ``data`` and save it to ``file_path``.

    The index column is not written to the workbook.
    """
    pd.DataFrame(data).to_excel(file_path, index=False)

# 使用示例
data = {'Column1': [1, 2, 3], 'Column2': [4, 5, 6]}
write_to_excel(data, '/path/to/output.xlsx')
df = read_excel('/path/to/output.xlsx')
print(df)
14. 调整图像大小

from PIL import Image
def resize_image(input_path, output_path, width, height):
    """Resize an image to exactly ``width`` x ``height`` pixels and save it.

    Args:
        input_path: Path of the source image.
        output_path: Path of the resized image.
        width: Target width in pixels.
        height: Target height in pixels.

    The aspect ratio is not preserved. ``Image.LANCZOS`` is the same
    filter that ``Image.ANTIALIAS`` used to alias; the ``ANTIALIAS`` name
    was removed in Pillow 10.
    """
    # Open the image in a context manager so the file handle is released.
    with Image.open(input_path) as image:
        resized_image = image.resize((width, height), Image.LANCZOS)
        resized_image.save(output_path)

# 使用示例
resize_image('/path/to/input.jpg', '/path/to/output.jpg', 800, 600)

相关推荐

[常用工具] git基础学习笔记_git工具有哪些

添加推送信息(-m = message):git commit -m “添加注释”;查看状态...

centos7安装部署gitlab_centos7安装git服务器

一、GitLab 介绍 1.1 GitLab 信息:GitLab 是利用 Ruby on Rails 开发的一个开源版本管理系统,实现了一个自托管的 Git 项目仓库,可通过 Web 界面访问公开的或者私人的项目。...

太高效了!玩了这么久的Linux,居然不知道这7个终端快捷键

作为Linux用户,大家肯定在Linux终端下敲过无数的命令。有的命令很短,比如:ls、cd、pwd之类,这种命令大家毫无压力。但是,有些命令就比较长了,比如:...

提高开发速度还能保证质量的10个小窍门

养成坏习惯真是分分钟的事儿,而养成好习惯却很难。我发现,把那些对我有用的习惯写下来,能让我坚持住已经花心思养成的好习惯。...

版本管理最好用的工具,你懂多少?

版本控制(Revision control)是一种软件工程技术,用于在开发过程中管理我们对文件、目录或工程等内容的修改历史,方便查看更改历史记录,并通过备份恢复到以前的版本。...

Git回退到某个版本_git回退到某个版本详细步骤

在开发过程,有时会遇到合并代码或者合并主分支代码导致自己分支代码冲突等问题,这时我们需要回退到某个commit_id版本1,查看所有历史版本,获取git的某个历史版本id...

Kubernetes + Jenkins + Harbor 全景实战手册

Kubernetes+Jenkins+Harbor全景实战手册在现代企业级DevOps体系中,Kubernetes(K8s)、Jenkins和Harbor组成的CI/CD流水...

git常用命令整理_git常见命令

一、Git仓库完整迁移完整迁移,就是指,不仅将所有代码移植到新的仓库,而且要保留所有的commit记录1.随便找个文件夹,从原地址克隆一份裸版本库...

第三章:Git分支管理(多人协作基础)

3.1分支基本概念分支是Git最强大的功能之一,它允许你在主线之外创建独立的开发线路,互不干扰。理解分支的工作原理是掌握Git的关键。核心概念:HEAD:指向当前分支的指针...

云效Codeup怎么创建分支并进行分支管理

云效Codeup怎么创建分支并进行分支管理,分支是为了将修改记录分叉备份保存,不受其他分支的影响,所以在同一个代码库里可以同时进行多个修改。创建仓库时,会自动创建Master分支作为默认分支,后续...

git 如何删除本地和远程分支?_git怎么删除远程仓库

Git分支对于开发人员来说是一项强大的功能,但要维护干净的存储库,就需要知道如何删除过时的分支。本指南涵盖了您需要了解的有关本地和远程删除Git分支的所有信息。了解Git分支...

git 实现一份代码push到两个git地址上

一直以来想把自己的博客代码托管到github和coding上想一次更改一次push两个地址一起更新今天有空查资料实践了下本博客的github地址coding的git地址如果是Gi...

git操作:cherry-pick和rebase_git cherry-pick bad object

在编码中经常涉及到分支之间的代码同步问题,那就需要cherry-pick和rebase命令问题:如何将某个分支的多个commit合并到另一个分支,并在另一个分支只保留一个commit记录解答:假设有两...

模型文件硬塞进 Git,GitHub 直接打回原形:使用Git-LFS管理大文件

前言最近接手了一个计算机视觉项目代码是屎山就不说了,反正我也不看代码主要就是构建一下docker镜像,测试一下部署的兼容性这本来不难但是,国内服务器的网络环境实在是恶劣,需要配置各种镜像(dock...

防弹少年团田柾国《Euphoria》2周年 获世界实时趋势榜1位 恭喜呀

当天韩国时间凌晨3时左右,该曲在Twitter上以“2YearsWithEuphoria”的HashTag登上了世界趋势1位。在韩国推特实时趋势中,从上午开始到现在“Euphoria2岁”的Has...